Fix memory barrier in a debug function
[netbsd-mini2440.git] / sys / kern / kern_tc.c
blobf9e29cff7eb36c1e7b936d3bc4842e1229cbcd3c
1 /* $NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $ */
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 /*-
33 * ----------------------------------------------------------------------------
34 * "THE BEER-WARE LICENSE" (Revision 42):
35 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
36 * can do whatever you want with this stuff. If we meet some day, and you think
37 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
38 * ---------------------------------------------------------------------------
41 #include <sys/cdefs.h>
42 /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
43 __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $");
45 #include "opt_ntp.h"
47 #include <sys/param.h>
48 #include <sys/kernel.h>
49 #include <sys/reboot.h> /* XXX just to get AB_VERBOSE */
50 #include <sys/sysctl.h>
51 #include <sys/syslog.h>
52 #include <sys/systm.h>
53 #include <sys/timepps.h>
54 #include <sys/timetc.h>
55 #include <sys/timex.h>
56 #include <sys/evcnt.h>
57 #include <sys/kauth.h>
58 #include <sys/mutex.h>
59 #include <sys/atomic.h>
60 #include <sys/xcall.h>
63 * A large step happens on boot. This constant detects such steps.
64 * It is relatively small so that ntp_update_second gets called enough
65 * in the typical 'missed a couple of seconds' case, but doesn't loop
66 * forever when the time step is large.
68 #define LARGE_STEP 200
71 * Implement a dummy timecounter which we can use until we get a real one
72 * in the air. This allows the console and other early stuff to use
73 * time services.
76 static u_int
77 dummy_get_timecount(struct timecounter *tc)
79 static u_int now;
81 return (++now);
84 static struct timecounter dummy_timecounter = {
85 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, NULL,
88 struct timehands {
89 /* These fields must be initialized by the driver. */
90 struct timecounter *th_counter; /* active timecounter */
91 int64_t th_adjustment; /* frequency adjustment */
92 /* (NTP/adjtime) */
93 u_int64_t th_scale; /* scale factor (counter */
94 /* tick->time) */
95 u_int64_t th_offset_count; /* offset at last time */
96 /* update (tc_windup()) */
97 struct bintime th_offset; /* bin (up)time at windup */
98 struct timeval th_microtime; /* cached microtime */
99 struct timespec th_nanotime; /* cached nanotime */
100 /* Fields not to be copied in tc_windup start with th_generation. */
101 volatile u_int th_generation; /* current genration */
102 struct timehands *th_next; /* next timehand */
105 static struct timehands th0;
106 static struct timehands th9 = { .th_next = &th0, };
107 static struct timehands th8 = { .th_next = &th9, };
108 static struct timehands th7 = { .th_next = &th8, };
109 static struct timehands th6 = { .th_next = &th7, };
110 static struct timehands th5 = { .th_next = &th6, };
111 static struct timehands th4 = { .th_next = &th5, };
112 static struct timehands th3 = { .th_next = &th4, };
113 static struct timehands th2 = { .th_next = &th3, };
114 static struct timehands th1 = { .th_next = &th2, };
115 static struct timehands th0 = {
116 .th_counter = &dummy_timecounter,
117 .th_scale = (uint64_t)-1 / 1000000,
118 .th_offset = { .sec = 1, .frac = 0 },
119 .th_generation = 1,
120 .th_next = &th1,
123 static struct timehands *volatile timehands = &th0;
124 struct timecounter *timecounter = &dummy_timecounter;
125 static struct timecounter *timecounters = &dummy_timecounter;
127 time_t time_second = 1;
128 time_t time_uptime = 1;
130 static struct bintime timebasebin;
132 static int timestepwarnings;
134 kmutex_t timecounter_lock;
135 static u_int timecounter_mods;
136 static volatile int timecounter_removals = 1;
137 static u_int timecounter_bad;
139 #ifdef __FreeBSD__
140 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
141 &timestepwarnings, 0, "");
142 #endif /* __FreeBSD__ */
145 * sysctl helper routine for kern.timecounter.hardware
147 static int
148 sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
150 struct sysctlnode node;
151 int error;
152 char newname[MAX_TCNAMELEN];
153 struct timecounter *newtc, *tc;
155 tc = timecounter;
157 strlcpy(newname, tc->tc_name, sizeof(newname));
159 node = *rnode;
160 node.sysctl_data = newname;
161 node.sysctl_size = sizeof(newname);
163 error = sysctl_lookup(SYSCTLFN_CALL(&node));
165 if (error ||
166 newp == NULL ||
167 strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
168 return error;
170 if (l != NULL && (error = kauth_authorize_system(l->l_cred,
171 KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
172 NULL, NULL)) != 0)
173 return (error);
175 if (!cold)
176 mutex_spin_enter(&timecounter_lock);
177 error = EINVAL;
178 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
179 if (strcmp(newname, newtc->tc_name) != 0)
180 continue;
181 /* Warm up new timecounter. */
182 (void)newtc->tc_get_timecount(newtc);
183 (void)newtc->tc_get_timecount(newtc);
184 timecounter = newtc;
185 error = 0;
186 break;
188 if (!cold)
189 mutex_spin_exit(&timecounter_lock);
190 return error;
193 static int
194 sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
196 char buf[MAX_TCNAMELEN+48];
197 char *where;
198 const char *spc;
199 struct timecounter *tc;
200 size_t needed, left, slen;
201 int error, mods;
203 if (newp != NULL)
204 return (EPERM);
205 if (namelen != 0)
206 return (EINVAL);
208 mutex_spin_enter(&timecounter_lock);
209 retry:
210 spc = "";
211 error = 0;
212 needed = 0;
213 left = *oldlenp;
214 where = oldp;
215 for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
216 if (where == NULL) {
217 needed += sizeof(buf); /* be conservative */
218 } else {
219 slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
220 " Hz)", spc, tc->tc_name, tc->tc_quality,
221 tc->tc_frequency);
222 if (left < slen + 1)
223 break;
224 mods = timecounter_mods;
225 mutex_spin_exit(&timecounter_lock);
226 error = copyout(buf, where, slen + 1);
227 mutex_spin_enter(&timecounter_lock);
228 if (mods != timecounter_mods) {
229 goto retry;
231 spc = " ";
232 where += slen;
233 needed += slen;
234 left -= slen;
237 mutex_spin_exit(&timecounter_lock);
239 *oldlenp = needed;
240 return (error);
243 SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
245 const struct sysctlnode *node;
247 sysctl_createv(clog, 0, NULL, &node,
248 CTLFLAG_PERMANENT,
249 CTLTYPE_NODE, "timecounter",
250 SYSCTL_DESCR("time counter information"),
251 NULL, 0, NULL, 0,
252 CTL_KERN, CTL_CREATE, CTL_EOL);
254 if (node != NULL) {
255 sysctl_createv(clog, 0, NULL, NULL,
256 CTLFLAG_PERMANENT,
257 CTLTYPE_STRING, "choice",
258 SYSCTL_DESCR("available counters"),
259 sysctl_kern_timecounter_choice, 0, NULL, 0,
260 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
262 sysctl_createv(clog, 0, NULL, NULL,
263 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
264 CTLTYPE_STRING, "hardware",
265 SYSCTL_DESCR("currently active time counter"),
266 sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
267 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
269 sysctl_createv(clog, 0, NULL, NULL,
270 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
271 CTLTYPE_INT, "timestepwarnings",
272 SYSCTL_DESCR("log time steps"),
273 NULL, 0, &timestepwarnings, 0,
274 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
/*
 * Optional per-function call counters.  With TC_COUNTERS defined, one
 * statically attached event counter named n<function> exists for each
 * entry below and TC_COUNT() increments it; otherwise TC_COUNT()
 * expands to nothing.
 */
#ifdef TC_COUNTERS
#define	TC_STATS(fn)							\
static struct evcnt n##fn =						\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #fn);	\
EVCNT_ATTACH_STATIC(n##fn)
TC_STATS(binuptime);
TC_STATS(nanouptime);
TC_STATS(microuptime);
TC_STATS(bintime);
TC_STATS(nanotime);
TC_STATS(microtime);
TC_STATS(getbinuptime);
TC_STATS(getnanouptime);
TC_STATS(getmicrouptime);
TC_STATS(getbintime);
TC_STATS(getnanotime);
TC_STATS(getmicrotime);
TC_STATS(setclock);
#define	TC_COUNT(var)	var.ev_count++
#undef TC_STATS
#else
#define	TC_COUNT(var)	/* nothing */
#endif	/* TC_COUNTERS */

/* Defined further down; needed earlier by tc_init() and friends. */
static void tc_windup(void);
297 * Return the difference between the timehands' counter value now and what
298 * was when we copied it to the timehands' offset_count.
300 static __inline u_int
301 tc_delta(struct timehands *th)
303 struct timecounter *tc;
305 tc = th->th_counter;
306 return ((tc->tc_get_timecount(tc) -
307 th->th_offset_count) & tc->tc_counter_mask);
311 * Functions for reading the time. We have to loop until we are sure that
312 * the timehands that we operated on was not updated under our feet. See
313 * the comment in <sys/timevar.h> for a description of these 12 functions.
316 void
317 binuptime(struct bintime *bt)
319 struct timehands *th;
320 lwp_t *l;
321 u_int lgen, gen;
323 TC_COUNT(nbinuptime);
326 * Provide exclusion against tc_detach().
328 * We record the number of timecounter removals before accessing
329 * timecounter state. Note that the LWP can be using multiple
330 * "generations" at once, due to interrupts (interrupted while in
331 * this function). Hardware interrupts will borrow the interrupted
332 * LWP's l_tcgen value for this purpose, and can themselves be
333 * interrupted by higher priority interrupts. In this case we need
334 * to ensure that the oldest generation in use is recorded.
336 * splsched() is too expensive to use, so we take care to structure
337 * this code in such a way that it is not required. Likewise, we
338 * do not disable preemption.
340 * Memory barriers are also too expensive to use for such a
341 * performance critical function. The good news is that we do not
342 * need memory barriers for this type of exclusion, as the thread
343 * updating timecounter_removals will issue a broadcast cross call
344 * before inspecting our l_tcgen value (this elides memory ordering
345 * issues).
347 l = curlwp;
348 lgen = l->l_tcgen;
349 if (__predict_true(lgen == 0)) {
350 l->l_tcgen = timecounter_removals;
352 __insn_barrier();
354 do {
355 th = timehands;
356 gen = th->th_generation;
357 *bt = th->th_offset;
358 bintime_addx(bt, th->th_scale * tc_delta(th));
359 } while (gen == 0 || gen != th->th_generation);
361 __insn_barrier();
362 l->l_tcgen = lgen;
365 void
366 nanouptime(struct timespec *tsp)
368 struct bintime bt;
370 TC_COUNT(nnanouptime);
371 binuptime(&bt);
372 bintime2timespec(&bt, tsp);
375 void
376 microuptime(struct timeval *tvp)
378 struct bintime bt;
380 TC_COUNT(nmicrouptime);
381 binuptime(&bt);
382 bintime2timeval(&bt, tvp);
385 void
386 bintime(struct bintime *bt)
389 TC_COUNT(nbintime);
390 binuptime(bt);
391 bintime_add(bt, &timebasebin);
394 void
395 nanotime(struct timespec *tsp)
397 struct bintime bt;
399 TC_COUNT(nnanotime);
400 bintime(&bt);
401 bintime2timespec(&bt, tsp);
404 void
405 microtime(struct timeval *tvp)
407 struct bintime bt;
409 TC_COUNT(nmicrotime);
410 bintime(&bt);
411 bintime2timeval(&bt, tvp);
414 void
415 getbinuptime(struct bintime *bt)
417 struct timehands *th;
418 u_int gen;
420 TC_COUNT(ngetbinuptime);
421 do {
422 th = timehands;
423 gen = th->th_generation;
424 *bt = th->th_offset;
425 } while (gen == 0 || gen != th->th_generation);
428 void
429 getnanouptime(struct timespec *tsp)
431 struct timehands *th;
432 u_int gen;
434 TC_COUNT(ngetnanouptime);
435 do {
436 th = timehands;
437 gen = th->th_generation;
438 bintime2timespec(&th->th_offset, tsp);
439 } while (gen == 0 || gen != th->th_generation);
442 void
443 getmicrouptime(struct timeval *tvp)
445 struct timehands *th;
446 u_int gen;
448 TC_COUNT(ngetmicrouptime);
449 do {
450 th = timehands;
451 gen = th->th_generation;
452 bintime2timeval(&th->th_offset, tvp);
453 } while (gen == 0 || gen != th->th_generation);
456 void
457 getbintime(struct bintime *bt)
459 struct timehands *th;
460 u_int gen;
462 TC_COUNT(ngetbintime);
463 do {
464 th = timehands;
465 gen = th->th_generation;
466 *bt = th->th_offset;
467 } while (gen == 0 || gen != th->th_generation);
468 bintime_add(bt, &timebasebin);
471 void
472 getnanotime(struct timespec *tsp)
474 struct timehands *th;
475 u_int gen;
477 TC_COUNT(ngetnanotime);
478 do {
479 th = timehands;
480 gen = th->th_generation;
481 *tsp = th->th_nanotime;
482 } while (gen == 0 || gen != th->th_generation);
485 void
486 getmicrotime(struct timeval *tvp)
488 struct timehands *th;
489 u_int gen;
491 TC_COUNT(ngetmicrotime);
492 do {
493 th = timehands;
494 gen = th->th_generation;
495 *tvp = th->th_microtime;
496 } while (gen == 0 || gen != th->th_generation);
500 * Initialize a new timecounter and possibly use it.
502 void
503 tc_init(struct timecounter *tc)
505 u_int u;
507 u = tc->tc_frequency / tc->tc_counter_mask;
508 /* XXX: We need some margin here, 10% is a guess */
509 u *= 11;
510 u /= 10;
511 if (u > hz && tc->tc_quality >= 0) {
512 tc->tc_quality = -2000;
513 aprint_verbose(
514 "timecounter: Timecounter \"%s\" frequency %ju Hz",
515 tc->tc_name, (uintmax_t)tc->tc_frequency);
516 aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
517 } else if (tc->tc_quality >= 0 || bootverbose) {
518 aprint_verbose(
519 "timecounter: Timecounter \"%s\" frequency %ju Hz "
520 "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
521 tc->tc_quality);
524 mutex_spin_enter(&timecounter_lock);
525 tc->tc_next = timecounters;
526 timecounters = tc;
527 timecounter_mods++;
529 * Never automatically use a timecounter with negative quality.
530 * Even though we run on the dummy counter, switching here may be
531 * worse since this timecounter may not be monotonous.
533 if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
534 (tc->tc_quality == timecounter->tc_quality &&
535 tc->tc_frequency > timecounter->tc_frequency))) {
536 (void)tc->tc_get_timecount(tc);
537 (void)tc->tc_get_timecount(tc);
538 timecounter = tc;
539 tc_windup();
541 mutex_spin_exit(&timecounter_lock);
545 * Pick a new timecounter due to the existing counter going bad.
547 static void
548 tc_pick(void)
550 struct timecounter *best, *tc;
552 KASSERT(mutex_owned(&timecounter_lock));
554 for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
555 if (tc->tc_quality > best->tc_quality)
556 best = tc;
557 else if (tc->tc_quality < best->tc_quality)
558 continue;
559 else if (tc->tc_frequency > best->tc_frequency)
560 best = tc;
562 (void)best->tc_get_timecount(best);
563 (void)best->tc_get_timecount(best);
564 timecounter = best;
568 * A timecounter has gone bad, arrange to pick a new one at the next
569 * clock tick.
571 void
572 tc_gonebad(struct timecounter *tc)
575 tc->tc_quality = -100;
576 membar_producer();
577 atomic_inc_uint(&timecounter_bad);
581 * Stop using a timecounter and remove it from the timecounters list.
584 tc_detach(struct timecounter *target)
586 struct timecounter *tc;
587 struct timecounter **tcp = NULL;
588 int removals;
589 uint64_t where;
590 lwp_t *l;
592 /* First, find the timecounter. */
593 mutex_spin_enter(&timecounter_lock);
594 for (tcp = &timecounters, tc = timecounters;
595 tc != NULL;
596 tcp = &tc->tc_next, tc = tc->tc_next) {
597 if (tc == target)
598 break;
600 if (tc == NULL) {
601 mutex_spin_exit(&timecounter_lock);
602 return ESRCH;
605 /* And now, remove it. */
606 *tcp = tc->tc_next;
607 if (timecounter == target) {
608 tc_pick();
609 tc_windup();
611 timecounter_mods++;
612 removals = timecounter_removals++;
613 mutex_spin_exit(&timecounter_lock);
616 * We now have to determine if any threads in the system are still
617 * making use of this timecounter.
619 * We issue a broadcast cross call to elide memory ordering issues,
620 * then scan all LWPs in the system looking at each's timecounter
621 * generation number. We need to see a value of zero (not actively
622 * using a timecounter) or a value greater than our removal value.
624 * We may race with threads that read `timecounter_removals' and
625 * and then get preempted before updating `l_tcgen'. This is not
626 * a problem, since it means that these threads have not yet started
627 * accessing timecounter state. All we do need is one clean
628 * snapshot of the system where every thread appears not to be using
629 * old timecounter state.
631 for (;;) {
632 where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
633 xc_wait(where);
635 mutex_enter(proc_lock);
636 LIST_FOREACH(l, &alllwp, l_list) {
637 if (l->l_tcgen == 0 || l->l_tcgen > removals) {
639 * Not using timecounter or old timecounter
640 * state at time of our xcall or later.
642 continue;
644 break;
646 mutex_exit(proc_lock);
649 * If the timecounter is still in use, wait at least 10ms
650 * before retrying.
652 if (l == NULL) {
653 return 0;
655 (void)kpause("tcdetach", false, mstohz(10), NULL);
659 /* Report the frequency of the current timecounter. */
660 u_int64_t
661 tc_getfrequency(void)
664 return (timehands->th_counter->tc_frequency);
668 * Step our concept of UTC. This is done by modifying our estimate of
669 * when we booted.
671 void
672 tc_setclock(const struct timespec *ts)
674 struct timespec ts2;
675 struct bintime bt, bt2;
677 mutex_spin_enter(&timecounter_lock);
678 TC_COUNT(nsetclock);
679 binuptime(&bt2);
680 timespec2bintime(ts, &bt);
681 bintime_sub(&bt, &bt2);
682 bintime_add(&bt2, &timebasebin);
683 timebasebin = bt;
684 tc_windup();
685 mutex_spin_exit(&timecounter_lock);
687 if (timestepwarnings) {
688 bintime2timespec(&bt2, &ts2);
689 log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
690 (long long)ts2.tv_sec, ts2.tv_nsec,
691 (long long)ts->tv_sec, ts->tv_nsec);
696 * Initialize the next struct timehands in the ring and make
697 * it the active timehands. Along the way we might switch to a different
698 * timecounter and/or do seconds processing in NTP. Slightly magic.
700 static void
701 tc_windup(void)
703 struct bintime bt;
704 struct timehands *th, *tho;
705 u_int64_t scale;
706 u_int delta, ncount, ogen;
707 int i, s_update;
708 time_t t;
710 KASSERT(mutex_owned(&timecounter_lock));
712 s_update = 0;
715 * Make the next timehands a copy of the current one, but do not
716 * overwrite the generation or next pointer. While we update
717 * the contents, the generation must be zero. Ensure global
718 * visibility of the generation before proceeding.
720 tho = timehands;
721 th = tho->th_next;
722 ogen = th->th_generation;
723 th->th_generation = 0;
724 membar_producer();
725 bcopy(tho, th, offsetof(struct timehands, th_generation));
728 * Capture a timecounter delta on the current timecounter and if
729 * changing timecounters, a counter value from the new timecounter.
730 * Update the offset fields accordingly.
732 delta = tc_delta(th);
733 if (th->th_counter != timecounter)
734 ncount = timecounter->tc_get_timecount(timecounter);
735 else
736 ncount = 0;
737 th->th_offset_count += delta;
738 bintime_addx(&th->th_offset, th->th_scale * delta);
741 * Hardware latching timecounters may not generate interrupts on
742 * PPS events, so instead we poll them. There is a finite risk that
743 * the hardware might capture a count which is later than the one we
744 * got above, and therefore possibly in the next NTP second which might
745 * have a different rate than the current NTP second. It doesn't
746 * matter in practice.
748 if (tho->th_counter->tc_poll_pps)
749 tho->th_counter->tc_poll_pps(tho->th_counter);
752 * Deal with NTP second processing. The for loop normally
753 * iterates at most once, but in extreme situations it might
754 * keep NTP sane if timeouts are not run for several seconds.
755 * At boot, the time step can be large when the TOD hardware
756 * has been read, so on really large steps, we call
757 * ntp_update_second only twice. We need to call it twice in
758 * case we missed a leap second.
759 * If NTP is not compiled in ntp_update_second still calculates
760 * the adjustment resulting from adjtime() calls.
762 bt = th->th_offset;
763 bintime_add(&bt, &timebasebin);
764 i = bt.sec - tho->th_microtime.tv_sec;
765 if (i > LARGE_STEP)
766 i = 2;
767 for (; i > 0; i--) {
768 t = bt.sec;
769 ntp_update_second(&th->th_adjustment, &bt.sec);
770 s_update = 1;
771 if (bt.sec != t)
772 timebasebin.sec += bt.sec - t;
775 /* Update the UTC timestamps used by the get*() functions. */
776 /* XXX shouldn't do this here. Should force non-`get' versions. */
777 bintime2timeval(&bt, &th->th_microtime);
778 bintime2timespec(&bt, &th->th_nanotime);
779 /* Now is a good time to change timecounters. */
780 if (th->th_counter != timecounter) {
781 th->th_counter = timecounter;
782 th->th_offset_count = ncount;
783 s_update = 1;
787 * Recalculate the scaling factor. We want the number of 1/2^64
788 * fractions of a second per period of the hardware counter, taking
789 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
790 * processing provides us with.
792 * The th_adjustment is nanoseconds per second with 32 bit binary
793 * fraction and we want 64 bit binary fraction of second:
795 * x = a * 2^32 / 10^9 = a * 4.294967296
797 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
798 * we can only multiply by about 850 without overflowing, but that
799 * leaves suitably precise fractions for multiply before divide.
801 * Divide before multiply with a fraction of 2199/512 results in a
802 * systematic undercompensation of 10PPM of th_adjustment. On a
803 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable.
805 * We happily sacrifice the lowest of the 64 bits of our result
806 * to the goddess of code clarity.
809 if (s_update) {
810 scale = (u_int64_t)1 << 63;
811 scale += (th->th_adjustment / 1024) * 2199;
812 scale /= th->th_counter->tc_frequency;
813 th->th_scale = scale * 2;
816 * Now that the struct timehands is again consistent, set the new
817 * generation number, making sure to not make it zero. Ensure
818 * changes are globally visible before changing.
820 if (++ogen == 0)
821 ogen = 1;
822 membar_producer();
823 th->th_generation = ogen;
826 * Go live with the new struct timehands. Ensure changes are
827 * globally visible before changing.
829 time_second = th->th_microtime.tv_sec;
830 time_uptime = th->th_offset.sec;
831 membar_producer();
832 timehands = th;
835 * Force users of the old timehand to move on. This is
836 * necessary for MP systems; we need to ensure that the
837 * consumers will move away from the old timehand before
838 * we begin updating it again when we eventually wrap
839 * around.
841 if (++tho->th_generation == 0)
842 tho->th_generation = 1;
846 * RFC 2783 PPS-API implementation.
850 pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
852 pps_params_t *app;
853 pps_info_t *pipi;
854 #ifdef PPS_SYNC
855 int *epi;
856 #endif
858 KASSERT(mutex_owned(&timecounter_lock));
860 KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_ioctl") */
861 switch (cmd) {
862 case PPS_IOC_CREATE:
863 return (0);
864 case PPS_IOC_DESTROY:
865 return (0);
866 case PPS_IOC_SETPARAMS:
867 app = (pps_params_t *)data;
868 if (app->mode & ~pps->ppscap)
869 return (EINVAL);
870 pps->ppsparam = *app;
871 return (0);
872 case PPS_IOC_GETPARAMS:
873 app = (pps_params_t *)data;
874 *app = pps->ppsparam;
875 app->api_version = PPS_API_VERS_1;
876 return (0);
877 case PPS_IOC_GETCAP:
878 *(int*)data = pps->ppscap;
879 return (0);
880 case PPS_IOC_FETCH:
881 pipi = (pps_info_t *)data;
882 pps->ppsinfo.current_mode = pps->ppsparam.mode;
883 *pipi = pps->ppsinfo;
884 return (0);
885 case PPS_IOC_KCBIND:
886 #ifdef PPS_SYNC
887 epi = (int *)data;
888 /* XXX Only root should be able to do this */
889 if (*epi & ~pps->ppscap)
890 return (EINVAL);
891 pps->kcmode = *epi;
892 return (0);
893 #else
894 return (EOPNOTSUPP);
895 #endif
896 default:
897 return (EPASSTHROUGH);
901 void
902 pps_init(struct pps_state *pps)
905 KASSERT(mutex_owned(&timecounter_lock));
907 pps->ppscap |= PPS_TSFMT_TSPEC;
908 if (pps->ppscap & PPS_CAPTUREASSERT)
909 pps->ppscap |= PPS_OFFSETASSERT;
910 if (pps->ppscap & PPS_CAPTURECLEAR)
911 pps->ppscap |= PPS_OFFSETCLEAR;
914 void
915 pps_capture(struct pps_state *pps)
917 struct timehands *th;
919 KASSERT(mutex_owned(&timecounter_lock));
920 KASSERT(pps != NULL);
922 th = timehands;
923 pps->capgen = th->th_generation;
924 pps->capth = th;
925 pps->capcount = (u_int64_t)tc_delta(th) + th->th_offset_count;
926 if (pps->capgen != th->th_generation)
927 pps->capgen = 0;
930 void
931 pps_event(struct pps_state *pps, int event)
933 struct bintime bt;
934 struct timespec ts, *tsp, *osp;
935 u_int64_t tcount, *pcount;
936 int foff, fhard;
937 pps_seq_t *pseq;
939 KASSERT(mutex_owned(&timecounter_lock));
941 KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_event") */
942 /* If the timecounter was wound up underneath us, bail out. */
943 if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
944 return;
946 /* Things would be easier with arrays. */
947 if (event == PPS_CAPTUREASSERT) {
948 tsp = &pps->ppsinfo.assert_timestamp;
949 osp = &pps->ppsparam.assert_offset;
950 foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
951 fhard = pps->kcmode & PPS_CAPTUREASSERT;
952 pcount = &pps->ppscount[0];
953 pseq = &pps->ppsinfo.assert_sequence;
954 } else {
955 tsp = &pps->ppsinfo.clear_timestamp;
956 osp = &pps->ppsparam.clear_offset;
957 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
958 fhard = pps->kcmode & PPS_CAPTURECLEAR;
959 pcount = &pps->ppscount[1];
960 pseq = &pps->ppsinfo.clear_sequence;
964 * If the timecounter changed, we cannot compare the count values, so
965 * we have to drop the rest of the PPS-stuff until the next event.
967 if (pps->ppstc != pps->capth->th_counter) {
968 pps->ppstc = pps->capth->th_counter;
969 *pcount = pps->capcount;
970 pps->ppscount[2] = pps->capcount;
971 return;
974 /* Convert the count to a timespec. */
975 tcount = pps->capcount - pps->capth->th_offset_count;
976 bt = pps->capth->th_offset;
977 bintime_addx(&bt, pps->capth->th_scale * tcount);
978 bintime_add(&bt, &timebasebin);
979 bintime2timespec(&bt, &ts);
981 /* If the timecounter was wound up underneath us, bail out. */
982 if (pps->capgen != pps->capth->th_generation)
983 return;
985 *pcount = pps->capcount;
986 (*pseq)++;
987 *tsp = ts;
989 if (foff) {
990 timespecadd(tsp, osp, tsp);
991 if (tsp->tv_nsec < 0) {
992 tsp->tv_nsec += 1000000000;
993 tsp->tv_sec -= 1;
996 #ifdef PPS_SYNC
997 if (fhard) {
998 u_int64_t scale;
1001 * Feed the NTP PLL/FLL.
1002 * The FLL wants to know how many (hardware) nanoseconds
1003 * elapsed since the previous event.
1005 tcount = pps->capcount - pps->ppscount[2];
1006 pps->ppscount[2] = pps->capcount;
1007 tcount &= pps->capth->th_counter->tc_counter_mask;
1008 scale = (u_int64_t)1 << 63;
1009 scale /= pps->capth->th_counter->tc_frequency;
1010 scale *= 2;
1011 bt.sec = 0;
1012 bt.frac = 0;
1013 bintime_addx(&bt, scale * tcount);
1014 bintime2timespec(&bt, &ts);
1015 hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
1017 #endif
1021 * Timecounters need to be updated every so often to prevent the hardware
1022 * counter from overflowing. Updating also recalculates the cached values
1023 * used by the get*() family of functions, so their precision depends on
1024 * the update frequency.
1027 static int tc_tick;
1029 void
1030 tc_ticktock(void)
1032 static int count;
1034 if (++count < tc_tick)
1035 return;
1036 count = 0;
1037 mutex_spin_enter(&timecounter_lock);
1038 if (timecounter_bad != 0) {
1039 /* An existing timecounter has gone bad, pick a new one. */
1040 (void)atomic_swap_uint(&timecounter_bad, 0);
1041 if (timecounter->tc_quality < 0) {
1042 tc_pick();
1045 tc_windup();
1046 mutex_spin_exit(&timecounter_lock);
1049 void
1050 inittimecounter(void)
1052 u_int p;
1054 mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_HIGH);
1057 * Set the initial timeout to
1058 * max(1, <approx. number of hardclock ticks in a millisecond>).
1059 * People should probably not use the sysctl to set the timeout
1060 * to smaller than its inital value, since that value is the
1061 * smallest reasonable one. If they want better timestamps they
1062 * should use the non-"get"* functions.
1064 if (hz > 1000)
1065 tc_tick = (hz + 500) / 1000;
1066 else
1067 tc_tick = 1;
1068 p = (tc_tick * 1000000) / hz;
1069 aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
1070 p / 1000, p % 1000);
1072 /* warm up new timecounter (again) and get rolling. */
1073 (void)timecounter->tc_get_timecount(timecounter);
1074 (void)timecounter->tc_get_timecount(timecounter);