4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 /* All Rights Reserved */
26 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
33 #include <sys/t_lock.h>
34 #include <sys/taskq.h>
36 #include <sys/processor.h>
37 #include <sys/cyclic.h>
38 #include <sys/kstat.h>
39 #include <sys/systm.h>
/*
 * Forward declaration of the callout list type, so that it can be named
 * by pointer before struct callout_list itself is defined.
 */
typedef struct callout_list	callout_list_t;
50 * The callout mechanism provides general-purpose event scheduling:
51 * an arbitrary function is called in a specified amount of time.
52 * The expiration time for a callout is kept in its callout list structure.
55 typedef struct callout
{
56 struct callout
*c_idnext
; /* next in ID hash, or on freelist */
57 struct callout
*c_idprev
; /* prev in ID hash */
58 struct callout
*c_clnext
; /* next in callout list */
59 struct callout
*c_clprev
; /* prev in callout list */
60 callout_id_t c_xid
; /* extended callout ID; see below */
61 callout_list_t
*c_list
; /* callout list */
62 void (*c_func
)(void *); /* function to call */
63 void *c_arg
; /* argument to function */
67 * The callout ID (callout_id_t) uniquely identifies a callout. The callout
68 * ID is always 64 bits internally. The lower 32 bits contain an ID value.
69 * The upper 32 bits contain a generation number and flags. When the ID value
70 * wraps the generation number is incremented during ID generation. This
71 * protects callers from ID collisions that can happen as a result of the wrap.
73 * The kernel internal interface, timeout_generic(), always returns a
74 * callout_id_t. But the legacy interfaces, timeout() and realtime_timeout()
75 * return a timeout_id_t. On a 64-bit system, timeout_id_t is also 64 bits.
76 * So, the full 64-bit ID (sans the flags) can be returned. However, on 32-bit
77 * systems, timeout_id_t is 32 bits. So, only the lower 32 bits can be
78 * returned. In such cases, a default generation number of 0 is assigned to the legacy IDs.
81 * The lower 32-bit ID space is partitioned into two spaces - one for 32-bit
82 * IDs and the other for 64-bit IDs. The 32-bit ID space is further divided
83 * into two spaces - one for short-term callouts and one for long-term.
85 * Here is the bit layout for the callout ID:
87 * 63 62 61 ... 32 31 30 29 .. X+1 X ... 1 0
88 * -----------------------------------------------------------------------
89 * | Exec | Hres | Generation | Long | Counter | ID bits | Table | Type |
90 * | | time | number | term | High | | number | |
91 * -----------------------------------------------------------------------
94 * This is the executing bit which is only set in the extended callout
95 * ID. This bit indicates that the callout handler is currently being executed.
99 * Kernel features like condition variables use hrestime (system date) in
100 * conjunction with callouts. Under normal circumstances, these callouts
101 * are handled in the usual manner. They go off at specified times. But
102 * when the system time is changed abruptly (e.g., via stime()), these
103 * callouts are required to be processed immediately so that they can
104 * wake up their threads immediately. The Hrestime bit is used to mark
105 * such callouts. When the system time is changed, the callout subsystem
106 * is called to process all callouts with this bit set.
109 * This is the generation part of the ID.
112 * This bit indicates whether this is a short-term or a long-term callout.
113 * The long-term bit exists to address the problem of callout ID collision
114 * on 32-bit systems. This is an issue because the system typically
115 * generates a large number of timeout() requests, which means that callout
116 * IDs eventually get recycled. Most timeouts are very short-lived, so that
117 * ID recycling isn't a problem; but there are a handful of timeouts which
118 * are sufficiently long-lived to see their own IDs reused. We use the
119 * long-term bit to partition the ID namespace into pieces; the short-term
120 * space gets all the heavy traffic and can wrap frequently (i.e., on the
121 * order of a day) with no ill effects; the long-term space gets very little
122 * traffic and thus never wraps. That said, we need to future proof callouts
123 * in case 32-bit systems grow in size and are able to consume callout IDs
124 * at faster rates. So, we should make all the kernel clients that use
125 * callouts use the internal interface so that they can use IDs outside
126 * of the legacy space with a proper generation number.
128 * Counter High + ID counter bits:
129 * These bits represent the actual ID bits in the callout ID.
130 * The highest bit of the running counter is always set; this ensures that
131 * the callout ID is always non-zero, thus eliminating the need for an
132 * explicit wrap-around test during ID generation.
135 * These bits carry the table number for the callout table where the callout
136 * is queued. Each CPU has its own callout table. So, the callout tables are
137 * numbered from 0 - (max_ncpus - 1). Because max_ncpus is different on
138 * different systems, the actual number of table number bits will vary
139 * accordingly. And so will the ID counter bits.
142 * This bit represents the callout (table) type. Each CPU has one realtime
143 * and one normal callout table.
/* Flag bits kept in the top two bits (63 and 62) of the 64-bit callout ID. */
#define	CALLOUT_EXECUTING	0x8000000000000000ULL
#define	CALLOUT_HRESTIME	0x4000000000000000ULL
/* Every ID bit except the two flag bits above. */
#define	CALLOUT_ID_MASK		(~(CALLOUT_EXECUTING | CALLOUT_HRESTIME))
/* Lowest bit of the generation number (bit 32). */
#define	CALLOUT_GENERATION_LOW	0x100000000ULL
/* Bits 31 and 30 of the lower 32-bit ID. */
#define	CALLOUT_LONGTERM	0x80000000
#define	CALLOUT_COUNTER_HIGH	0x40000000
/* The table type occupies the lowest CALLOUT_TYPE_BITS bits. */
#define	CALLOUT_TYPE_BITS	1
#define	CALLOUT_NTYPES		(1 << CALLOUT_TYPE_BITS)
#define	CALLOUT_TYPE_MASK	(CALLOUT_NTYPES - 1)
#define	CALLOUT_COUNTER_SHIFT	callout_table_bits
/* Compose a table number from a type (t) and a value (f) placed above it. */
#define	CALLOUT_TABLE(t, f)	(((f) << CALLOUT_TYPE_BITS) | (t))
/* Decompose a callout table pointer (ct) relative to callout_table. */
#define	CALLOUT_TABLE_NUM(ct)	((ct) - callout_table)
#define	CALLOUT_TABLE_TYPE(ct)	(CALLOUT_TABLE_NUM(ct) & CALLOUT_TYPE_MASK)
#define	CALLOUT_TABLE_SEQID(ct)	(CALLOUT_TABLE_NUM(ct) >> CALLOUT_TYPE_BITS)
161 * We assume that during any period of CALLOUT_LONGTERM_TICKS ticks, at most
162 * (CALLOUT_COUNTER_HIGH / callout_counter_low) callouts will be generated.
#define	CALLOUT_LONGTERM_TICKS	0x4000UL
/* Hash tables have 2^CALLOUT_BUCKET_SHIFT buckets. */
#define	CALLOUT_BUCKET_SHIFT	9
#define	CALLOUT_BUCKETS		(1 << CALLOUT_BUCKET_SHIFT)
#define	CALLOUT_BUCKET_MASK	(CALLOUT_BUCKETS - 1)
/* Reduce a value to a bucket index. */
#define	CALLOUT_HASH(x)		((x) & CALLOUT_BUCKET_MASK)
/* Bucket index for a callout ID: hash the bits above the counter shift. */
#define	CALLOUT_IDHASH(x)	CALLOUT_HASH((x) >> CALLOUT_COUNTER_SHIFT)
/*
 * Bucket index for a callout list: XOR-fold four consecutive
 * CALLOUT_BUCKET_SHIFT-bit slices of x, then reduce to a bucket index.
 *
 * The multiply by 0 and 1 below are cosmetic. Just to align things better
 * and make it more readable. The multiplications will be done at compile
 * time.
 *
 * NOTE(review): the opening "CALLOUT_HASH(" was missing from this copy
 * (the closing parenthesis was unbalanced); it has been restored on the
 * assumption that the fold is reduced with CALLOUT_HASH -- confirm.
 */
#define	CALLOUT_CLHASH(x)			\
	CALLOUT_HASH(				\
	    ((x)>>(CALLOUT_BUCKET_SHIFT*0)) ^	\
	    ((x)>>(CALLOUT_BUCKET_SHIFT*1)) ^	\
	    ((x)>>(CALLOUT_BUCKET_SHIFT*2)) ^	\
	    ((x)>>(CALLOUT_BUCKET_SHIFT*3)))
/* Extract the table bits of a callout ID using callout_table_mask. */
#define	CALLOUT_ID_TO_TABLE(id)		((id) & callout_table_mask)

/*
 * ID templates for a given table number: the table bits with the
 * counter-high bit set, and (for the long form) the long-term bit as well.
 */
#define	CALLOUT_SHORT_ID(table)		\
	((callout_id_t)(table) | CALLOUT_COUNTER_HIGH)
#define	CALLOUT_LONG_ID(table)		\
	(CALLOUT_SHORT_ID(table) | CALLOUT_LONGTERM)
#define	CALLOUT_THREADS		2		/* keep it simple for now */

/* Callout (table) types. */
#define	CALLOUT_REALTIME	0		/* realtime callout type */
#define	CALLOUT_NORMAL		1		/* normal callout type */
195 * callout_t's are cache-aligned structures allocated from kmem caches. One kmem
196 * cache is created per lgrp and is shared by all CPUs in that lgrp. Benefits:
197 * - cache pages are mapped only in the TLBs of the CPUs of the lgrp
198 * - data in cache pages is present only in those CPU caches
199 * - memory access performance improves with locality-awareness in kmem
201 * The following structure is used to manage per-lgroup kmem caches.
203 * NOTE: Free callout_t's go to a callout table's freelist. CPUs map to callout
204 * tables via their sequence IDs, not CPU IDs. DR operations can cause a
205 * free list to have callouts from multiple lgrp caches. This takes away some
206 * performance, but is no worse than if we did not use lgrp caches at all.
208 typedef struct callout_cache
{
209 struct callout_cache
*cc_next
; /* link in the global list */
210 lgrp_handle_t cc_hand
; /* lgroup handle */
211 kmem_cache_t
*cc_cache
; /* kmem cache pointer */
212 kmem_cache_t
*cc_lcache
; /* kmem cache pointer */
216 * The callout hash structure is used for queueing both callouts and
217 * callout lists. That is why the fields are declared as void *.
219 typedef struct callout_hash
{
224 struct callout_list
{
225 callout_list_t
*cl_next
; /* next in clhash */
226 callout_list_t
*cl_prev
; /* prev in clhash */
227 hrtime_t cl_expiration
; /* expiration for callouts in list */
228 callout_hash_t cl_callouts
; /* list of callouts */
229 kcondvar_t cl_done
; /* signal callout completion */
230 ushort_t cl_waiting
; /* count of waiting untimeouts */
231 kthread_id_t cl_executor
; /* thread executing callout */
232 ulong_t cl_pad
; /* cache alignment */
236 * Per-callout table kstats.
239 * Callouts created since boot.
240 * CALLOUT_TIMEOUTS_PENDING
241 * Number of outstanding callouts.
242 * CALLOUT_UNTIMEOUTS_UNEXPIRED
243 * Number of cancelled callouts that have not expired.
244 * CALLOUT_UNTIMEOUTS_EXECUTING
245 * Number of cancelled callouts that were executing at the time of cancellation.
247 * CALLOUT_UNTIMEOUTS_EXPIRED
248 * Number of cancelled callouts that had already expired at the time of cancellation.
250 * CALLOUT_EXPIRATIONS
251 * Number of callouts that expired.
252 * CALLOUT_ALLOCATIONS
253 * Number of callout structures allocated.
/*
 * Indices into a callout table's kstat data array (see the ct_* shorthand
 * macros, which index ct_kstat_data with these values).
 *
 * NOTE(review): CALLOUT_TIMEOUTS, CALLOUT_EXPIRATIONS and
 * CALLOUT_ALLOCATIONS were missing from this copy. They are restored here
 * because the shorthand macros use them; their positions follow the order
 * in which the kstats are described in this file -- confirm. Any trailing
 * count enumerator that may have existed is not visible here and has not
 * been recreated.
 */
typedef enum callout_stat_type {
	CALLOUT_TIMEOUTS,		/* callouts created since boot */
	CALLOUT_TIMEOUTS_PENDING,	/* outstanding callouts */
	CALLOUT_UNTIMEOUTS_UNEXPIRED,	/* cancelled before expiring */
	CALLOUT_UNTIMEOUTS_EXECUTING,	/* cancelled while executing */
	CALLOUT_UNTIMEOUTS_EXPIRED,	/* cancelled after expiring */
	CALLOUT_EXPIRATIONS,		/* callouts that expired */
	CALLOUT_ALLOCATIONS		/* callout structures allocated */
} callout_stat_type_t;
269 * CALLOUT_FLAG_ROUNDUP
270 * Round up the expiration time to the nearest resolution boundary.
271 * If this flag is not specified, the expiration time is rounded down.
272 * CALLOUT_FLAG_ABSOLUTE
273 * Normally, the expiration passed to the timeout API functions is an
274 * expiration interval. If this flag is specified, then it is
275 * interpreted as the expiration time itself.
276 * CALLOUT_FLAG_HRESTIME
277 * Normally, callouts are not affected by changes to system time
278 * (hrestime). This flag is used to create a callout that is affected
279 * by system time. If system time changes, these timers must expire
280 * at once. These are used by condition variables and LWP timers that
281 * need this behavior.
283 * Legacy interfaces timeout() and realtime_timeout() pass this flag
284 * to timeout_generic() to indicate that a 32-bit ID should be allocated.
/* Flags accepted by timeout_generic(); each is a distinct bit. */
#define	CALLOUT_FLAG_ROUNDUP		0x1
#define	CALLOUT_FLAG_ABSOLUTE		0x2
#define	CALLOUT_FLAG_HRESTIME		0x4
#define	CALLOUT_FLAG_32BIT		0x8

/*
 * On 32-bit systems, the legacy interfaces, timeout() and realtime_timeout(),
 * must pass CALLOUT_FLAG_32BIT to timeout_generic() so that a 32-bit ID
 * can be generated.
 *
 * NOTE(review): this copy carried both definitions of CALLOUT_LEGACY with
 * no preprocessor guard. The _LP64 test is reconstructed from the comment
 * above (no flag on 64-bit, CALLOUT_FLAG_32BIT otherwise) -- confirm.
 */
#ifdef _LP64
#define	CALLOUT_LEGACY		0
#else
#define	CALLOUT_LEGACY		CALLOUT_FLAG_32BIT
#endif
303 * All of the state information associated with a callout table.
304 * The fields are ordered with cache performance in mind.
306 typedef struct callout_table
{
307 kmutex_t ct_mutex
; /* protects all callout state */
308 callout_t
*ct_free
; /* free callout structures */
309 callout_list_t
*ct_lfree
; /* free callout list structures */
310 callout_id_t ct_short_id
; /* most recently issued short-term ID */
311 callout_id_t ct_long_id
; /* most recently issued long-term ID */
312 callout_hash_t
*ct_idhash
; /* ID hash chains */
313 callout_hash_t
*ct_clhash
; /* callout list hash */
314 kstat_named_t
*ct_kstat_data
; /* callout kstat data */
316 uint_t ct_type
; /* callout table type */
317 uint_t ct_suspend
; /* suspend count */
318 cyclic_id_t ct_cyclic
; /* cyclic for this table */
319 hrtime_t
*ct_heap
; /* callout expiration heap */
320 ulong_t ct_heap_num
; /* occupied slots in the heap */
321 ulong_t ct_heap_max
; /* end of the heap */
322 kmem_cache_t
*ct_cache
; /* callout kmem cache */
323 kmem_cache_t
*ct_lcache
; /* callout list kmem cache */
324 callout_id_t ct_gen_id
; /* generation based ID */
326 callout_hash_t ct_expired
; /* list of expired callout lists */
327 taskq_t
*ct_taskq
; /* taskq to execute normal callouts */
328 kstat_t
*ct_kstats
; /* callout kstats */
330 ulong_t ct_pad
[4]; /* cache alignment */
332 ulong_t ct_pad
[7]; /* cache alignment */
337 * Short hand definitions for the callout kstats.
/*
 * Each name below expands to the 64-bit value of one entry of
 * ct_kstat_data, so it is written after a table reference as if it were
 * a member (for example ct->ct_timeouts).
 */
#define	ct_timeouts							\
		ct_kstat_data[CALLOUT_TIMEOUTS].value.ui64
#define	ct_timeouts_pending						\
		ct_kstat_data[CALLOUT_TIMEOUTS_PENDING].value.ui64
#define	ct_untimeouts_unexpired						\
		ct_kstat_data[CALLOUT_UNTIMEOUTS_UNEXPIRED].value.ui64
#define	ct_untimeouts_executing						\
		ct_kstat_data[CALLOUT_UNTIMEOUTS_EXECUTING].value.ui64
#define	ct_untimeouts_expired						\
		ct_kstat_data[CALLOUT_UNTIMEOUTS_EXPIRED].value.ui64
#define	ct_expirations							\
		ct_kstat_data[CALLOUT_EXPIRATIONS].value.ui64
#define	ct_allocations							\
		ct_kstat_data[CALLOUT_ALLOCATIONS].value.ui64
#define	CALLOUT_CHUNK		128

/*
 * Index arithmetic for a binary heap stored in an array with the root at
 * index 0: the children of index i are 2i + 1 (left) and 2i + 2 (right).
 * CALLOUT_HEAP_PARENT() is only meaningful for index > 0.
 */
#define	CALLOUT_HEAP_PARENT(index)	(((index) - 1) >> 1)
#define	CALLOUT_HEAP_RIGHT(index)	(((index) + 1) << 1)
#define	CALLOUT_HEAP_LEFT(index)	((((index) + 1) << 1) - 1)
/*
 * Select the handler for a callout table type: callout_realtime for
 * CALLOUT_REALTIME, callout_normal for anything else. The argument is
 * parenthesized so that an expression argument (e.g. a | b) is compared
 * as a whole.
 */
#define	CALLOUT_CYCLIC_HANDLER(t)					\
	(((t) == CALLOUT_REALTIME) ? callout_realtime : callout_normal)
364 * We define a blanket minimum resolution for callouts of 1 millisecond.
365 * 1 millisecond is a safe value as it is already supported when the clock
366 * resolution is set to high.
/* Resolutions are in nanoseconds: 1000000 is the 1 millisecond minimum. */
#define	CALLOUT_MIN_RESOLUTION	1000000ULL
#define	CALLOUT_TCP_RESOLUTION	10000000ULL	/* 10x the minimum */

#define	CALLOUT_ALIGN	64	/* cache line size */
/*
 * Largest tick count used by the callout code.
 *
 * The first definition previously ended in a semicolon, which would be
 * pasted into every expression using the macro; it has been removed.
 *
 * NOTE(review): this copy carried both definitions with no preprocessor
 * guard. The _LP64 split is reconstructed (LONG_MAX where long is 32
 * bits) -- confirm.
 */
#ifdef _LP64
#define	CALLOUT_MAX_TICKS	NSEC_TO_TICK(CY_INFINITY)
#else
#define	CALLOUT_MAX_TICKS	LONG_MAX
#endif
379 extern void callout_init(void);
380 extern void membar_sync(void);
381 extern void callout_cpu_online(cpu_t
*);
382 extern void callout_cpu_offline(cpu_t
*);
383 extern void callout_hrestime(void);
391 #endif /* _SYS_CALLO_H */