docs/how-to-build.md: use proper markup for directory names
[unleashed/tickless.git] / include / sys / callo.h
blob1cbf4643fa4230b9dffbf0b2ddf2c9fdcb634028
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 /* All Rights Reserved */
26 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #ifndef _SYS_CALLO_H
31 #define _SYS_CALLO_H
33 #include <sys/t_lock.h>
34 #include <sys/taskq.h>
35 #include <sys/lgrp.h>
36 #include <sys/processor.h>
37 #include <sys/cyclic.h>
38 #include <sys/kstat.h>
39 #include <sys/systm.h>
41 #ifdef __cplusplus
42 extern "C" {
43 #endif
45 #ifdef _KERNEL
47 typedef struct callout_list callout_list_t;
50 * The callout mechanism provides general-purpose event scheduling:
51 * an arbitrary function is called in a specified amount of time.
52 * The expiration time for a callout is kept in its callout list
53 * structure.
55 typedef struct callout {
56 struct callout *c_idnext; /* next in ID hash, or on freelist */
57 struct callout *c_idprev; /* prev in ID hash */
58 struct callout *c_clnext; /* next in callout list */
59 struct callout *c_clprev; /* prev in callout list */
60 callout_id_t c_xid; /* extended callout ID; see below */
61 callout_list_t *c_list; /* callout list */
62 void (*c_func)(void *); /* function to call */
63 void *c_arg; /* argument to function */
64 kthread_t *c_executor; /* executing thread */
65 kcondvar_t c_done; /* signal callout completion */
66 ushort_t c_waiting; /* untimeout waiting flag */
67 } callout_t;
70 * The callout ID (callout_id_t) uniquely identifies a callout. The callout
71 * ID is always 64 bits internally. The lower 32 bits contain an ID value.
72 * The upper 32 bits contain a generation number and flags. When the ID value
73 * wraps the generation number is incremented during ID generation. This
74 * protects callers from ID collisions that can happen as a result of the wrap.
76 * The kernel internal interface, timeout_generic(), always returns a
77 * callout_id_t. But the legacy interfaces, timeout() and realtime_timeout()
78 * return a timeout_id_t. On a 64-bit system, timeout_id_t is also 64 bits.
79 * So, the full 64-bit ID (sans the flags) can be returned. However, on 32-bit
80 * systems, timeout_id_t is 32 bits. So, only the lower 32 bits can be
81 * returned. In such cases, a default generation number of 0 is assigned to
82 * the legacy IDs.
84 * The lower 32-bit ID space is partitioned into two spaces - one for
85 * short-term callouts and one for long-term.
87 * Here is the bit layout for the callout ID:
89 * 63 62 61 ... 32 31 30 29 .. X+1 X ... 1 0
90 * -----------------------------------------------------------------------
91 * | Free | Exec | Generation | Long | Counter | ID bits | Table | Type |
92 * | | | number | term | High | | number | |
93 * -----------------------------------------------------------------------
95 * Free:
96 * This bit indicates that this callout has been freed. This is for
97 * debugging purposes.
99 * Exec(uting):
100 * This is the executing bit which is only set in the extended callout
101 * ID. This bit indicates that the callout handler is currently being
102 * executed.
104 * Generation number:
105 * This is the generation part of the ID.
107 * Long term:
108 * This bit indicates whether this is a short-term or a long-term callout.
109 * The long-term bit exists to address the problem of callout ID collision
110 * on 32-bit systems. This is an issue because the system typically
111 * generates a large number of timeout() requests, which means that callout
112 * IDs eventually get recycled. Most timeouts are very short-lived, so that
113 * ID recycling isn't a problem; but there are a handful of timeouts which
114 * are sufficiently long-lived to see their own IDs reused. We use the
115 * long-term bit to partition the ID namespace into pieces; the short-term
116 * space gets all the heavy traffic and can wrap frequently (i.e., on the
117 * order of a day) with no ill effects; the long-term space gets very little
118 * traffic and thus never wraps. That said, we need to future proof callouts
119 * in case 32-bit systems grow in size and are able to consume callout IDs
120 * at faster rates. So, we should make all the kernel clients that use
121 * callouts to use the internal interface so that they can use IDs outside
122 * of the legacy space with a proper generation number.
124 * Counter High + ID counter bits:
125 * These bits represent the actual ID bits in the callout ID.
126 * The highest bit of the running counter is always set; this ensures that
127 * the callout ID is always non-zero, thus eliminating the need for an
128 * explicit wrap-around test during ID generation.
130 * Table number:
131 * These bits carry the table number for the callout table where the callout
132 * is queued. Each CPU has its own callout table. So, the callout tables are
133 * numbered from 0 - (max_ncpus - 1). Because max_ncpus is different on
134 * different systems, the actual number of table number bits will vary
135 * accordingly. And so will the ID counter bits.
137 * Type:
138 * This bit represents the callout (table) type. Each CPU has one realtime
139 * and one normal callout table.
/*
 * Bit masks and helpers for the callout ID layout.
 *
 * CALLOUT_ID_FREE (bit 63) and CALLOUT_EXECUTING (bit 62) are the two flag
 * bits of the 64-bit extended ID; CALLOUT_ID_MASK selects every other bit.
 * CALLOUT_GENERATION_LOW is the lowest bit of the generation number
 * (bit 32).  CALLOUT_LONGTERM (bit 31) and CALLOUT_COUNTER_HIGH (bit 30)
 * live in the lower 32 bits.
 *
 * A table number is a sequence ID shifted left by CALLOUT_TYPE_BITS with
 * the callout type in the low bit(s): CALLOUT_TABLE() builds one and
 * CALLOUT_TABLE_SEQID() recovers the sequence ID from a table pointer.
 *
 * Every expansion is fully parenthesized so that the macros compose safely
 * with adjacent operators.
 */
#define	CALLOUT_ID_FREE		0x8000000000000000ULL
#define	CALLOUT_EXECUTING	0x4000000000000000ULL
#define	CALLOUT_ID_FLAGS	(CALLOUT_ID_FREE | CALLOUT_EXECUTING)
#define	CALLOUT_ID_MASK		(~CALLOUT_ID_FLAGS)
#define	CALLOUT_GENERATION_LOW	0x100000000ULL
#define	CALLOUT_LONGTERM	0x80000000
#define	CALLOUT_COUNTER_HIGH	0x40000000
#define	CALLOUT_TYPE_BITS	1
#define	CALLOUT_NTYPES		(1 << CALLOUT_TYPE_BITS)
#define	CALLOUT_TYPE_MASK	(CALLOUT_NTYPES - 1)
#define	CALLOUT_COUNTER_SHIFT	callout_table_bits
#define	CALLOUT_TABLE(t, f)	(((f) << CALLOUT_TYPE_BITS) | (t))
#define	CALLOUT_TABLE_NUM(ct)	((ct) - callout_table)
#define	CALLOUT_TABLE_SEQID(ct)	(CALLOUT_TABLE_NUM(ct) >> CALLOUT_TYPE_BITS)
/*
 * We assume that during any period of CALLOUT_LONGTERM_TICKS ticks, at most
 * (CALLOUT_COUNTER_HIGH / callout_counter_low) callouts will be generated.
 */
#define	CALLOUT_LONGTERM_TICKS	0x4000UL
#define	CALLOUT_BUCKET_SHIFT	9
#define	CALLOUT_BUCKETS		(1 << CALLOUT_BUCKET_SHIFT)
#define	CALLOUT_BUCKET_MASK	(CALLOUT_BUCKETS - 1)
#define	CALLOUT_HASH(x)		((x) & CALLOUT_BUCKET_MASK)
#define	CALLOUT_IDHASH(x)	CALLOUT_HASH((x) >> CALLOUT_COUNTER_SHIFT)

/*
 * Fold four consecutive CALLOUT_BUCKET_SHIFT-bit fields of x together with
 * XOR and reduce the result to a bucket index.
 *
 * The multiplications by 0 and 1 below are cosmetic: they only align the
 * four terms and make them easier to read.  They are evaluated at compile
 * time.
 */
#define	CALLOUT_CLHASH(x)			\
	CALLOUT_HASH(				\
	    ((x) >> (CALLOUT_BUCKET_SHIFT * 0)) ^	\
	    ((x) >> (CALLOUT_BUCKET_SHIFT * 1)) ^	\
	    ((x) >> (CALLOUT_BUCKET_SHIFT * 2)) ^	\
	    ((x) >> (CALLOUT_BUCKET_SHIFT * 3)))
/* Extract the callout table number from the low bits of a callout ID. */
#define	CALLOUT_ID_TO_TABLE(id)		((id) & callout_table_mask)

/*
 * Compose an ID for the given table number: a short-term ID carries the
 * table number plus the counter-high bit, and a long-term ID additionally
 * carries the long-term bit.
 * NOTE(review): presumably these seed ct_short_id / ct_long_id -- confirm
 * against callout.c.
 */
#define	CALLOUT_SHORT_ID(table)		\
	((callout_id_t)(table) | CALLOUT_COUNTER_HIGH)
#define	CALLOUT_LONG_ID(table)		\
	(CALLOUT_SHORT_ID(table) | CALLOUT_LONGTERM)

#define	CALLOUT_THREADS		2

#define	CALLOUT_REALTIME	0	/* realtime callout type */
#define	CALLOUT_NORMAL		1	/* normal callout type */
191 * callout_t's are cache-aligned structures allocated from kmem caches. One kmem
192 * cache is created per lgrp and is shared by all CPUs in that lgrp. Benefits:
193 * - cache pages are mapped only in the TLBs of the CPUs of the lgrp
194 * - data in cache pages is present only in those CPU caches
195 * - memory access performance improves with locality-awareness in kmem
197 * The following structure is used to manage per-lgroup kmem caches.
199 * NOTE: Free callout_t's go to a callout table's freelist. CPUs map to callout
200 * tables via their sequence IDs, not CPU IDs. DR operations can cause a
201 * free list to have callouts from multiple lgrp caches. This takes away some
202 * performance, but is no worse than if we did not use lgrp caches at all.
204 typedef struct callout_cache {
205 struct callout_cache *cc_next; /* link in the global list */
206 lgrp_handle_t cc_hand; /* lgroup handle */
207 kmem_cache_t *cc_cache; /* kmem cache pointer */
208 kmem_cache_t *cc_lcache; /* kmem cache pointer */
209 } callout_cache_t;
/*
 * The callout hash structure is used for queueing both callouts and
 * callout lists. That is why the fields are declared as void *: the same
 * head/tail pair can anchor a chain of either element type.
 */
typedef struct callout_hash {
	void	*ch_head;	/* first element of the chain */
	void	*ch_tail;	/* last element of the chain */
} callout_hash_t;
/*
 * Callout list flags.  Each flag is a distinct bit, so they can be OR'ed
 * together.
 *
 * CALLOUT_LIST_FLAG_FREE
 *	Callout list is free.
 * CALLOUT_LIST_FLAG_ABSOLUTE
 *	Callout list contains absolute timers.
 * CALLOUT_LIST_FLAG_HRESTIME
 *	Callout list contains hrestime timers.
 * CALLOUT_LIST_FLAG_NANO
 *	Callout list contains 1-nanosecond resolution callouts.
 * CALLOUT_LIST_FLAG_HEAPED
 *	Callout list is present in the callout heap.
 * CALLOUT_LIST_FLAG_QUEUED
 *	Callout list is present in the callout queue.
 */
#define	CALLOUT_LIST_FLAG_FREE		0x1
#define	CALLOUT_LIST_FLAG_ABSOLUTE	0x2
#define	CALLOUT_LIST_FLAG_HRESTIME	0x4
#define	CALLOUT_LIST_FLAG_NANO		0x8
#define	CALLOUT_LIST_FLAG_HEAPED	0x10
#define	CALLOUT_LIST_FLAG_QUEUED	0x20
241 struct callout_list {
242 callout_list_t *cl_next; /* next in clhash */
243 callout_list_t *cl_prev; /* prev in clhash */
244 hrtime_t cl_expiration; /* expiration for callouts in list */
245 callout_hash_t cl_callouts; /* list of callouts */
246 int cl_flags; /* callout flags */
250 * Callout heap element. Each element in the heap stores the expiration
251 * as well as the corresponding callout list. This is to avoid a lookup
252 * of the callout list when the heap is processed. Because we store the
253 * callout list pointer in the heap element, we have to always remove
254 * a heap element and its callout list together. We cannot remove one
255 * without the other.
257 * This structure's size must be a power of two because we want an
258 * integral number of these to fit into a page.
260 typedef struct callout_heap {
261 hrtime_t ch_expiration;
262 callout_list_t *ch_list;
263 #ifndef _LP64
264 char ch_pad[4]; /* pad to power of 2 */
265 #endif
266 } callout_heap_t;
/*
 * When the heap contains too many empty callout lists, it needs to be
 * cleaned up. The decision to clean up the heap is a function of the
 * number of empty entries and the heap size. Also, we don't want to
 * clean up small heaps.
 *
 * CALLOUT_CLEANUP(ct) is true when the table's reap count has reached
 * callout_min_reap and is at least half of the occupied heap slots.
 * The argument is parenthesized so that any pointer expression (for
 * example "tables + i" or "&tables[i]") may be passed; note that it is
 * evaluated more than once.
 */
#define	CALLOUT_MIN_REAP	(CALLOUT_BUCKETS >> 3)
#define	CALLOUT_CLEANUP(ct)	(((ct)->ct_nreap >= callout_min_reap) &&	\
				((ct)->ct_nreap >= ((ct)->ct_heap_num >> 1)))
/*
 * Per-callout table kstats.
 *
 * CALLOUT_TIMEOUTS
 *	Callouts created since boot.
 * CALLOUT_TIMEOUTS_PENDING
 *	Number of outstanding callouts.
 * CALLOUT_UNTIMEOUTS_UNEXPIRED
 *	Number of cancelled callouts that have not expired.
 * CALLOUT_UNTIMEOUTS_EXECUTING
 *	Number of cancelled callouts that were executing at the time of
 *	cancellation.
 * CALLOUT_UNTIMEOUTS_EXPIRED
 *	Number of cancelled callouts that had already expired at the time
 *	of cancellation.
 * CALLOUT_EXPIRATIONS
 *	Number of callouts that expired.
 * CALLOUT_ALLOCATIONS
 *	Number of callout structures allocated.
 * CALLOUT_CLEANUPS
 *	Number of times a callout table is cleaned up.
 *
 * CALLOUT_NUM_STATS is not a statistic; it is the number of entries above
 * and must remain last.
 */
typedef enum callout_stat_type {
	CALLOUT_TIMEOUTS,
	CALLOUT_TIMEOUTS_PENDING,
	CALLOUT_UNTIMEOUTS_UNEXPIRED,
	CALLOUT_UNTIMEOUTS_EXECUTING,
	CALLOUT_UNTIMEOUTS_EXPIRED,
	CALLOUT_EXPIRATIONS,
	CALLOUT_ALLOCATIONS,
	CALLOUT_CLEANUPS,
	CALLOUT_NUM_STATS
} callout_stat_type_t;
/*
 * Callout flags:
 *
 * CALLOUT_FLAG_ROUNDUP
 *	Roundup the expiration time to the next resolution boundary.
 *	If this flag is not specified, the expiration time is rounded down.
 * CALLOUT_FLAG_ABSOLUTE
 *	Normally, the expiration passed to the timeout API functions is an
 *	expiration interval. If this flag is specified, then it is
 *	interpreted as the expiration time itself.
 * CALLOUT_FLAG_HRESTIME
 *	Normally, callouts are not affected by changes to system time
 *	(hrestime). This flag is used to create a callout that is affected
 *	by system time. If system time changes, these timers must be
 *	handled in a special way (see callout.c). These are used by condition
 *	variables and LWP timers that need this behavior.
 * CALLOUT_FLAG_32BIT
 *	Legacy interfaces timeout() and realtime_timeout() pass this flag
 *	to timeout_generic() to indicate that a 32-bit ID should be allocated.
 */
#define	CALLOUT_FLAG_ROUNDUP		0x1
#define	CALLOUT_FLAG_ABSOLUTE		0x2
#define	CALLOUT_FLAG_HRESTIME		0x4
#define	CALLOUT_FLAG_32BIT		0x8

/*
 * On 32-bit systems, the legacy interfaces, timeout() and realtime_timeout(),
 * must pass CALLOUT_FLAG_32BIT to timeout_generic() so that a 32-bit ID
 * can be generated.  On LP64 kernels no extra flag is needed, so
 * CALLOUT_LEGACY is 0 there.
 */
#ifdef _LP64
#define	CALLOUT_LEGACY		0
#else
#define	CALLOUT_LEGACY		CALLOUT_FLAG_32BIT
#endif
349 * All of the state information associated with a callout table.
350 * The fields are ordered with cache performance in mind.
352 typedef struct callout_table {
353 kmutex_t ct_mutex; /* protects all callout state */
354 callout_t *ct_free; /* free callout structures */
355 callout_list_t *ct_lfree; /* free callout list structures */
356 callout_id_t ct_short_id; /* most recently issued short-term ID */
357 callout_id_t ct_long_id; /* most recently issued long-term ID */
358 callout_hash_t *ct_idhash; /* ID hash chains */
359 callout_hash_t *ct_clhash; /* callout list hash */
360 kstat_named_t *ct_kstat_data; /* callout kstat data */
362 uint_t ct_type; /* callout table type */
363 uint_t ct_suspend; /* suspend count */
364 cyclic_id_t ct_cyclic; /* cyclic for this table */
365 callout_heap_t *ct_heap; /* callout expiration heap */
366 ulong_t ct_heap_num; /* occupied slots in the heap */
367 ulong_t ct_heap_max; /* end of the heap */
368 kmem_cache_t *ct_cache; /* callout kmem cache */
369 kmem_cache_t *ct_lcache; /* callout list kmem cache */
370 callout_id_t ct_gen_id; /* generation based ID */
372 callout_hash_t ct_expired; /* list of expired callout lists */
373 taskq_t *ct_taskq; /* taskq to execute normal callouts */
374 kstat_t *ct_kstats; /* callout kstats */
375 int ct_nreap; /* # heap entries that need reaping */
376 cyclic_id_t ct_qcyclic; /* cyclic for the callout queue */
377 callout_hash_t ct_queue; /* overflow queue of callouts */
378 #ifndef _LP64
379 char ct_pad[12]; /* cache alignment */
380 #endif
382 * This structure should be aligned to a 64-byte (cache-line)
383 * boundary. Make sure the padding is right for 32-bit as well
384 * as 64-bit kernels.
386 } callout_table_t;
/*
 * Shorthand definitions for the callout kstats.  Each name expands to the
 * 64-bit value of the matching entry in a table's ct_kstat_data array, so
 * it is used as a member expression, e.g. "ct->ct_timeouts++".  Because the
 * expansion starts with a member name, it deliberately carries no
 * surrounding parentheses.
 */
#define	ct_timeouts							\
		ct_kstat_data[CALLOUT_TIMEOUTS].value.ui64
#define	ct_timeouts_pending						\
		ct_kstat_data[CALLOUT_TIMEOUTS_PENDING].value.ui64
#define	ct_untimeouts_unexpired						\
		ct_kstat_data[CALLOUT_UNTIMEOUTS_UNEXPIRED].value.ui64
#define	ct_untimeouts_executing						\
		ct_kstat_data[CALLOUT_UNTIMEOUTS_EXECUTING].value.ui64
#define	ct_untimeouts_expired						\
		ct_kstat_data[CALLOUT_UNTIMEOUTS_EXPIRED].value.ui64
#define	ct_expirations							\
		ct_kstat_data[CALLOUT_EXPIRATIONS].value.ui64
#define	ct_allocations							\
		ct_kstat_data[CALLOUT_ALLOCATIONS].value.ui64
#define	ct_cleanups							\
		ct_kstat_data[CALLOUT_CLEANUPS].value.ui64
/*
 * CALLOUT_CHUNK is the minimum initial size of each heap, and the amount
 * by which a full heap is expanded to make room for new entries.  It is
 * expressed in heap elements: the number of callout_heap_t's in one page.
 */
#define	CALLOUT_CHUNK		(PAGESIZE / sizeof (callout_heap_t))

/*
 * CALLOUT_MIN_HEAP_SIZE defines the minimum size for the callout heap for
 * the whole system.  It is expressed in bytes (64K heap elements).
 */
#define	CALLOUT_MIN_HEAP_SIZE	(64 * 1024 * sizeof (callout_heap_t))

/*
 * CALLOUT_MEM_FRACTION defines the fraction of available physical memory that
 * can be allocated towards the callout heap for the whole system.
 * NOTE(review): presumably used as a divisor (1/4096 of memory) -- confirm
 * against callout.c.
 */
#define	CALLOUT_MEM_FRACTION	4096
/*
 * Index arithmetic for the callout heap, which is stored as a zero-based
 * array: the children of element i are at 2i + 1 (left) and 2i + 2 (right),
 * and the parent of element i (i > 0) is at (i - 1) / 2.
 */
#define	CALLOUT_HEAP_PARENT(index)	(((index) - 1) >> 1)
#define	CALLOUT_HEAP_RIGHT(index)	(((index) + 1) << 1)
#define	CALLOUT_HEAP_LEFT(index)	((((index) + 1) << 1) - 1)

/* NOTE(review): presumably in nanoseconds (i.e. 10 ms) -- confirm */
#define	CALLOUT_TCP_RESOLUTION		10000000ULL

#define	CALLOUT_ALIGN			64	/* cache line size */
/*
 * CALLOUT_MAX_TICKS is the largest tick count used for a callout: the tick
 * equivalent of CY_INFINITY on LP64 kernels and LONG_MAX otherwise.
 *
 * The definition must not end in a semicolon; a trailing ';' would become
 * part of every expansion and break any use inside a larger expression
 * (comparisons, function arguments, conditions).
 */
#ifdef _LP64
#define	CALLOUT_MAX_TICKS	NSEC_TO_TICK(CY_INFINITY)
#else
#define	CALLOUT_MAX_TICKS	LONG_MAX
#endif

#define	CALLOUT_TOLERANCE	200000		/* nanoseconds */
442 extern void callout_init(void);
443 extern void membar_sync(void);
444 extern void callout_cpu_online(cpu_t *);
445 extern void callout_cpu_offline(cpu_t *);
446 extern void callout_hrestime(void);
448 #endif
450 #ifdef __cplusplus
452 #endif
454 #endif /* _SYS_CALLO_H */