docs/how-to-build.md: use proper markup for directory names
[unleashed/tickless.git] / include / sys / poll_impl.h
blob67b47f9a1e49b9e9bc53da6e02aead49d6c515d9
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Copyright 2015, Joyent, Inc.
31 #ifndef _SYS_POLL_IMPL_H
32 #define _SYS_POLL_IMPL_H
35 * Caching Poll Subsystem:
37 * Each kernel thread (1), if engaged in poll system call, has a reference to
38 * a pollstate_t (2), which contains relevant flags and locks. The pollstate_t
39 * contains a pointer to a pollcache_t (3), which caches the state of previous
40 * calls to poll. A bitmap (4) is stored inside the poll cache, where each
41 * bit represents a file descriptor. The bits are set if the corresponding
42 * device has a polled event pending. Only fds with their bit set will be
43 * examined on the next poll invocation. The pollstate_t also contains a list
44 * of fd sets (5), which are represented by the pollcacheset_t type. These
45 * structures keep track of the pollfd_t arrays (6) passed in from userland.
46 * Each polled file descriptor has a corresponding polldat_t which can be
47 * chained onto a device's pollhead, and these are kept in a hash table (7)
48 * inside the pollcache_t. The hash table allows efficient conversion of a
49 * given fd to its corresponding polldat_t.
51 * (1) (2)
52 * +-----------+ +-------------+
53 * | kthread_t |--->| pollstate_t |-->+-------------+ (6)
54 * +-----------+ +-------------+(5)| pcacheset_t |->[_][_][_][_] pollfd_t
55 * | +-------------+
56 * | | pcacheset_t |->[_][_][_][_] pollfd_t
57 * (1a) | +-------------+
58 * +---------------+ |
59 * | /dev/poll tbl | |
60 * +-v-------------+ |
61 * | |
62 * +------------------+ |
63 * (7) (3) V v
64 * polldat hash +-------------+ (4) bitmap representing fd space
65 * [_][_][_][_]<----| |--->000010010010001010101010101010110
66 * | | | | | pollcache_t |
67 * . v . . | |
68 * [polldat_t] +-------------+
69 * |
70 * [polldat_t]
71 * |
72 * v
73 * NULL
76 * Both poll system call and /dev/poll use the pollcache_t structure
77 * definition and the routines managing the structure. But poll(2) and
78 * /dev/poll have their own copy of the structures. The /dev/poll driver
79 * table (1a) contains an array of pointers, each pointing at a pollcache_t
80 * struct (3). A device minor number is used as a device table index.
83 #include <sys/poll.h>
85 #if defined(_KERNEL) || defined(_KMEMUSER)
87 #include <sys/thread.h>
88 #include <sys/file.h>
89 #include <sys/port_kernel.h>
91 #ifdef __cplusplus
92 extern "C" {
93 #endif
96 * Typedefs
/* Forward declarations: the structs below refer to one another by pointer. */
98 struct pollcache;
99 struct pollstate;
100 struct pcachelink;
101 struct polldat;
/* Short aliases, declared ahead of the struct bodies that use them. */
103 typedef struct pollcache pollcache_t;
104 typedef struct pollstate pollstate_t;
105 typedef struct pcachelink pcachelink_t;
106 typedef struct polldat polldat_t;
109 * description of pollcacheset structure
/*
 * Tracks one pollfd_t array passed in from userland: its user address, the
 * cached list and its length, plus a usage counter.
 */
111 typedef struct pollcacheset {
112 uintptr_t pcs_usradr; /* usr pollfd array address */
113 pollfd_t *pcs_pollfd; /* cached poll list */
114 size_t pcs_nfds; /* number of poll fd in cached list */
115 ulong_t pcs_count; /* usage counter for LU replacement policy */
116 } pollcacheset_t;
/* NOTE(review): presumably the number of pollcacheset_t slots per pollstate (cf. ps_nsets) -- confirm in poll.c */
118 #define POLLFDSETS 2
121 * Maximum depth for recursive poll operations.
123 #define POLLMAXDEPTH 5 /* also the length of pollstate's ps_pc_stack[] */
126 * State information kept by each polling thread
/*
 * Per-thread poll state: the current poll list, the pollcache and cached
 * poll sets, the /dev/poll return buffer, and the epoll recursion and
 * contention bookkeeping.
 */
128 struct pollstate {
129 pollfd_t *ps_pollfd; /* hold the current poll list */
130 size_t ps_nfds; /* size of ps_pollfd */
131 kmutex_t ps_lock; /* mutex for sleep/wakeup */
132 pollcache_t *ps_pcache; /* cached poll fd set */
133 pollcacheset_t *ps_pcacheset; /* cached poll lists */
134 int ps_nsets; /* no. of cached poll sets */
135 pollfd_t *ps_dpbuf; /* return pollfd buf used by devpoll */
136 size_t ps_dpbufsize; /* size of ps_dpbuf */
137 int ps_depth; /* epoll recursion depth */
138 pollcache_t *ps_pc_stack[POLLMAXDEPTH]; /* epoll recursion state, POLLMAXDEPTH slots */
139 pollcache_t *ps_contend_pc; /* pollcache waited on */
140 pollstate_t *ps_contend_nextp; /* next in contender list */
141 pollstate_t **ps_contend_pnextp; /* pointer-to-previous-next */
142 int ps_flags; /* state flags (see POLLSTATE_* below) */
145 /* pollstate flags: distinct bits, presumably kept in ps_flags -- confirm in poll.c */
146 #define POLLSTATE_STALEMATE 0x1
147 #define POLLSTATE_ULFAIL 0x2
149 /* pollstate_enter results: PSE_SUCCESS is 0, each failure has its own nonzero code */
150 #define PSE_SUCCESS 0
151 #define PSE_FAIL_DEPTH 1
152 #define PSE_FAIL_LOOP 2
153 #define PSE_FAIL_DEADLOCK 3
154 #define PSE_FAIL_POLLSTATE 4
157 * poll cache size defines
159 #define POLLCHUNKSHIFT 8 /* hash table increment size is 256 */
160 #define POLLHASHCHUNKSZ (1 << POLLCHUNKSHIFT) /* 256 with the shift above */
161 #define POLLHASHINC 2 /* poll hash table growth factor */
162 #define POLLHASHTHRESHOLD 2 /* poll hash list length threshold */
/* NOTE(review): x is presumably the hash table size and y the fd -- confirm against callers */
163 #define POLLHASH(x, y) ((y) % (x)) /* poll hash function: y modulo x, x must be nonzero */
166 * poll.c assumes that POLLMAPCHUNK is a power of 2
168 #define POLLMAPCHUNK 2048 /* bitmap increment -- one chunk per 2K (2048) polled fd's */
171 * Used to reference from a watched fd back to the fd position in the cached
172 * poll list for quick revents update.
174 typedef struct xref {
175 ssize_t xf_position; /* xref fd position in poll fd list, or a POLLPOS* sentinel */
176 short xf_refcnt; /* ref cnt of same fd in poll list */
177 } xref_t;
/* Negative sentinel values for xf_position. */
179 #define POLLPOSINVAL (-1L) /* xf_position is invalid */
180 #define POLLPOSTRANS (-2L) /* xf_position is in a transient state */
/* States of a pcachelink entry (stored in pcl_state). */
183 typedef enum pclstate {
184 PCL_INIT = 0, /* just allocated/zeroed (value 0), prior to use */
185 PCL_VALID, /* linked with both parent and child pollcaches */
186 PCL_STALE, /* still linked but marked stale, pending refresh */
187 PCL_INVALID, /* dissociated from one pollcache, awaiting cleanup */
188 PCL_FREE /* only meant to indicate use-after-free */
189 } pclstate_t;
192 * The pcachelink struct creates an association between parent and child
193 * pollcaches in a recursive /dev/poll operation. Fields are protected by
194 * pcl_lock although manipulation of pcl_child_next or pcl_parent_next also
195 * requires holding pc_lock in the respective pcl_parent_pc or pcl_child_pc
196 * pollcache.
198 struct pcachelink {
199 kmutex_t pcl_lock; /* protects contents */
200 pclstate_t pcl_state; /* status of link entry (a pclstate_t value) */
201 int pcl_refcnt; /* ref cnt of linked pcaches */
202 pollcache_t *pcl_child_pc; /* child pollcache */
203 pollcache_t *pcl_parent_pc; /* parent pollcache */
204 pcachelink_t *pcl_child_next; /* next in child list; changes also need pcl_parent_pc's pc_lock */
205 pcachelink_t *pcl_parent_next; /* next in parents list; changes also need pcl_child_pc's pc_lock */
210 * polldat is an entry for a cached poll fd. A polldat struct can be in
211 * poll cache table as well as on pollhead ph_list, which is used by
212 * pollwakeup to wake up a sleeping poller. There should be one polldat
213 * per polled fd hanging off pollstate struct.
215 struct polldat {
216 int pd_fd; /* cached poll fd */
217 int pd_events; /* union of all polled events */
218 file_t *pd_fp; /* used to detect fd reuse */
219 pollhead_t *pd_php; /* used to undo poll registration */
220 kthread_t *pd_thread; /* used for waking up a sleeping thread */
221 pollcache_t *pd_pcache; /* a ptr to the pollcache of this fd */
222 polldat_t *pd_next; /* next on pollhead's ph_list */
223 polldat_t *pd_hashnext; /* next in the pollcache hash chain */
224 int pd_count; /* total count from all ref'ed sets */
225 int pd_nsets; /* num of xref sets, used by poll(2) */
226 xref_t *pd_ref; /* ptr to xref info, 1 for each set */
227 port_kevent_t *pd_portev; /* associated port event struct */
228 uint64_t pd_epolldata; /* epoll data, if any */
232 * One cache for each thread that polls. Points to a bitmap (used by pollwakeup)
233 * and a hash table of polldats.
234 * The offset of pc_lock field must be kept in sync with the pc_lock offset
235 * of port_fdcache_t, both structs implement pc_lock with offset 0 (see also
236 * pollrelock()).
238 struct pollcache {
239 kmutex_t pc_lock; /* lock to protect pollcache; must stay at offset 0 */
240 ulong_t *pc_bitmap; /* point to poll fd bitmap */
241 polldat_t **pc_hash; /* points to a hash table of ptrs */
242 int pc_mapend; /* the largest fd encountered so far */
243 int pc_mapsize; /* the size of current map */
244 int pc_hashsize; /* the size of current hash table */
245 int pc_fdcount; /* track how many fd's are hashed */
246 int pc_flag; /* see pc_flag define below (PC_POLLWAKE) */
247 int pc_busy; /* can only exit when it is 0 */
248 kmutex_t pc_no_exit; /* protects pc_busy*, can't be nested */
249 kcondvar_t pc_busy_cv; /* cv to wait on if pc_busy != 0 */
250 kcondvar_t pc_cv; /* cv to wait on if needed */
251 pid_t pc_pid; /* for check acc rights, devpoll only */
252 int pc_mapstart; /* where search start, devpoll only */
253 pcachelink_t *pc_parents; /* linked list of epoll parents */
254 pcachelink_t *pc_children; /* linked list of epoll children */
257 /* pc_flag values */
258 #define PC_POLLWAKE 0x02 /* pollwakeup() occurred */
260 #if defined(_KERNEL)
262 * Internal routines.
/* NOTE(review): the int argument is presumably the fd being notified on -- confirm in poll.c */
264 extern void pollnotify(pollcache_t *, int);
267 * public poll head interfaces (see poll.h):
269 * pollhead_clean clean up all polldats on a pollhead list
271 extern void pollhead_clean(pollhead_t *); /* clean up all polldats on the given pollhead's list */
274 * private poll head interfaces:
276 * pollhead_insert adds a polldat to a pollhead list
277 * pollhead_delete removes a polldat from a pollhead list
279 extern void pollhead_insert(pollhead_t *, polldat_t *); /* add the polldat to the pollhead list */
280 extern void pollhead_delete(pollhead_t *, polldat_t *); /* remove the polldat from the pollhead list */
283 * poll state interfaces:
285 * pollstate_create initializes per-thread pollstate
286 * pollstate_destroy cleans up per-thread pollstate
287 * pollstate_enter safely lock pollcache for pollstate
288 * pollstate_exit unlock pollcache from pollstate
290 extern pollstate_t *pollstate_create(void); /* initializes per-thread pollstate */
291 extern void pollstate_destroy(pollstate_t *); /* cleans up per-thread pollstate */
292 extern int pollstate_enter(pollcache_t *); /* returns one of the PSE_* results */
293 extern void pollstate_exit(pollcache_t *); /* unlock pollcache from pollstate */
296 * public pcache interfaces:
298 * pcache_alloc allocate a poll cache skeleton
299 * pcache_create creates all poll cache supporting data struct
300 * pcache_insert cache a poll fd, calls pcache_insert_fd
301 * pcache_lookup given an fd list, returns a cookie
302 * pcache_poll polls the cache for fd's having events on them
303 * pcache_clean clean up all the pollhead and fpollinfo reference
304 * pcache_destroy destroys the pcache
306 extern pollcache_t *pcache_alloc();
307 extern void pcache_create(pollcache_t *, nfds_t);
308 extern int pcache_insert(pollstate_t *, file_t *, pollfd_t *, int *, ssize_t,
309 int);
310 extern int pcache_poll(pollfd_t *, pollstate_t *, nfds_t, int *, int);
311 extern void pcache_clean(pollcache_t *);
312 extern void pcache_destroy(pollcache_t *);
315 * private pcache interfaces:
317 * pcache_lookup_fd lookup an fd, returns a polldat
318 * pcache_alloc_fd allocates and returns a polldat
319 * pcache_insert_fd insert an fd into pcache (called by pcache_insert)
320 * pcache_delete_fd delete an fd from pcache (called by pcacheset_delete_fd)
321 * pcache_grow_hashtbl grows the pollcache hash table and rehash
322 * pcache_grow_map grows the pollcache bitmap
323 * pcache_update_xref update cross ref (from polldat back to cacheset) info
324 * pcache_clean_entry cleanup an entry in pcache and more...
325 * pcache_wake_parents wake linked parent pollcaches
/*
 * Private pcache prototypes.  All but pcache_alloc_fd take the pollcache_t
 * or pollstate_t they operate on as the first argument.
 */
327 extern polldat_t *pcache_lookup_fd(pollcache_t *, int); /* returns a polldat */
328 extern polldat_t *pcache_alloc_fd(int); /* allocates and returns a polldat */
329 extern void pcache_insert_fd(pollcache_t *, polldat_t *, nfds_t); /* called by pcache_insert */
330 extern int pcache_delete_fd(pollstate_t *, int, size_t, int, uint_t);
331 extern void pcache_grow_hashtbl(pollcache_t *, nfds_t); /* grows the hash table and rehashes */
332 extern void pcache_grow_map(pollcache_t *, int); /* grows the pollcache bitmap */
333 extern void pcache_update_xref(pollcache_t *, int, ssize_t, int);
334 extern void pcache_clean_entry(pollstate_t *, int);
335 extern void pcache_wake_parents(pollcache_t *); /* wake linked parent pollcaches */
338 * pcacheset interfaces:
340 * pcacheset_create creates new pcachesets (easier for dynamic pcachesets)
341 * pcacheset_destroy destroys a pcacheset
342 * pcacheset_cache_list caches and polls a new poll list
343 * pcacheset_remove_list removes (usually a partial) cached poll list
344 * pcacheset_resolve resolves extant pcacheset and fd list
345 * pcacheset_cmp compares a pcacheset with an fd list
346 * pcacheset_invalidate invalidate entries in pcachesets
347 * pcacheset_reset_count resets the usage counter of pcachesets
348 * pcacheset_replace selects a poll cacheset for replacement
/*
 * pcacheset prototypes.  pcacheset_create returns a pollcacheset_t pointer;
 * the others return int or nothing.
 */
350 extern pollcacheset_t *pcacheset_create(int); /* creates new pcachesets */
351 extern void pcacheset_destroy(pollcacheset_t *, int); /* destroys a pcacheset */
352 extern int pcacheset_cache_list(pollstate_t *, pollfd_t *, int *, int);
353 extern void pcacheset_remove_list(pollstate_t *, pollfd_t *, int, int, int,
354 int);
355 extern int pcacheset_resolve(pollstate_t *, nfds_t, int *, int);
356 extern int pcacheset_cmp(pollfd_t *, pollfd_t *, pollfd_t *, int);
357 extern void pcacheset_invalidate(pollstate_t *, polldat_t *);
358 extern void pcacheset_reset_count(pollstate_t *, int); /* resets the usage counter */
359 extern int pcacheset_replace(pollstate_t *); /* selects a poll cacheset for replacement */
361 #endif /* defined(_KERNEL) */
363 #ifdef __cplusplus
365 #endif
367 #endif /* defined(_KERNEL) || defined(_KMEMUSER) */
369 #endif /* _SYS_POLL_IMPL_H */