dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / portfs / port.c
blob6955d2648f005ea096ecf1fd24e97d5b3f1a3db7
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/types.h>
28 #include <sys/systm.h>
29 #include <sys/cred.h>
30 #include <sys/modctl.h>
31 #include <sys/vfs.h>
32 #include <sys/sysmacros.h>
33 #include <sys/cmn_err.h>
34 #include <sys/stat.h>
35 #include <sys/errno.h>
36 #include <sys/kmem.h>
37 #include <sys/file.h>
38 #include <sys/kstat.h>
39 #include <sys/port_impl.h>
40 #include <sys/task.h>
41 #include <sys/project.h>
44 * Event Ports can be shared across threads or across processes.
45 * Every thread/process can use its own event port or a group of them
46 * can use a single port. A major request was also to get the ability
47 * to submit user-defined events to a port. The idea of the
48 * user-defined events is to use the event ports for communication between
49 * threads/processes (like message queues). User defined-events are queued
50 * in a port with the same priority as other event types.
52 * Events are delivered only once. The thread/process which is waiting
53 * for events with the "highest priority" (priority here is related to the
54 * internal strategy to wakeup waiting threads) will retrieve the event,
55 * all other threads/processes will not be notified. There is also
56 * the requirement to have events which should be submitted immediately
57 * to all "waiting" threads. That is the main task of the alert event.
58 * The alert event is submitted by the application to a port. The port
59 * changes from a standard mode to the alert mode. Now all waiting threads
60 * will be awakened immediately and they will return with the alert event.
61 * Threads trying to retrieve events from a port in alert mode will
62 * return immediately with the alert event.
65 * An event port is like a kernel queue, which accept events submitted from
66 * user level as well as events submitted from kernel sub-systems. Sub-systems
67 * able to submit events to a port are the so-called "event sources".
68 * Current event sources:
69 * PORT_SOURCE_AIO : events submitted per transaction completion from
70 * POSIX-I/O framework.
71 * PORT_SOURCE_TIMER : events submitted when a timer fires
72 * (see timer_create(3RT)).
73 * PORT_SOURCE_FD : events submitted per file descriptor (see poll(2)).
74 * PORT_SOURCE_ALERT : events submitted from user. This is not really a
75 * single event, this is actually a port mode
76 * (see port_alert(3c)).
77 * PORT_SOURCE_USER : events submitted by applications with
78 * port_send(3c) or port_sendn(3c).
79 * PORT_SOURCE_FILE : events submitted per file being watched for file
80 * change events (see port_create(3c)).
82 * There is a user API implemented in the libc library as well as a
83 * kernel API implemented in port_subr.c in genunix.
84 * The available user API functions are:
85 * port_create() : create a port as a file descriptor of portfs file system
86 * The standard close(2) function closes a port.
87 * port_associate() : associate a file descriptor with a port to be able to
88 * retrieve events from that file descriptor.
89 * port_dissociate(): remove the association of a file descriptor with a port.
90 * port_alert() : set/unset a port in alert mode
91 * port_send() : send an event of type PORT_SOURCE_USER to a port
92 * port_sendn() : send an event of type PORT_SOURCE_USER to a list of ports
93 * port_get() : retrieve a single event from a port
94 * port_getn() : retrieve a list of events from a port
96 * The available kernel API functions are:
97 * port_alloc_event() : allocate an event slot/structure of/from a port
98 * port_init_event() : set event data in the event structure
99 * port_send_event() : send event to a port
100 * port_free_event() : deliver allocated slot/structure back to a port
101 * port_associate_ksource(): associate a kernel event source with a port
102 * port_dissociate_ksource(): dissociate a kernel event source from a port
104 * The libc implementation consists of small functions which pass the
105 * arguments to the kernel using the "portfs" system call. It means, all the
106 * synchronisation work is being done in the kernel. The "portfs" system
107 * call loads the portfs file system into the kernel.
109 * PORT CREATION
110 * The first function to be used is port_create() which internally creates
111 * a vnode and a portfs node. The portfs node is represented by the port_t
112 * structure, which again includes all the data necessary to control a port.
113 * port_create() returns a file descriptor, which needs to be used in almost
114 * all other event port functions.
115 * The maximum number of ports per system is controlled by the resource
116 * control: project.max-port-ids.
118 * EVENT GENERATION
119 * The second step is the triggering of events, which could be sent to a port.
120 * Every event source implements an own method to generate events for a port:
121 * PORT_SOURCE_AIO:
122 * The sigevent structure of the standard POSIX-IO functions
123 * was extended by an additional notification type.
124 * Standard notification types:
125 * SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD
126 * Event ports introduced now SIGEV_PORT.
127 * The notification type SIGEV_PORT specifies that a structure
128 * of type port_notify_t has to be attached to the sigev_value.
129 * The port_notify_t structure contains the event port file
130 * descriptor and a user-defined pointer.
131 * Internally the AIO implementation will use the kernel API
132 * functions to allocate an event port slot per transaction (aiocb)
133 * and send the event to the port as soon as the transaction completes.
134 * All the events submitted per transaction are of type
135 * PORT_SOURCE_AIO.
136 * PORT_SOURCE_TIMER:
137 * The timer_create() function uses the same method as the
138 * PORT_SOURCE_AIO event source. It also uses the sigevent structure
139 * to deliver the port information.
140 * Internally the timer code will allocate a single event slot/struct
141 * per timer and it will send the timer event as soon as the timer
142 * fires. If the timer-fired event is not delivered to the application
143 * before the next period elapsed, then an overrun counter will be
144 * incremented. The timer event source uses a callback function to
145 * detect the delivery of the event to the application. At that time
146 * the timer callback function will update the event overrun counter.
147 * PORT_SOURCE_FD:
148 * This event source uses the port_associate() function to allocate
149 * an event slot/struct from a port. The application defines in the
150 * events argument of port_associate() the type of events which it is
151 * interested in.
152 * The internal pollwakeup() function is used by all the file
153 * systems -- which are supporting the fop_poll() interface -- to notify
154 * the upper layer (poll(2), devpoll(7d) and now event ports) about
155 * the event triggered (see valid events in poll(2)).
156 * The pollwakeup() function forwards the event to the layer registered
157 * to receive the current event.
158 * The port_dissociate() function can be used to free the allocated
159 * event slot from the port. Anyway, file descriptors deliver events
160 * only one time and remain deactivated until the application
161 * reactivates the association of a file descriptor with port_associate().
162 * If an associated file descriptor is closed then the file descriptor
163 * will be dissociated automatically from the port.
165 * PORT_SOURCE_ALERT:
166 * This event type is generated when the port was previously set in
167 * alert mode using the port_alert() function.
168 * A single alert event is delivered to every thread which tries to
169 * retrieve events from a port.
170 * PORT_SOURCE_USER:
171 * This type of event is generated from user level using the port_send()
172 * function to send a user event to a port or the port_sendn() function
173 * to send an event to a list of ports.
174 * PORT_SOURCE_FILE:
175 * This event source uses the port_associate() interface to register
176 * a file to be monitored for changes. The file name that needs to be
177 * monitored is specified in the file_obj_t structure, a pointer to which
178 * is passed as an argument. The event types to be monitored are specified
179 * in the events argument.
180 * A file events monitor is represented internally per port per object
181 * address (the file_obj_t pointer). Which means there can be multiple
182 * watches registered on the same file using different file_obj_t
183 * structure pointer. With the help of the FEM(File Event Monitoring)
184 * hooks, the file's vnode ops are intercepted and relevant events
185 * delivered. The port_dissociate() function is used to de-register a
186 * file events monitor on a file. When the specified file is
187 * removed/renamed, the file events watch/monitor is automatically
188 * removed.
190 * EVENT DELIVERY / RETRIEVING EVENTS
191 * Events remain in the port queue until:
192 * - the application uses port_get() or port_getn() to retrieve events,
193 * - the event source cancels the event,
194 * - the event port is closed or
195 * - the process exits.
196 * The maximal number of events in a port queue is the maximal number
197 * of event slots/structures which can be allocated by event sources.
198 * The allocation of event slots/structures is controlled by the resource
199 * control: process.max-port-events.
200 * The port_get() function retrieves a single event and the port_getn()
201 * function retrieves a list of events.
202 * Events are classified as shareable and non-shareable events across processes.
203 * Non-shareable events are invisible for the port_get(n)() functions of
204 * processes other than the owner of the event.
205 * Shareable event types are:
206 * PORT_SOURCE_USER events
207 * This type of event is unconditionally shareable and without
208 * limitations. If the parent process sends a user event and closes
209 * the port afterwards, the event remains in the port and the child
210 * process will still be able to retrieve the user event.
211 * PORT_SOURCE_ALERT events
212 * This type of event is shareable between processes.
213 * Limitation: The alert mode of the port is removed if the owner
214 * (process which set the port in alert mode) of the
215 * alert event closes the port.
216 * PORT_SOURCE_FD events
217 * This type of event is conditionally shareable between processes.
218 * After fork(2) all forked file descriptors are shareable between
219 * the processes. The child process is allowed to retrieve events
220 * from the associated file descriptors and it can also re-associate
221 * the fd with the port.
222 * Limitations: The child process is not allowed to dissociate
223 * the file descriptor from the port. Only the
224 * owner (process) of the association is allowed to
225 * dissociate the file descriptor from the port.
226 * If the owner of the association closes the port
227 * the association will be removed.
228 * PORT_SOURCE_AIO events
229 * This type of event is not shareable between processes.
230 * PORT_SOURCE_TIMER events
231 * This type of event is not shareable between processes.
232 * PORT_SOURCE_FILE events
233 * This type of event is not shareable between processes.
235 * FORK BEHAVIOUR
236 * On fork(2) the child process inherits all opened file descriptors from
237 * the parent process. This is also valid for port file descriptors.
238 * Associated file descriptors with a port maintain the association across the
239 * fork(2). It means, the child process gets full access to the port and
240 * it can retrieve events from all common associated file descriptors.
241 * Events of file descriptors created and associated with a port after the
242 * fork(2) are non-shareable and can only be retrieved by the same process.
244 * If the parent or the child process closes an exported port (using fork(2)
245 * or I_SENDFD) all the file descriptors associated with the port by the
246 * process will be dissociated from the port. Events of dissociated file
247 * descriptors as well as all non-shareable events will be discarded.
248 * The other process can continue working with the port as usual.
250 * CLOSING A PORT
251 * close(2) has to be used to close a port. See FORK BEHAVIOUR for details.
253 * PORT EVENT STRUCTURES
254 * The global control structure of the event ports framework is port_control_t.
255 * port_control_t keeps track of the number of created ports in the system.
256 * The cache of the port event structures is also located in port_control_t.
258 * On port_create() the vnode and the portfs node is also created.
259 * The portfs node is represented by the port_t structure.
260 * The port_t structure manages all port specific tasks:
261 * - management of resource control values
262 * - port fop_poll interface
263 * - creation time
264 * - uid and gid of the port
266 * The port_t structure contains the port_queue_t structure.
267 * The port_queue_t structure contains all the data necessary for the
268 * queue management:
269 * - locking
270 * - condition variables
271 * - event counters
272 * - submitted events (represented by port_kevent_t structures)
273 * - threads waiting for event delivery (check portget_t structure)
274 * - PORT_SOURCE_FD cache (managed by the port_fdcache_t structure)
275 * - event source management (managed by the port_source_t structure)
276 * - alert mode management (check port_alert_t structure)
278 * EVENT MANAGEMENT
279 * The event port file system creates a kmem_cache for internal allocation of
280 * event port structures.
282 * 1. Event source association with a port:
283 * The first step to do for event sources is to get associated with a port
284 * using the port_associate_ksource() function or adding an entry to the
285 * port_ksource_tab[]. An event source can get dissociated from a port
286 * using the port_dissociate_ksource() function. An entry in the
287 * port_ksource_tab[] implies that the source will be associated
288 * automatically with every new created port.
289 * The event source can deliver a callback function, which is used by the
290 * port to notify the event source about close(2). The idea is that
291 * in such a case the event source should free all allocated resources
292 * and it must return to the port all allocated slots/structures.
293 * The port_close() function will wait until all allocated event
294 * structures/slots are returned to the port.
295 * The callback function is not necessary when the event source does not
296 * maintain local resources, a second condition is that the event source
297 * can guarantee that allocated event slots will be returned without
298 * delay to the port (it will not block and sleep somewhere).
300 * 2. Reservation of an event slot / event structure
301 * The event port reliability is based on the reservation of an event "slot"
302 * (allocation of an event structure) by the event source as part of the
303 * application call. If the maximal number of event slots is exhausted then
304 * the event source can return a corresponding error code to the application.
306 * The port_alloc_event() function has to be used by event sources to
307 * allocate an event slot (reserve an event structure). The port_alloc_event()
308 * does not block and it will return a 0 value on success or an error code
309 * if it fails.
310 * An argument of port_alloc_event() is a flag which determines the behavior
311 * of the event after it was delivered to the application:
312 * PORT_ALLOC_DEFAULT : event slot becomes free after delivery to the
313 * application.
314 * PORT_ALLOC_PRIVATE : event slot remains under the control of the event
315 * source. This kind of slots can not be used for
316 * event delivery and should only be used internally
317 * by the event source.
318 * PORT_KEV_CACHED : event slot remains under the control of an event
319 * port cache. It does not become free after delivery
320 * to the application.
321 * PORT_ALLOC_SCACHED : event slot remains under the control of the event
322 * source. The event source takes the control over
323 * the slot after the event is delivered to the
324 * application.
326 * 3. Delivery of events to the event port
327 * Earlier allocated event structure/slot has to be used to deliver
328 * event data to the port. Event source has to use the function
329 * port_send_event(). The single argument is a pointer to the previously
330 * reserved event structure/slot.
331 * The portkev_events field of the port_kevent_t structure can be updated/set
332 * in two ways:
333 * 1. using the port_set_event() function, or
334 * 2. updating the portkev_events field out of the callback function:
335 * The event source can deliver a callback function to the port as an
336 * argument of port_init_event().
337 * One of the arguments of the callback function is a pointer to the
338 * events field, which will be delivered to the application.
339 * (see Delivery of events to the application).
340 * Event structures/slots can be delivered to the event port only one time,
341 * they remain blocked until the data is delivered to the application and the
342 * slot becomes free or it is delivered back to the event source
343 * (PORT_ALLOC_SCACHED). The activation of the callback function mentioned above
344 * is at the same time the indicator for the event source that the event
345 * structure/slot is free for reuse.
347 * 4. Delivery of events to the application
348 * The events structures/slots delivered by event sources remain in the
349 * port queue until they are retrieved by the application or the port
350 * is closed (exit(2) also closes all opened file descriptors).
351 * The application uses port_get() or port_getn() to retrieve events from
352 * a port. port_get() retrieves a single event structure/slot and port_getn()
353 * retrieves a list of event structures/slots.
354 * Both functions are able to poll for events and return immediately or they
355 * can specify a timeout value.
356 * Before the events are delivered to the application they are moved to a
357 * second temporary internal queue. The idea is to avoid lock collisions or
358 * contentions of the global queue lock.
359 * The global queue lock is used every time when an event source delivers
360 * new events to the port.
361 * The port_get() and port_getn() functions
362 * a) retrieve single events from the temporary queue,
363 * b) prepare the data to be passed to the application memory,
364 * c) activate the callback function of the event sources:
365 * - to get the latest event data,
366 * - the event source can free all allocated resources associated with the
367 * current event,
368 * - the event source can re-use the current event slot/structure
369 * - the event source can deny the delivery of the event to the application
370 * (e.g. because of the wrong process).
371 * d) put the event back to the temporary queue if the event delivery was denied
372 * e) repeat a) until d) as long as there are events in the queue and
373 * there is enough user space available.
375 * The loop described above could hold the global mutex for a very long
376 * time. To avoid that, a second mutex was introduced to synchronize
377 * concurrent threads accessing the temporary queue.
380 static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
381 uintptr_t);
383 static struct sysent port_sysent = {
385 SE_ARGC | SE_64RVAL | SE_NOUNLOAD,
386 (int (*)())portfs,
389 static struct modlsys modlsys = {
390 &mod_syscallops, "event ports", &port_sysent
#ifdef _SYSCALL32_IMPL

static int64_t
portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4,
    uint32_t arg5, uint32_t arg6);

/*
 * System call table entry used for 32-bit callers; portfs32() takes the
 * same six arguments as portfs(), in their 32-bit representation.
 */
static struct sysent port_sysent32 = {
	6,
	SE_ARGC | SE_64RVAL | SE_NOUNLOAD,
	(int (*)())portfs32,
};

/* Module linkage element describing the 32-bit system call. */
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit event ports syscalls",
	&port_sysent32
};
#endif /* _SYSCALL32_IMPL */
412 static struct modlinkage modlinkage = {
413 MODREV_1,
414 &modlsys,
415 #ifdef _SYSCALL32_IMPL
416 &modlsys32,
417 #endif
418 NULL
421 port_kstat_t port_kstat = {
422 { "ports", KSTAT_DATA_UINT32 }
425 dev_t portdev;
426 extern const struct vnodeops port_vnodeops;
427 struct vfs port_vfs;
429 extern rctl_hndl_t rc_process_portev;
430 extern rctl_hndl_t rc_project_portids;
431 extern void aio_close_port(void *, int, pid_t, int);
434 * This table contains a list of event sources which need a static
435 * association with a port (every port).
436 * The last NULL entry in the table is required to detect "end of table".
438 struct port_ksource port_ksource_tab[] = {
439 {PORT_SOURCE_AIO, aio_close_port, NULL, NULL},
440 {0, NULL, NULL, NULL}
443 /* local functions */
444 static int port_getn(port_t *, port_event_t *, uint_t, uint_t *,
445 port_gettimer_t *);
446 static int port_sendn(int [], int [], uint_t, int, void *, uint_t *);
447 static int port_alert(port_t *, int, int, void *);
448 static int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *);
449 static int port_send(port_t *, int, int, void *);
450 static int port_create(int *);
451 static int port_get_alert(port_alert_t *, port_event_t *);
452 static int port_copy_event(port_event_t *, port_kevent_t *, list_t *);
453 static int *port_errorn(int *, int, int, int);
454 static int port_noshare(void *, int *, pid_t, int, void *);
455 static int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *,
456 int);
457 static void port_init(port_t *);
458 static void port_remove_alert(port_queue_t *);
459 static void port_add_ksource_local(port_t *, port_ksource_t *);
460 static void port_check_return_cond(port_queue_t *);
461 static void port_dequeue_thread(port_queue_t *, portget_t *);
462 static portget_t *port_queue_thread(port_queue_t *, uint_t);
463 static void port_kstat_init(void);
465 #ifdef _SYSCALL32_IMPL
466 static int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *);
467 #endif
469 /* yes, we want defaults */
470 static const struct vfsops port_vfsops;
473 _init(void)
475 major_t major;
477 if ((major = getudev()) == (major_t)-1)
478 return (ENXIO);
479 portdev = makedevice(major, 0);
481 vfs_setops(&port_vfs, &port_vfsops);
482 port_vfs.vfs_flag = VFS_RDONLY;
483 port_vfs.vfs_dev = portdev;
484 vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0);
486 mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL);
487 port_control.pc_nents = 0; /* number of active ports */
489 /* create kmem_cache for port event structures */
490 port_control.pc_cache = kmem_cache_create("port_cache",
491 sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
493 port_kstat_init(); /* init port kstats */
494 return (mod_install(&modlinkage));
498 _info(struct modinfo *modinfop)
500 return (mod_info(&modlinkage, modinfop));
/*
 * System call wrapper for all port related system calls from 32-bit programs.
 *
 * The arguments are widened and passed on to portfs(). Two opcodes need
 * special treatment:
 *   PORT_GET   - a2 and a3 are converted through int before they are
 *                widened, and the bare PORT_GET opcode is passed on.
 *   PORT_SENDN - a0 is converted through uint32_t before it is widened
 *                (portfs() uses it as a pointer for this opcode).
 * Every other opcode is forwarded with its arguments unchanged.
 */
#ifdef _SYSCALL32_IMPL
static int64_t
portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3,
    uint32_t a4)
{
	if ((opcode & PORT_CODE_MASK) == PORT_GET)
		return (portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4));

	if ((opcode & PORT_CODE_MASK) == PORT_SENDN)
		return (portfs(opcode, (uint32_t)a0, a1, a2, a3, a4));

	return (portfs(opcode, a0, a1, a2, a3, a4));
}
#endif	/* _SYSCALL32_IMPL */
529 * System entry point for port functions.
530 * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE).
531 * The libc uses PORT_SYS_NOPORT in functions which do not deliver a
532 * port file descriptor as first argument.
534 static int64_t
535 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3,
536 uintptr_t a4)
538 rval_t r;
539 port_t *pp;
540 int error = 0;
541 uint_t nget;
542 file_t *fp;
543 port_gettimer_t port_timer;
545 r.r_vals = 0;
546 if (opcode & PORT_SYS_NOPORT) {
547 opcode &= PORT_CODE_MASK;
548 if (opcode == PORT_SENDN) {
549 error = port_sendn((int *)a0, (int *)a1, (uint_t)a2,
550 (int)a3, (void *)a4, (uint_t *)&r.r_val1);
551 if (error && (error != EIO))
552 return ((int64_t)set_errno(error));
553 return (r.r_vals);
556 if (opcode == PORT_CREATE) {
557 error = port_create(&r.r_val1);
558 if (error)
559 return ((int64_t)set_errno(error));
560 return (r.r_vals);
564 /* opcodes using port as first argument (a0) */
566 if ((fp = getf((int)a0)) == NULL)
567 return ((uintptr_t)set_errno(EBADF));
569 if (fp->f_vnode->v_type != VPORT) {
570 releasef((int)a0);
571 return ((uintptr_t)set_errno(EBADFD));
574 pp = VTOEP(fp->f_vnode);
576 switch (opcode & PORT_CODE_MASK) {
577 case PORT_GET:
579 /* see PORT_GETN description */
580 struct timespec timeout;
582 port_timer.pgt_flags = PORTGET_ONE;
583 port_timer.pgt_loop = 0;
584 port_timer.pgt_rqtp = NULL;
585 if (a4 != (uintptr_t)NULL) {
586 port_timer.pgt_timeout = &timeout;
587 timeout.tv_sec = (time_t)a2;
588 timeout.tv_nsec = (long)a3;
589 } else {
590 port_timer.pgt_timeout = NULL;
592 do {
593 nget = 1;
594 error = port_getn(pp, (port_event_t *)a1, 1,
595 (uint_t *)&nget, &port_timer);
596 } while (nget == 0 && error == 0 && port_timer.pgt_loop);
597 break;
599 case PORT_GETN:
602 * port_getn() can only retrieve own or shareable events from
603 * other processes. The port_getn() function remains in the
604 * kernel until own or shareable events are available or the
605 * timeout elapses.
607 port_timer.pgt_flags = 0;
608 port_timer.pgt_loop = 0;
609 port_timer.pgt_rqtp = NULL;
610 port_timer.pgt_timeout = (struct timespec *)a4;
611 do {
612 nget = a3;
613 error = port_getn(pp, (port_event_t *)a1, (uint_t)a2,
614 (uint_t *)&nget, &port_timer);
615 } while (nget == 0 && error == 0 && port_timer.pgt_loop);
616 r.r_val1 = nget;
617 r.r_val2 = error;
618 releasef((int)a0);
619 if (error && error != ETIME)
620 return ((int64_t)set_errno(error));
621 return (r.r_vals);
623 case PORT_ASSOCIATE:
625 switch ((int)a1) {
626 case PORT_SOURCE_FD:
627 error = port_associate_fd(pp, (int)a1, (uintptr_t)a2,
628 (int)a3, (void *)a4);
629 break;
630 case PORT_SOURCE_FILE:
631 error = port_associate_fop(pp, (int)a1, (uintptr_t)a2,
632 (int)a3, (void *)a4);
633 break;
634 default:
635 error = EINVAL;
636 break;
638 break;
640 case PORT_SEND:
642 /* user-defined events */
643 error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2);
644 break;
646 case PORT_DISPATCH:
649 * library events, blocking
650 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ
651 * are currently allowed.
653 if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) {
654 error = EINVAL;
655 break;
657 error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2,
658 (uintptr_t)a3, (void *)a4);
659 break;
661 case PORT_DISSOCIATE:
663 switch ((int)a1) {
664 case PORT_SOURCE_FD:
665 error = port_dissociate_fd(pp, (uintptr_t)a2);
666 break;
667 case PORT_SOURCE_FILE:
668 error = port_dissociate_fop(pp, (uintptr_t)a2);
669 break;
670 default:
671 error = EINVAL;
672 break;
674 break;
676 case PORT_ALERT:
678 if ((int)a2) /* a2 = events */
679 error = port_alert(pp, (int)a1, (int)a2, (void *)a3);
680 else
681 port_remove_alert(&pp->port_queue);
682 break;
684 default:
685 error = EINVAL;
686 break;
689 releasef((int)a0);
690 if (error)
691 return ((int64_t)set_errno(error));
692 return (r.r_vals);
696 * System call to create a port.
698 * The port_create() function creates a vnode of type VPORT per port.
699 * The port control data is associated with the vnode as vnode private data.
700 * The port_create() function returns an event port file descriptor.
702 static int
703 port_create(int *fdp)
705 port_t *pp;
706 vnode_t *vp;
707 struct file *fp;
708 proc_t *p = curproc;
710 /* initialize vnode and port private data */
711 pp = kmem_zalloc(sizeof (port_t), KM_SLEEP);
713 pp->port_vnode = vn_alloc(KM_SLEEP);
714 vp = EPTOV(pp);
715 vn_setops(vp, &port_vnodeops);
716 vp->v_type = VPORT;
717 vp->v_vfsp = &port_vfs;
718 vp->v_data = (caddr_t)pp;
720 mutex_enter(&port_control.pc_mutex);
722 * Retrieve the maximal number of event ports allowed per system from
723 * the resource control: project.port-max-ids.
725 mutex_enter(&p->p_lock);
726 if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p,
727 port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) {
728 mutex_exit(&p->p_lock);
729 vn_free(vp);
730 kmem_free(pp, sizeof (port_t));
731 mutex_exit(&port_control.pc_mutex);
732 return (EAGAIN);
736 * Retrieve the maximal number of events allowed per port from
737 * the resource control: process.port-max-events.
739 pp->port_max_events = rctl_enforced_value(rc_process_portev,
740 p->p_rctls, p);
741 mutex_exit(&p->p_lock);
743 /* allocate a new user file descriptor and a file structure */
744 if (falloc(vp, FREAD|FWRITE, &fp, fdp)) {
746 * If the file table is full, free allocated resources.
748 vn_free(vp);
749 kmem_free(pp, sizeof (port_t));
750 mutex_exit(&port_control.pc_mutex);
751 return (EMFILE);
754 mutex_exit(&fp->f_tlock);
756 pp->port_fd = *fdp;
757 port_control.pc_nents++;
758 p->p_portcnt++;
759 port_kstat.pks_ports.value.ui32++;
760 mutex_exit(&port_control.pc_mutex);
762 /* initializes port private data */
763 port_init(pp);
764 /* set user file pointer */
765 setf(*fdp, fp);
766 return (0);
770 * port_init() initializes event port specific data
772 static void
773 port_init(port_t *pp)
775 port_queue_t *portq;
776 port_ksource_t *pks;
778 mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL);
779 portq = &pp->port_queue;
780 mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL);
781 pp->port_flags |= PORT_INIT;
784 * If it is not enough memory available to satisfy a user
785 * request using a single port_getn() call then port_getn()
786 * will reduce the size of the list to PORT_MAX_LIST.
788 pp->port_max_list = port_max_list;
790 /* Set timestamp entries required for fstat(2) requests */
791 gethrestime(&pp->port_ctime);
792 pp->port_uid = crgetuid(curproc->p_cred);
793 pp->port_gid = crgetgid(curproc->p_cred);
795 /* initialize port queue structs */
796 list_create(&portq->portq_list, sizeof (port_kevent_t),
797 offsetof(port_kevent_t, portkev_node));
798 list_create(&portq->portq_get_list, sizeof (port_kevent_t),
799 offsetof(port_kevent_t, portkev_node));
800 portq->portq_flags = 0;
801 pp->port_pid = curproc->p_pid;
803 /* Allocate cache skeleton for PORT_SOURCE_FD events */
804 portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP);
805 mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL);
808 * Allocate cache skeleton for association of event sources.
810 mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL);
811 portq->portq_scache = kmem_zalloc(
812 PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP);
815 * pre-associate some kernel sources with this port.
816 * The pre-association is required to create port_source_t
817 * structures for object association.
818 * Some sources can not get associated with a port before the first
819 * object association is requested. Another reason to pre_associate
820 * a particular source with a port is because of performance.
823 for (pks = port_ksource_tab; pks->pks_source != 0; pks++)
824 port_add_ksource_local(pp, pks);
828 * The port_add_ksource_local() function is being used to associate
829 * event sources with every new port.
830 * The event sources need to be added to port_ksource_tab[].
832 static void
833 port_add_ksource_local(port_t *pp, port_ksource_t *pks)
835 port_source_t *pse;
836 port_source_t **ps;
838 mutex_enter(&pp->port_queue.portq_source_mutex);
839 ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)];
840 for (pse = *ps; pse != NULL; pse = pse->portsrc_next) {
841 if (pse->portsrc_source == pks->pks_source)
842 break;
845 if (pse == NULL) {
846 /* associate new source with the port */
847 pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP);
848 pse->portsrc_source = pks->pks_source;
849 pse->portsrc_close = pks->pks_close;
850 pse->portsrc_closearg = pks->pks_closearg;
851 pse->portsrc_cnt = 1;
853 pks->pks_portsrc = pse;
854 if (*ps != NULL)
855 pse->portsrc_next = (*ps)->portsrc_next;
856 *ps = pse;
858 mutex_exit(&pp->port_queue.portq_source_mutex);
862 * The port_send() function sends an event of type "source" to a
863 * port. This function is non-blocking. An event can be sent to
864 * a port as long as the number of events per port does not achieve the
865 * maximal allowed number of events. The max. number of events per port is
866 * defined by the resource control process.max-port-events.
867 * This function is used by the port library function port_send()
868 * and port_dispatch(). The port_send(3c) function is part of the
869 * event ports API and submits events of type PORT_SOURCE_USER. The
870 * port_dispatch() function is project private and it is used by library
871 * functions to submit events of other types than PORT_SOURCE_USER
872 * (e.g. PORT_SOURCE_AIO).
874 static int
875 port_send(port_t *pp, int source, int events, void *user)
877 port_kevent_t *pev;
878 int error;
880 error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev);
881 if (error)
882 return (error);
884 pev->portkev_object = 0;
885 pev->portkev_events = events;
886 pev->portkev_user = user;
887 pev->portkev_callback = NULL;
888 pev->portkev_arg = NULL;
889 pev->portkev_flags = 0;
891 port_send_event(pev);
892 return (0);
896 * The port_noshare() function returns 0 if the current event was generated
897 * by the same process. Otherwise is returns a value other than 0 and the
898 * event should not be delivered to the current processe.
899 * The port_noshare() function is normally used by the port_dispatch()
900 * function. The port_dispatch() function is project private and can only be
901 * used within the event port project.
902 * Currently the libaio uses the port_dispatch() function to deliver events
903 * of types PORT_SOURCE_AIO.
905 /* ARGSUSED */
906 static int
907 port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp)
909 if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid)
910 return (1);
911 return (0);
915 * The port_dispatch_event() function is project private and it is used by
916 * libraries involved in the project to deliver events to the port.
917 * port_dispatch will sleep and wait for enough resources to satisfy the
918 * request, if necessary.
919 * The library can specify if the delivered event is shareable with other
920 * processes (see PORT_SYS_NOSHARE flag).
922 static int
923 port_dispatch_event(port_t *pp, int opcode, int source, int events,
924 uintptr_t object, void *user)
926 port_kevent_t *pev;
927 int error;
929 error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev);
930 if (error)
931 return (error);
933 pev->portkev_object = object;
934 pev->portkev_events = events;
935 pev->portkev_user = user;
936 pev->portkev_arg = NULL;
937 if (opcode & PORT_SYS_NOSHARE) {
938 pev->portkev_flags = PORT_KEV_NOSHARE;
939 pev->portkev_callback = port_noshare;
940 } else {
941 pev->portkev_flags = 0;
942 pev->portkev_callback = NULL;
945 port_send_event(pev);
946 return (0);
951 * The port_sendn() function is the kernel implementation of the event
952 * port API function port_sendn(3c).
953 * This function is able to send an event to a list of event ports.
955 static int
956 port_sendn(int ports[], int errors[], uint_t nent, int events, void *user,
957 uint_t *nget)
959 port_kevent_t *pev;
960 int errorcnt = 0;
961 int error = 0;
962 int count;
963 int port;
964 int *plist;
965 int *elist = NULL;
966 file_t *fp;
967 port_t *pp;
969 if (nent == 0 || nent > port_max_list)
970 return (EINVAL);
972 plist = kmem_alloc(nent * sizeof (int), KM_SLEEP);
973 if (copyin((void *)ports, plist, nent * sizeof (int))) {
974 kmem_free(plist, nent * sizeof (int));
975 return (EFAULT);
979 * Scan the list for event port file descriptors and send the
980 * attached user event data embedded in a event of type
981 * PORT_SOURCE_USER to every event port in the list.
982 * If a list entry is not a valid event port then the corresponding
983 * error code will be stored in the errors[] list with the same
984 * list offset as in the ports[] list.
987 for (count = 0; count < nent; count++) {
988 port = plist[count];
989 if ((fp = getf(port)) == NULL) {
990 elist = port_errorn(elist, nent, EBADF, count);
991 errorcnt++;
992 continue;
995 pp = VTOEP(fp->f_vnode);
996 if (fp->f_vnode->v_type != VPORT) {
997 releasef(port);
998 elist = port_errorn(elist, nent, EBADFD, count);
999 errorcnt++;
1000 continue;
1003 error = port_alloc_event_local(pp, PORT_SOURCE_USER,
1004 PORT_ALLOC_DEFAULT, &pev);
1005 if (error) {
1006 releasef(port);
1007 elist = port_errorn(elist, nent, error, count);
1008 errorcnt++;
1009 continue;
1012 pev->portkev_object = 0;
1013 pev->portkev_events = events;
1014 pev->portkev_user = user;
1015 pev->portkev_callback = NULL;
1016 pev->portkev_arg = NULL;
1017 pev->portkev_flags = 0;
1019 port_send_event(pev);
1020 releasef(port);
1022 if (errorcnt) {
1023 error = EIO;
1024 if (copyout(elist, (void *)errors, nent * sizeof (int)))
1025 error = EFAULT;
1026 kmem_free(elist, nent * sizeof (int));
1028 *nget = nent - errorcnt;
1029 kmem_free(plist, nent * sizeof (int));
1030 return (error);
1033 static int *
1034 port_errorn(int *elist, int nent, int error, int index)
1036 if (elist == NULL)
1037 elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP);
1038 elist[index] = error;
1039 return (elist);
1043 * port_alert()
1044 * The port_alert() funcion is a high priority event and it is always set
1045 * on top of the queue. It is also delivered as single event.
1046 * flags:
1047 * - SET :overwrite current alert data
1048 * - UPDATE:set alert data or return EBUSY if alert mode is already set
1050 * - set the ALERT flag
1051 * - wakeup all sleeping threads
1053 static int
1054 port_alert(port_t *pp, int flags, int events, void *user)
1056 port_queue_t *portq;
1057 portget_t *pgetp;
1058 port_alert_t *pa;
1060 if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID)
1061 return (EINVAL);
1063 portq = &pp->port_queue;
1064 pa = &portq->portq_alert;
1065 mutex_enter(&portq->portq_mutex);
1067 /* check alert conditions */
1068 if (flags == PORT_ALERT_UPDATE) {
1069 if (portq->portq_flags & PORTQ_ALERT) {
1070 mutex_exit(&portq->portq_mutex);
1071 return (EBUSY);
1076 * Store alert data in the port to be delivered to threads
1077 * which are using port_get(n) to retrieve events.
1080 portq->portq_flags |= PORTQ_ALERT;
1081 pa->portal_events = events; /* alert info */
1082 pa->portal_pid = curproc->p_pid; /* process owner */
1083 pa->portal_object = 0; /* no object */
1084 pa->portal_user = user; /* user alert data */
1086 /* alert and deliver alert data to waiting threads */
1087 pgetp = portq->portq_thread;
1088 if (pgetp == NULL) {
1089 /* no threads waiting for events */
1090 mutex_exit(&portq->portq_mutex);
1091 return (0);
1095 * Set waiting threads in alert mode (PORTGET_ALERT)..
1096 * Every thread waiting for events already allocated a portget_t
1097 * structure to sleep on.
1098 * The port alert arguments are stored in the portget_t structure.
1099 * The PORTGET_ALERT flag is set to indicate the thread to return
1100 * immediately with the alert event.
1102 do {
1103 if ((pgetp->portget_state & PORTGET_ALERT) == 0) {
1104 pa = &pgetp->portget_alert;
1105 pa->portal_events = events;
1106 pa->portal_object = 0;
1107 pa->portal_user = user;
1108 pgetp->portget_state |= PORTGET_ALERT;
1109 cv_signal(&pgetp->portget_cv);
1111 } while ((pgetp = pgetp->portget_next) != portq->portq_thread);
1112 mutex_exit(&portq->portq_mutex);
1113 return (0);
1117 * Clear alert state of the port
1119 static void
1120 port_remove_alert(port_queue_t *portq)
1122 mutex_enter(&portq->portq_mutex);
1123 portq->portq_flags &= ~PORTQ_ALERT;
1124 mutex_exit(&portq->portq_mutex);
1128 * The port_getn() function is used to retrieve events from a port.
1130 * The port_getn() function returns immediately if there are enough events
1131 * available in the port to satisfy the request or if the port is in alert
1132 * mode (see port_alert(3c)).
1133 * The timeout argument of port_getn(3c) -which is embedded in the
1134 * port_gettimer_t structure- specifies if the system call should block or if it
1135 * should return immediately depending on the number of events available.
1136 * This function is internally used by port_getn(3c) as well as by
1137 * port_get(3c).
1139 static int
1140 port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget,
1141 port_gettimer_t *pgt)
1143 port_queue_t *portq;
1144 port_kevent_t *pev;
1145 port_kevent_t *lev;
1146 int error = 0;
1147 uint_t nmax;
1148 uint_t nevents;
1149 uint_t eventsz;
1150 port_event_t *kevp;
1151 list_t *glist;
1152 uint_t tnent;
1153 int rval;
1154 int blocking = -1;
1155 int timecheck;
1156 int flag;
1157 timespec_t rqtime;
1158 timespec_t *rqtp = NULL;
1159 portget_t *pgetp;
1160 void *results;
1161 model_t model = get_udatamodel();
1163 flag = pgt->pgt_flags;
1165 if (*nget > max && max > 0)
1166 return (EINVAL);
1168 portq = &pp->port_queue;
1169 mutex_enter(&portq->portq_mutex);
1170 if (max == 0) {
1172 * Return number of objects with events.
1173 * The port_block() call is required to synchronize this
1174 * thread with another possible thread, which could be
1175 * retrieving events from the port queue.
1177 port_block(portq);
1179 * Check if a second thread is currently retrieving events
1180 * and it is using the temporary event queue.
1182 if (portq->portq_tnent) {
1183 /* put remaining events back to the port queue */
1184 port_push_eventq(portq);
1186 *nget = portq->portq_nent;
1187 port_unblock(portq);
1188 mutex_exit(&portq->portq_mutex);
1189 return (0);
1192 if (uevp == NULL) {
1193 mutex_exit(&portq->portq_mutex);
1194 return (EFAULT);
1196 if (*nget == 0) { /* no events required */
1197 mutex_exit(&portq->portq_mutex);
1198 return (0);
1201 /* port is being closed ... */
1202 if (portq->portq_flags & PORTQ_CLOSE) {
1203 mutex_exit(&portq->portq_mutex);
1204 return (EBADFD);
1207 /* return immediately if port in alert mode */
1208 if (portq->portq_flags & PORTQ_ALERT) {
1209 error = port_get_alert(&portq->portq_alert, uevp);
1210 if (error == 0)
1211 *nget = 1;
1212 mutex_exit(&portq->portq_mutex);
1213 return (error);
1216 portq->portq_thrcnt++;
1219 * Now check if the completed events satisfy the
1220 * "wait" requirements of the current thread:
1223 if (pgt->pgt_loop) {
1225 * loop entry of same thread
1226 * pgt_loop is set when the current thread returns
1227 * prematurely from this function. That could happen
1228 * when a port is being shared between processes and
1229 * this thread could not find events to return.
1230 * It is not allowed to a thread to retrieve non-shareable
1231 * events generated in other processes.
1232 * PORTQ_WAIT_EVENTS is set when a thread already
1233 * checked the current event queue and no new events
1234 * are added to the queue.
1236 if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) &&
1237 (portq->portq_nent >= *nget)) {
1238 /* some new events arrived ...check them */
1239 goto portnowait;
1241 rqtp = pgt->pgt_rqtp;
1242 timecheck = pgt->pgt_timecheck;
1243 pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
1244 } else {
1245 /* check if enough events are available ... */
1246 if (portq->portq_nent >= *nget)
1247 goto portnowait;
1249 * There are not enough events available to satisfy
1250 * the request, check timeout value and wait for
1251 * incoming events.
1253 error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp,
1254 &blocking, flag);
1255 if (error) {
1256 port_check_return_cond(portq);
1257 mutex_exit(&portq->portq_mutex);
1258 return (error);
1261 if (blocking == 0) /* don't block, check fired events */
1262 goto portnowait;
1264 if (rqtp != NULL) {
1265 timespec_t now;
1266 timecheck = timechanged;
1267 gethrestime(&now);
1268 timespecadd(rqtp, &now);
1272 /* enqueue thread in the list of waiting threads */
1273 pgetp = port_queue_thread(portq, *nget);
1276 /* Wait here until return conditions met */
1277 for (;;) {
1278 if (pgetp->portget_state & PORTGET_ALERT) {
1279 /* reap alert event and return */
1280 error = port_get_alert(&pgetp->portget_alert, uevp);
1281 if (error)
1282 *nget = 0;
1283 else
1284 *nget = 1;
1285 port_dequeue_thread(&pp->port_queue, pgetp);
1286 portq->portq_thrcnt--;
1287 mutex_exit(&portq->portq_mutex);
1288 return (error);
1292 * Check if some other thread is already retrieving
1293 * events (portq_getn > 0).
1296 if ((portq->portq_getn == 0) &&
1297 ((portq)->portq_nent >= *nget) &&
1298 (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) ||
1299 !((portq)->portq_flags & PORTQ_WAIT_EVENTS)))
1300 break;
1302 if (portq->portq_flags & PORTQ_CLOSE) {
1303 error = EBADFD;
1304 break;
1307 rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
1308 rqtp, timecheck);
1310 if (rval <= 0) {
1311 error = (rval == 0) ? EINTR : ETIME;
1312 break;
1316 /* take thread out of the wait queue */
1317 port_dequeue_thread(portq, pgetp);
1319 if (error != 0 && (error == EINTR || error == EBADFD ||
1320 (error == ETIME && flag))) {
1321 /* return without events */
1322 port_check_return_cond(portq);
1323 mutex_exit(&portq->portq_mutex);
1324 return (error);
1327 portnowait:
1329 * Move port event queue to a temporary event queue .
1330 * New incoming events will be continue be posted to the event queue
1331 * and they will not be considered by the current thread.
1332 * The idea is to avoid lock contentions or an often locking/unlocking
1333 * of the port queue mutex. The contention and performance degradation
1334 * could happen because:
1335 * a) incoming events use the port queue mutex to enqueue new events and
1336 * b) before the event can be delivered to the application it is
1337 * necessary to notify the event sources about the event delivery.
1338 * Sometimes the event sources can require a long time to return and
1339 * the queue mutex would block incoming events.
1340 * During this time incoming events (port_send_event()) do not need
1341 * to awake threads waiting for events. Before the current thread
1342 * returns it will check the conditions to awake other waiting threads.
1344 portq->portq_getn++; /* number of threads retrieving events */
1345 port_block(portq); /* block other threads here */
1346 nmax = max < portq->portq_nent ? max : portq->portq_nent;
1348 if (portq->portq_tnent) {
1350 * Move remaining events from previous thread back to the
1351 * port event queue.
1353 port_push_eventq(portq);
1355 /* move port event queue to a temporary queue */
1356 list_move_tail(&portq->portq_get_list, &portq->portq_list);
1357 glist = &portq->portq_get_list; /* use temporary event queue */
1358 tnent = portq->portq_nent; /* get current number of events */
1359 portq->portq_nent = 0; /* no events in the port event queue */
1360 portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */
1361 mutex_exit(&portq->portq_mutex); /* event queue can be reused now */
1363 if (model == DATAMODEL_NATIVE) {
1364 eventsz = sizeof (port_event_t);
1365 kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
1366 if (kevp == NULL) {
1367 if (nmax > pp->port_max_list)
1368 nmax = pp->port_max_list;
1369 kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
1371 results = kevp;
1372 lev = NULL; /* start with first event in the queue */
1373 for (nevents = 0; nevents < nmax; ) {
1374 pev = port_get_kevent(glist, lev);
1375 if (pev == NULL) /* no more events available */
1376 break;
1377 if (pev->portkev_flags & PORT_KEV_FREE) {
1378 /* Just discard event */
1379 list_remove(glist, pev);
1380 pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
1381 if (PORT_FREE_EVENT(pev))
1382 port_free_event_local(pev, 0);
1383 tnent--;
1384 continue;
1387 /* move event data to copyout list */
1388 if (port_copy_event(&kevp[nevents], pev, glist)) {
1390 * Event can not be delivered to the
1391 * current process.
1393 if (lev != NULL)
1394 list_insert_after(glist, lev, pev);
1395 else
1396 list_insert_head(glist, pev);
1397 lev = pev; /* last checked event */
1398 } else {
1399 nevents++; /* # of events ready */
1402 #ifdef _SYSCALL32_IMPL
1403 } else {
1404 port_event32_t *kevp32;
1406 eventsz = sizeof (port_event32_t);
1407 kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
1408 if (kevp32 == NULL) {
1409 if (nmax > pp->port_max_list)
1410 nmax = pp->port_max_list;
1411 kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
1413 results = kevp32;
1414 lev = NULL; /* start with first event in the queue */
1415 for (nevents = 0; nevents < nmax; ) {
1416 pev = port_get_kevent(glist, lev);
1417 if (pev == NULL) /* no more events available */
1418 break;
1419 if (pev->portkev_flags & PORT_KEV_FREE) {
1420 /* Just discard event */
1421 list_remove(glist, pev);
1422 pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
1423 if (PORT_FREE_EVENT(pev))
1424 port_free_event_local(pev, 0);
1425 tnent--;
1426 continue;
1429 /* move event data to copyout list */
1430 if (port_copy_event32(&kevp32[nevents], pev, glist)) {
1432 * Event can not be delivered to the
1433 * current process.
1435 if (lev != NULL)
1436 list_insert_after(glist, lev, pev);
1437 else
1438 list_insert_head(glist, pev);
1439 lev = pev; /* last checked event */
1440 } else {
1441 nevents++; /* # of events ready */
1444 #endif /* _SYSCALL32_IMPL */
1448 * Remember number of remaining events in the temporary event queue.
1450 portq->portq_tnent = tnent - nevents;
1453 * Work to do before return :
1454 * - push list of remaining events back to the top of the standard
1455 * port queue.
1456 * - if this is the last thread calling port_get(n) then wakeup the
1457 * thread waiting on close(2).
1458 * - check for a deferred cv_signal from port_send_event() and wakeup
1459 * the sleeping thread.
1462 mutex_enter(&portq->portq_mutex);
1463 port_unblock(portq);
1464 if (portq->portq_tnent) {
1466 * move remaining events in the temporary event queue back
1467 * to the port event queue
1469 port_push_eventq(portq);
1471 portq->portq_getn--; /* update # of threads retrieving events */
1472 if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */
1473 /* Last thread => check close(2) conditions ... */
1474 if (portq->portq_flags & PORTQ_CLOSE) {
1475 cv_signal(&portq->portq_closecv);
1476 mutex_exit(&portq->portq_mutex);
1477 kmem_free(results, eventsz * nmax);
1478 /* do not copyout events */
1479 *nget = 0;
1480 return (EBADFD);
1482 } else if (portq->portq_getn == 0) {
1484 * no other threads retrieving events ...
1485 * check wakeup conditions of sleeping threads
1487 if ((portq->portq_thread != NULL) &&
1488 (portq->portq_nent >= portq->portq_nget))
1489 cv_signal(&portq->portq_thread->portget_cv);
1493 * Check PORTQ_POLLIN here because the current thread set temporarily
1494 * the number of events in the queue to zero.
1496 if (portq->portq_flags & PORTQ_POLLIN) {
1497 portq->portq_flags &= ~PORTQ_POLLIN;
1498 mutex_exit(&portq->portq_mutex);
1499 pollwakeup(&pp->port_pollhd, POLLIN);
1500 } else {
1501 mutex_exit(&portq->portq_mutex);
1504 /* now copyout list of user event structures to user space */
1505 if (nevents) {
1506 if (copyout(results, uevp, nevents * eventsz))
1507 error = EFAULT;
1509 kmem_free(results, eventsz * nmax);
1511 if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) {
1512 /* no events retrieved: check loop conditions */
1513 if (blocking == -1) {
1514 /* no timeout checked */
1515 error = port_get_timeout(pgt->pgt_timeout,
1516 &pgt->pgt_rqtime, &rqtp, &blocking, flag);
1517 if (error) {
1518 *nget = nevents;
1519 return (error);
1521 if (rqtp != NULL) {
1522 timespec_t now;
1523 pgt->pgt_timecheck = timechanged;
1524 gethrestime(&now);
1525 timespecadd(&pgt->pgt_rqtime, &now);
1527 pgt->pgt_rqtp = rqtp;
1528 } else {
1529 /* timeout already checked -> remember values */
1530 pgt->pgt_rqtp = rqtp;
1531 if (rqtp != NULL) {
1532 pgt->pgt_timecheck = timecheck;
1533 pgt->pgt_rqtime = *rqtp;
1536 if (blocking)
1537 /* timeout remaining */
1538 pgt->pgt_loop = 1;
1541 /* set number of user event structures completed */
1542 *nget = nevents;
1543 return (error);
1547 * 1. copy kernel event structure to user event structure.
1548 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
1549 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
1550 * 4. Other types of event structures can be delivered back to the port cache
1551 * (port_free_event_local()).
1552 * 5. The event source callback function is the last opportunity for the
1553 * event source to update events, to free local resources associated with
1554 * the event or to deny the delivery of the event.
1556 static int
1557 port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list)
1559 int free_event = 0;
1560 int flags;
1561 int error;
1563 puevp->portev_source = pkevp->portkev_source;
1564 puevp->portev_object = pkevp->portkev_object;
1565 puevp->portev_user = pkevp->portkev_user;
1566 puevp->portev_events = pkevp->portkev_events;
1568 /* remove event from the queue */
1569 list_remove(list, pkevp);
1572 * Events of type PORT_KEV_WIRED remain allocated by the
1573 * event source.
1575 flags = pkevp->portkev_flags;
1576 if (pkevp->portkev_flags & PORT_KEV_WIRED)
1577 pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
1578 else
1579 free_event = 1;
1581 if (pkevp->portkev_callback) {
1582 error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
1583 &puevp->portev_events, pkevp->portkev_pid,
1584 PORT_CALLBACK_DEFAULT, pkevp);
1586 if (error) {
1588 * Event can not be delivered.
1589 * Caller must reinsert the event into the queue.
1591 pkevp->portkev_flags = flags;
1592 return (error);
1595 if (free_event)
1596 port_free_event_local(pkevp, 0);
1597 return (0);
#ifdef	_SYSCALL32_IMPL
/*
 * 32-bit data model variant of port_copy_event().
 *
 * 1. copy kernel event structure to user event structure.
 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
 * 4. Other types of event structures can be delivered back to the port cache
 *    (port_free_event_local()).
 * 5. The event source callback function is the last opportunity for the
 *    event source to update events, to free local resources associated with
 *    the event or to deny the delivery of the event.
 *
 * Returns 0 if the event was copied. If the event callback denies the
 * delivery its non-zero return value is passed back; the event flags are
 * restored and the caller must reinsert the event into the queue.
 */
static int
port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list)
{
	int	free_event = 0;
	int	error;
	int	flags;

	/*
	 * The slot lives in a kmem_alloc()ed buffer which is copied out to
	 * user space as a whole. Clear it first so that bytes which are not
	 * assigned below (e.g. pad members) never carry stale kernel memory.
	 */
	bzero(puevp, sizeof (*puevp));
	puevp->portev_source = pkevp->portkev_source;
	puevp->portev_object = (daddr32_t)pkevp->portkev_object;
	puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user;
	puevp->portev_events = pkevp->portkev_events;

	/* remove event from the queue */
	list_remove(list, pkevp);

	/*
	 * Events of type PORT_KEV_WIRED remain allocated by the
	 * sub-system (source).
	 */

	flags = pkevp->portkev_flags;
	if (pkevp->portkev_flags & PORT_KEV_WIRED)
		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
	else
		free_event = 1;

	if (pkevp->portkev_callback != NULL) {
		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
		    &puevp->portev_events, pkevp->portkev_pid,
		    PORT_CALLBACK_DEFAULT, pkevp);
		if (error) {
			/*
			 * Event can not be delivered.
			 * Caller must reinsert the event into the queue.
			 */
			pkevp->portkev_flags = flags;
			return (error);
		}
	}
	if (free_event)
		port_free_event_local(pkevp, 0);
	return (0);
}
#endif	/* _SYSCALL32_IMPL */
1657 * copyout alert event.
1659 static int
1660 port_get_alert(port_alert_t *pa, port_event_t *uevp)
1662 model_t model = get_udatamodel();
1664 /* copyout alert event structures to user space */
1665 if (model == DATAMODEL_NATIVE) {
1666 port_event_t uev;
1667 uev.portev_source = PORT_SOURCE_ALERT;
1668 uev.portev_object = pa->portal_object;
1669 uev.portev_events = pa->portal_events;
1670 uev.portev_user = pa->portal_user;
1671 if (copyout(&uev, uevp, sizeof (port_event_t)))
1672 return (EFAULT);
1673 #ifdef _SYSCALL32_IMPL
1674 } else {
1675 port_event32_t uev32;
1676 uev32.portev_source = PORT_SOURCE_ALERT;
1677 uev32.portev_object = (daddr32_t)pa->portal_object;
1678 uev32.portev_events = pa->portal_events;
1679 uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user;
1680 if (copyout(&uev32, uevp, sizeof (port_event32_t)))
1681 return (EFAULT);
1682 #endif /* _SYSCALL32_IMPL */
1684 return (0);
1688 * Check return conditions :
1689 * - pending port close(2)
1690 * - threads waiting for events
1692 static void
1693 port_check_return_cond(port_queue_t *portq)
1695 ASSERT(MUTEX_HELD(&portq->portq_mutex));
1696 portq->portq_thrcnt--;
1697 if (portq->portq_flags & PORTQ_CLOSE) {
1698 if (portq->portq_thrcnt == 0)
1699 cv_signal(&portq->portq_closecv);
1700 else
1701 cv_signal(&portq->portq_thread->portget_cv);
1706 * The port_get_kevent() function returns
1707 * - the event located at the head of the queue if 'last' pointer is NULL
1708 * - the next event after the event pointed by 'last'
1709 * The caller of this function is responsible for the integrity of the queue
1710 * in use:
1711 * - port_getn() is using a temporary queue protected with port_block().
1712 * - port_close_events() is working on the global event queue and protects
1713 * the queue with portq->portq_mutex.
1715 port_kevent_t *
1716 port_get_kevent(list_t *list, port_kevent_t *last)
1718 if (last == NULL)
1719 return (list_head(list));
1720 else
1721 return (list_next(list, last));
1725 * The port_get_timeout() function gets the timeout data from user space
1726 * and converts that info into a corresponding internal representation.
1727 * The kerneldata flag means that the timeout data is already loaded.
1729 static int
1730 port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp,
1731 int *blocking, int kerneldata)
1733 model_t model = get_udatamodel();
1735 *rqtp = NULL;
1736 if (timeout == NULL) {
1737 *blocking = 1;
1738 return (0);
1741 if (kerneldata) {
1742 *rqtime = *timeout;
1743 } else {
1744 if (model == DATAMODEL_NATIVE) {
1745 if (copyin(timeout, rqtime, sizeof (*rqtime)))
1746 return (EFAULT);
1747 #ifdef _SYSCALL32_IMPL
1748 } else {
1749 timespec32_t wait_time_32;
1750 if (copyin(timeout, &wait_time_32,
1751 sizeof (wait_time_32)))
1752 return (EFAULT);
1753 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
1754 #endif /* _SYSCALL32_IMPL */
1758 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
1759 *blocking = 0;
1760 return (0);
1763 if (rqtime->tv_sec < 0 ||
1764 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
1765 return (EINVAL);
1767 *rqtp = rqtime;
1768 *blocking = 1;
1769 return (0);
1773 * port_queue_thread()
1774 * Threads requiring more events than available will be put in a wait queue.
1775 * There is a "thread wait queue" per port.
1776 * Threads requiring less events get a higher priority than others and they
1777 * will be awoken first.
1779 static portget_t *
1780 port_queue_thread(port_queue_t *portq, uint_t nget)
1782 portget_t *pgetp;
1783 portget_t *ttp;
1784 portget_t *htp;
1786 pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP);
1787 pgetp->portget_nget = nget;
1788 pgetp->portget_pid = curproc->p_pid;
1789 if (portq->portq_thread == NULL) {
1790 /* first waiting thread */
1791 portq->portq_thread = pgetp;
1792 portq->portq_nget = nget;
1793 pgetp->portget_prev = pgetp;
1794 pgetp->portget_next = pgetp;
1795 return (pgetp);
1799 * thread waiting for less events will be set on top of the queue.
1801 ttp = portq->portq_thread;
1802 htp = ttp;
1803 for (;;) {
1804 if (nget <= ttp->portget_nget)
1805 break;
1806 if (htp == ttp->portget_next)
1807 break; /* last event */
1808 ttp = ttp->portget_next;
1811 /* add thread to the queue */
1812 pgetp->portget_next = ttp;
1813 pgetp->portget_prev = ttp->portget_prev;
1814 ttp->portget_prev->portget_next = pgetp;
1815 ttp->portget_prev = pgetp;
1816 if (portq->portq_thread == ttp)
1817 portq->portq_thread = pgetp;
1818 portq->portq_nget = portq->portq_thread->portget_nget;
1819 return (pgetp);
1823 * Take thread out of the queue.
1825 static void
1826 port_dequeue_thread(port_queue_t *portq, portget_t *pgetp)
1828 if (pgetp->portget_next == pgetp) {
1829 /* last (single) waiting thread */
1830 portq->portq_thread = NULL;
1831 portq->portq_nget = 0;
1832 } else {
1833 pgetp->portget_prev->portget_next = pgetp->portget_next;
1834 pgetp->portget_next->portget_prev = pgetp->portget_prev;
1835 if (portq->portq_thread == pgetp)
1836 portq->portq_thread = pgetp->portget_next;
1837 portq->portq_nget = portq->portq_thread->portget_nget;
1839 kmem_free(pgetp, sizeof (portget_t));
1843 * Set up event port kstats.
1845 static void
1846 port_kstat_init()
1848 kstat_t *ksp;
1849 uint_t ndata;
1851 ndata = sizeof (port_kstat) / sizeof (kstat_named_t);
1852 ksp = kstat_create("portfs", 0, "Event Ports", "misc",
1853 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL);
1854 if (ksp) {
1855 ksp->ks_data = &port_kstat;
1856 kstat_install(ksp);