/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/*
 * Copyright 2017 Joyent, Inc.
 */
/*
 * Support for the eventfd facility, a Linux-borne facility for user-generated
 * file descriptor-based events.
 */
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/eventfd.h>
#include <sys/conf.h>
#include <sys/vmem.h>
#include <sys/sysmacros.h>
#include <sys/filio.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/debug.h>
32 typedef struct eventfd_state eventfd_state_t
;
34 struct eventfd_state
{
35 kmutex_t efd_lock
; /* lock protecting state */
36 boolean_t efd_semaphore
; /* boolean: sema. semantics */
37 kcondvar_t efd_cv
; /* condvar */
38 pollhead_t efd_pollhd
; /* poll head */
39 uint64_t efd_value
; /* value */
40 size_t efd_bwriters
; /* count of blocked writers */
41 eventfd_state_t
*efd_next
; /* next state on global list */
/*
 * Internal global variables.
 */
47 static kmutex_t eventfd_lock
; /* lock protecting state */
48 static dev_info_t
*eventfd_devi
; /* device info */
49 static vmem_t
*eventfd_minor
; /* minor number arena */
50 static void *eventfd_softstate
; /* softstate pointer */
51 static eventfd_state_t
*eventfd_state
; /* global list of state */
55 eventfd_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
57 eventfd_state_t
*state
;
58 major_t major
= getemajor(*devp
);
59 minor_t minor
= getminor(*devp
);
61 if (minor
!= EVENTFDMNRN_EVENTFD
)
64 mutex_enter(&eventfd_lock
);
66 minor
= (minor_t
)(uintptr_t)vmem_alloc(eventfd_minor
, 1,
67 VM_BESTFIT
| VM_SLEEP
);
69 if (ddi_soft_state_zalloc(eventfd_softstate
, minor
) != DDI_SUCCESS
) {
70 vmem_free(eventfd_minor
, (void *)(uintptr_t)minor
, 1);
71 mutex_exit(&eventfd_lock
);
75 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
76 *devp
= makedevice(major
, minor
);
78 state
->efd_next
= eventfd_state
;
79 eventfd_state
= state
;
81 mutex_exit(&eventfd_lock
);
88 eventfd_read(dev_t dev
, uio_t
*uio
, cred_t
*cr
)
90 eventfd_state_t
*state
;
91 minor_t minor
= getminor(dev
);
95 if (uio
->uio_resid
< sizeof (val
))
98 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
100 mutex_enter(&state
->efd_lock
);
102 while (state
->efd_value
== 0) {
103 if (uio
->uio_fmode
& (FNDELAY
|FNONBLOCK
)) {
104 mutex_exit(&state
->efd_lock
);
108 if (!cv_wait_sig_swap(&state
->efd_cv
, &state
->efd_lock
)) {
109 mutex_exit(&state
->efd_lock
);
115 * We have a non-zero value and we own the lock; our behavior now
116 * depends on whether or not EFD_SEMAPHORE was set when the eventfd
119 val
= oval
= state
->efd_value
;
121 if (state
->efd_semaphore
) {
125 state
->efd_value
= 0;
128 err
= uiomove(&val
, sizeof (val
), UIO_READ
, uio
);
131 * Wake any writers blocked on this eventfd as this read operation may
132 * have created adequate capacity for their values.
134 if (state
->efd_bwriters
!= 0) {
135 cv_broadcast(&state
->efd_cv
);
137 mutex_exit(&state
->efd_lock
);
140 * It is necessary to emit POLLOUT events only when the eventfd
141 * transitions from EVENTFD_VALMAX to a lower value. At all other
142 * times, it is already considered writable by poll.
144 if (oval
== EVENTFD_VALMAX
) {
145 pollwakeup(&state
->efd_pollhd
, POLLWRNORM
| POLLOUT
);
153 eventfd_write(dev_t dev
, struct uio
*uio
, cred_t
*credp
)
155 eventfd_state_t
*state
;
156 minor_t minor
= getminor(dev
);
160 if (uio
->uio_resid
< sizeof (val
))
163 if ((err
= uiomove(&val
, sizeof (val
), UIO_WRITE
, uio
)) != 0)
166 if (val
> EVENTFD_VALMAX
)
169 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
171 mutex_enter(&state
->efd_lock
);
173 while (val
> EVENTFD_VALMAX
- state
->efd_value
) {
174 if (uio
->uio_fmode
& (FNDELAY
|FNONBLOCK
)) {
175 mutex_exit(&state
->efd_lock
);
179 state
->efd_bwriters
++;
180 if (!cv_wait_sig_swap(&state
->efd_cv
, &state
->efd_lock
)) {
181 state
->efd_bwriters
--;
182 mutex_exit(&state
->efd_lock
);
185 state
->efd_bwriters
--;
189 * We now know that we can add the value without overflowing.
191 state
->efd_value
= (oval
= state
->efd_value
) + val
;
194 * If the value was previously "empty", notify blocked readers that
198 cv_broadcast(&state
->efd_cv
);
200 mutex_exit(&state
->efd_lock
);
203 * Notify pollers as well if the eventfd is now readable.
206 pollwakeup(&state
->efd_pollhd
, POLLRDNORM
| POLLIN
);
214 eventfd_poll(dev_t dev
, short events
, int anyyet
, short *reventsp
,
215 struct pollhead
**phpp
)
217 eventfd_state_t
*state
;
218 minor_t minor
= getminor(dev
);
221 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
223 mutex_enter(&state
->efd_lock
);
225 if (state
->efd_value
> 0)
226 revents
|= POLLRDNORM
| POLLIN
;
228 if (state
->efd_value
< EVENTFD_VALMAX
)
229 revents
|= POLLWRNORM
| POLLOUT
;
231 *reventsp
= revents
& events
;
232 if ((*reventsp
== 0 && !anyyet
) || (events
& POLLET
))
233 *phpp
= &state
->efd_pollhd
;
235 mutex_exit(&state
->efd_lock
);
242 eventfd_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int md
, cred_t
*cr
, int *rv
)
244 eventfd_state_t
*state
;
245 minor_t minor
= getminor(dev
);
247 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
250 case EVENTFDIOC_SEMAPHORE
: {
251 mutex_enter(&state
->efd_lock
);
252 state
->efd_semaphore
^= 1;
253 mutex_exit(&state
->efd_lock
);
267 eventfd_close(dev_t dev
, int flag
, int otyp
, cred_t
*cred_p
)
269 eventfd_state_t
*state
, **sp
;
270 minor_t minor
= getminor(dev
);
272 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
274 if (state
->efd_pollhd
.ph_list
!= NULL
) {
275 pollwakeup(&state
->efd_pollhd
, POLLERR
);
276 pollhead_clean(&state
->efd_pollhd
);
279 mutex_enter(&eventfd_lock
);
282 * Remove our state from our global list.
284 for (sp
= &eventfd_state
; *sp
!= state
; sp
= &((*sp
)->efd_next
))
287 *sp
= (*sp
)->efd_next
;
289 ddi_soft_state_free(eventfd_softstate
, minor
);
290 vmem_free(eventfd_minor
, (void *)(uintptr_t)minor
, 1);
292 mutex_exit(&eventfd_lock
);
298 eventfd_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
305 return (DDI_SUCCESS
);
308 return (DDI_FAILURE
);
311 mutex_enter(&eventfd_lock
);
313 if (ddi_soft_state_init(&eventfd_softstate
,
314 sizeof (eventfd_state_t
), 0) != 0) {
315 cmn_err(CE_NOTE
, "/dev/eventfd failed to create soft state");
316 mutex_exit(&eventfd_lock
);
317 return (DDI_FAILURE
);
320 if (ddi_create_minor_node(devi
, "eventfd", S_IFCHR
,
321 EVENTFDMNRN_EVENTFD
, DDI_PSEUDO
, 0) == DDI_FAILURE
) {
322 cmn_err(CE_NOTE
, "/dev/eventfd couldn't create minor node");
323 ddi_soft_state_fini(&eventfd_softstate
);
324 mutex_exit(&eventfd_lock
);
325 return (DDI_FAILURE
);
328 ddi_report_dev(devi
);
331 eventfd_minor
= vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE
,
332 UINT32_MAX
- EVENTFDMNRN_CLONE
, 1, NULL
, NULL
, NULL
, 0,
333 VM_SLEEP
| VMC_IDENTIFIER
);
335 mutex_exit(&eventfd_lock
);
337 return (DDI_SUCCESS
);
342 eventfd_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
349 return (DDI_SUCCESS
);
352 return (DDI_FAILURE
);
355 mutex_enter(&eventfd_lock
);
356 vmem_destroy(eventfd_minor
);
358 ddi_remove_minor_node(eventfd_devi
, NULL
);
361 ddi_soft_state_fini(&eventfd_softstate
);
362 mutex_exit(&eventfd_lock
);
364 return (DDI_SUCCESS
);
369 eventfd_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
374 case DDI_INFO_DEVT2DEVINFO
:
375 *result
= (void *)eventfd_devi
;
378 case DDI_INFO_DEVT2INSTANCE
:
388 static struct cb_ops eventfd_cb_ops
= {
389 eventfd_open
, /* open */
390 eventfd_close
, /* close */
391 nulldev
, /* strategy */
394 eventfd_read
, /* read */
395 eventfd_write
, /* write */
396 eventfd_ioctl
, /* ioctl */
400 eventfd_poll
, /* poll */
401 ddi_prop_op
, /* cb_prop_op */
403 D_NEW
| D_MP
/* Driver compatibility flag */
406 static struct dev_ops eventfd_ops
= {
407 DEVO_REV
, /* devo_rev */
409 eventfd_info
, /* get_dev_info */
410 nulldev
, /* identify */
412 eventfd_attach
, /* attach */
413 eventfd_detach
, /* detach */
415 &eventfd_cb_ops
, /* driver operations */
416 NULL
, /* bus operations */
417 nodev
, /* dev power */
418 ddi_quiesce_not_needed
, /* quiesce */
421 static struct modldrv modldrv
= {
422 &mod_driverops
, /* module type (this is a pseudo driver) */
423 "eventfd support", /* name of module */
424 &eventfd_ops
, /* driver ops */
427 static struct modlinkage modlinkage
= {
436 return (mod_install(&modlinkage
));
440 _info(struct modinfo
*modinfop
)
442 return (mod_info(&modlinkage
, modinfop
));
448 return (mod_remove(&modlinkage
));