/* $NetBSD$ */

/*++
/* NAME
/*	events 3
/* SUMMARY
/*	event manager
/* SYNOPSIS
/*	#include <events.h>
/*
/*	time_t	event_time()
/*
/*	void	event_loop(delay)
/*	int	delay;
/*
/*	time_t	event_request_timer(callback, context, delay)
/*	void	(*callback)(int event, char *context);
/*	char	*context;
/*	int	delay;
/*
/*	int	event_cancel_timer(callback, context)
/*	void	(*callback)(int event, char *context);
/*	char	*context;
/*
/*	void	event_enable_read(fd, callback, context)
/*	int	fd;
/*	void	(*callback)(int event, char *context);
/*	char	*context;
/*
/*	void	event_enable_write(fd, callback, context)
/*	int	fd;
/*	void	(*callback)(int event, char *context);
/*	char	*context;
/*
/*	void	event_disable_readwrite(fd)
/*	int	fd;
/*
/*	void	event_drain(time_limit)
/*	int	time_limit;
/* DESCRIPTION
/*	This module delivers I/O and timer events.
/*	Multiple I/O streams and timers can be monitored simultaneously.
/*	Events are delivered via callback routines provided by the
/*	application. When requesting an event, the application can provide
/*	private context that is passed back when the callback routine is
/*	executed.
/*
/*	event_time() returns a cached value of the current time.
/*
/*	event_loop() monitors all I/O channels for which the application has
/*	expressed interest, and monitors the timer request queue.
/*	It notifies the application whenever events of interest happen.
/*	A negative delay value causes the function to pause until something
/*	happens; a positive delay value causes event_loop() to return when
/*	the next event happens or when the delay time in seconds is over,
/*	whatever happens first. A zero delay effectuates a poll.
/*
/*	Note: in order to avoid race conditions, event_loop() cannot
/*	be called recursively.
/*
/*	event_request_timer() causes the specified callback function to
/*	be called with the specified context argument after \fIdelay\fR
/*	seconds, or as soon as possible thereafter. The delay should
/*	not be negative.
/*	The event argument is equal to EVENT_TIME.
/*	Only one timer request can be active per (callback, context) pair.
/*	Calling event_request_timer() with an existing (callback, context)
/*	pair does not schedule a new event, but updates the time of event
/*	delivery. The result is the absolute time at which the timer is
/*	scheduled to go off.
/*
/*	event_cancel_timer() cancels the specified (callback, context) request.
/*	The application is allowed to cancel non-existing requests. The result
/*	value is the amount of time left before the timer would have gone off,
/*	or -1 in case of no pending timer.
/*
/*	event_enable_read() (event_enable_write()) enables read (write) events
/*	on the named I/O channel. It is up to the application to assemble
/*	partial reads or writes.
/*	An I/O channel cannot handle more than one request at the
/*	same time. The application is allowed to enable an event that
/*	is already enabled (same channel, same read or write operation,
/*	but perhaps a different callback or context). On systems with
/*	kernel-based event filters this is preferred usage, because
/*	each disable and enable request would cost a system call.
/*
/*	The manifest constants EVENT_NULL_CONTEXT and EVENT_NULL_TYPE
/*	provide convenient null values.
/*
/*	The callback routine has the following arguments:
/* .IP fd
/*	The stream on which the event happened.
/* .IP event
/*	An indication of the event type:
/* .RS
/* .IP EVENT_READ
/*	read event,
/* .IP EVENT_WRITE
/*	write event,
/* .IP EVENT_XCPT
/*	exception (actually, any event other than read or write).
/* .RE
/* .IP context
/*	Application context given to event_enable_read() (event_enable_write()).
/* .PP
/*	event_disable_readwrite() disables further I/O events on the specified
/*	I/O channel. The application is allowed to cancel non-existing
/*	I/O event requests.
/*
/*	event_drain() repeatedly calls event_loop() until no more timer
/*	events or I/O events are pending or until the time limit is reached.
/*	This routine must not be called from an event_whatever() callback
/*	routine. Note: this function ignores pending timer events, and
/*	assumes that no new I/O events will be registered.
/* DIAGNOSTICS
/*	Panics: interface violations. Fatal errors: out of memory,
/*	system call failure. Warnings: the number of available
/*	file descriptors is much less than FD_SETSIZE.
/* BUGS
/*	This module is based on event selection. It assumes that the
/*	event_loop() routine is called frequently. This approach is
/*	not suitable for applications with compute-bound loops that
/*	take a significant amount of time.
/* LICENSE
/* .ad
/* .fi
/*	The Secure Mailer license must be distributed with this software.
/* AUTHOR(S)
/*	Wietse Venema
/*	IBM T.J. Watson Research
/*	P.O. Box 704
/*	Yorktown Heights, NY 10598, USA
/*--*/
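 /*
  * Example (illustration only; not part of the original module): a typical
  * single-threaded Postfix-style client registers callbacks and then spends
  * its life inside event_loop(). The callback and descriptor names below are
  * hypothetical, and error handling is omitted.
  */
#if 0
#include <events.h>

static void read_event(int event, char *context)
{
    /* event is EVENT_READ (or EVENT_XCPT); context is what was passed in. */
}

static void timeout_event(int event, char *context)
{
    /* event is EVENT_TIME; timers are one-shot, so re-arm if needed. */
}

static void example_main_loop(int sock_fd)
{
    event_enable_read(sock_fd, read_event, (char *) 0);
    event_request_timer(timeout_event, (char *) 0, 30);	/* 30 seconds */
    for (;;)
	event_loop(-1);			/* wait until something happens */
}
#endif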
/* System libraries. */

#include "sys_defs.h"
#include <sys/time.h>			/* XXX: 44BSD uses bzero() */
#include <time.h>
#include <errno.h>
#include <unistd.h>
#include <stddef.h>			/* offsetof() */
#include <string.h>			/* bzero() prototype for 44BSD */
#include <limits.h>			/* INT_MAX */

#ifdef USE_SYS_SELECT_H
#include <sys/select.h>
#endif

/* Application-specific. */

#include "mymalloc.h"
#include "msg.h"
#include "iostuff.h"
#include "ring.h"
#include "events.h"

#if !defined(EVENTS_STYLE)
#error "must define EVENTS_STYLE"
#endif
 /*
  * Traditional BSD-style select(2). Works everywhere, but has a built-in
  * upper bound on the number of file descriptors, and that limit is hard to
  * change on Linux. Is sometimes emulated with SYSV-style poll(2) which
  * doesn't have the file descriptor limit, but unfortunately does not help
  * to improve the performance of servers with lots of connections.
  */
#define EVENT_ALLOC_INCR		10
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
typedef fd_set EVENT_MASK;

#define EVENT_MASK_BYTE_COUNT(mask)	sizeof(*(mask))
#define EVENT_MASK_ZERO(mask)		FD_ZERO(mask)
#define EVENT_MASK_SET(fd, mask)	FD_SET((fd), (mask))
#define EVENT_MASK_ISSET(fd, mask)	FD_ISSET((fd), (mask))
#define EVENT_MASK_CLR(fd, mask)	FD_CLR((fd), (mask))
#else

 /*
  * Kernel-based event filters (kqueue, /dev/poll, epoll). We use the
  * following file descriptor mask structure which is expanded on the fly.
  */
typedef struct {
    char   *data;			/* bit mask */
    size_t  data_len;			/* data byte count */
} EVENT_MASK;
/* Bits per byte, byte in vector, bit offset in byte, bytes per set. */

#define EVENT_MASK_NBBY		(8)
#define EVENT_MASK_FD_BYTE(fd, mask) \
	(((unsigned char *) (mask)->data)[(fd) / EVENT_MASK_NBBY])
#define EVENT_MASK_FD_BIT(fd)	(1 << ((fd) % EVENT_MASK_NBBY))
#define EVENT_MASK_BYTES_NEEDED(len) \
	(((len) + (EVENT_MASK_NBBY - 1)) / EVENT_MASK_NBBY)
#define EVENT_MASK_BYTE_COUNT(mask)	((mask)->data_len)

/* Memory management. */

#define EVENT_MASK_ALLOC(mask, bit_len) do { \
	size_t _byte_len = EVENT_MASK_BYTES_NEEDED(bit_len); \
	(mask)->data = mymalloc(_byte_len); \
	memset((mask)->data, 0, _byte_len); \
	(mask)->data_len = _byte_len; \
    } while (0)
#define EVENT_MASK_REALLOC(mask, bit_len) do { \
	size_t _byte_len = EVENT_MASK_BYTES_NEEDED(bit_len); \
	size_t _old_len = (mask)->data_len; \
	(mask)->data = myrealloc((mask)->data, _byte_len); \
	if (_byte_len > _old_len) \
	    memset((mask)->data + _old_len, 0, _byte_len - _old_len); \
	(mask)->data_len = _byte_len; \
    } while (0)
#define EVENT_MASK_FREE(mask)	myfree((mask)->data)

/* Set operations, modeled after FD_ZERO/SET/ISSET/CLR. */

#define EVENT_MASK_ZERO(mask) \
	memset((mask)->data, 0, (mask)->data_len)
#define EVENT_MASK_SET(fd, mask) \
	(EVENT_MASK_FD_BYTE((fd), (mask)) |= EVENT_MASK_FD_BIT(fd))
#define EVENT_MASK_ISSET(fd, mask) \
	(EVENT_MASK_FD_BYTE((fd), (mask)) & EVENT_MASK_FD_BIT(fd))
#define EVENT_MASK_CLR(fd, mask) \
	(EVENT_MASK_FD_BYTE((fd), (mask)) &= ~EVENT_MASK_FD_BIT(fd))
#endif
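 /*
  * Example (illustration only; not used by this module): how the expandable
  * EVENT_MASK macros above are meant to be exercised. With kernel-based
  * filters the mask starts small and EVENT_MASK_REALLOC() grows it as higher
  * descriptor numbers show up; existing bits are preserved.
  */
#if 0
static void event_mask_demo(void)
{
    EVENT_MASK demo_mask;

    EVENT_MASK_ALLOC(&demo_mask, 10);		/* bits for fds 0..9 */
    EVENT_MASK_SET(5, &demo_mask);		/* turn on bit 5 of byte 0 */
    EVENT_MASK_REALLOC(&demo_mask, 100);	/* grow; old bits are kept */
    if (EVENT_MASK_ISSET(5, &demo_mask))
	EVENT_MASK_CLR(5, &demo_mask);		/* still set after the realloc */
    EVENT_MASK_FREE(&demo_mask);
}
#endif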
 /*
  * I/O events.
  */
typedef struct EVENT_FDTABLE EVENT_FDTABLE;

struct EVENT_FDTABLE {
    EVENT_NOTIFY_RDWR callback;
    char   *context;
};
static EVENT_MASK event_rmask;		/* enabled read events */
static EVENT_MASK event_wmask;		/* enabled write events */
static EVENT_MASK event_xmask;		/* for bad news mostly */
static int event_fdlimit;		/* per-process open file limit */
static EVENT_FDTABLE *event_fdtable;	/* one slot per file descriptor */
static int event_fdslots;		/* number of file descriptor slots */
static int event_max_fd = -1;		/* highest fd number seen */
 /*
  * FreeBSD kqueue supports no system call to find out what descriptors are
  * registered in the kernel-based filter. To implement our own sanity checks
  * we maintain our own descriptor bitmask.
  *
  * FreeBSD kqueue does support application context pointers. Unfortunately,
  * changing that information would cost a system call, and some of the
  * competitors don't support application context. To keep the implementation
  * simple we maintain our own table with call-back information.
  *
  * FreeBSD kqueue silently unregisters a descriptor from its filter when the
  * descriptor is closed, so our information could get out of sync with the
  * kernel. But that will never happen, because we have to meticulously
  * unregister a file descriptor before it is closed, to avoid errors on
  * systems that are built with EVENTS_STYLE == EVENTS_STYLE_SELECT.
  */
#if (EVENTS_STYLE == EVENTS_STYLE_KQUEUE)
#include <sys/event.h>

 /*
  * Some early FreeBSD implementations don't have the EV_SET macro.
  */
#ifndef EV_SET
#define EV_SET(kp, id, fi, fl, ffl, da, ud) do { \
	(kp)->ident = (id); \
	(kp)->filter = (fi); \
	(kp)->flags = (fl); \
	(kp)->fflags = (ffl); \
	(kp)->data = (da); \
	(kp)->udata = (ud); \
    } while(0)
#endif

 /*
  * Macros to initialize the kernel-based filter; see event_init().
  */
static int event_kq;			/* handle to event filter */

#define EVENT_REG_INIT_HANDLE(er, n) do { \
	er = event_kq = kqueue(); \
    } while (0)
#define EVENT_REG_INIT_TEXT	"kqueue"

 /*
  * Macros to update the kernel-based filter; see event_enable_read(),
  * event_enable_write() and event_disable_readwrite().
  */
#define EVENT_REG_FD_OP(er, fh, ev, op) do { \
	struct kevent dummy; \
	EV_SET(&dummy, (fh), (ev), (op), 0, 0, 0); \
	(er) = kevent(event_kq, &dummy, 1, 0, 0, 0); \
    } while (0)

#define EVENT_REG_ADD_OP(e, f, ev)	EVENT_REG_FD_OP((e), (f), (ev), EV_ADD)
#define EVENT_REG_ADD_READ(e, f)	EVENT_REG_ADD_OP((e), (f), EVFILT_READ)
#define EVENT_REG_ADD_WRITE(e, f)	EVENT_REG_ADD_OP((e), (f), EVFILT_WRITE)
#define EVENT_REG_ADD_TEXT		"kevent EV_ADD"

#define EVENT_REG_DEL_OP(e, f, ev)	EVENT_REG_FD_OP((e), (f), (ev), EV_DELETE)
#define EVENT_REG_DEL_READ(e, f)	EVENT_REG_DEL_OP((e), (f), EVFILT_READ)
#define EVENT_REG_DEL_WRITE(e, f)	EVENT_REG_DEL_OP((e), (f), EVFILT_WRITE)
#define EVENT_REG_DEL_TEXT		"kevent EV_DELETE"

 /*
  * Macros to retrieve event buffers from the kernel; see event_loop().
  */
typedef struct kevent EVENT_BUFFER;

#define EVENT_BUFFER_READ(event_count, event_buf, buflen, delay) do { \
	struct timespec ts; \
	struct timespec *tsp; \
	if ((delay) < 0) { \
	    tsp = 0; \
	} else { \
	    tsp = &ts; \
	    ts.tv_nsec = 0; \
	    ts.tv_sec = (delay); \
	} \
	(event_count) = kevent(event_kq, (struct kevent *) 0, 0, (event_buf), \
			       (buflen), (tsp)); \
    } while (0)
#define EVENT_BUFFER_READ_TEXT	"kevent"

 /*
  * Macros to process event buffers from the kernel; see event_loop().
  */
#define EVENT_GET_FD(bp)	((bp)->ident)
#define EVENT_GET_TYPE(bp)	((bp)->filter)
#define EVENT_TEST_READ(bp)	(EVENT_GET_TYPE(bp) == EVFILT_READ)
#define EVENT_TEST_WRITE(bp)	(EVENT_GET_TYPE(bp) == EVFILT_WRITE)

#endif
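 /*
  * Example (illustration only): what the kqueue macros above amount to at
  * the system-call level. EVENT_REG_ADD_READ() issues one kevent() call to
  * register a read filter, and EVENT_BUFFER_READ() issues one kevent() call
  * to harvest pending events.
  */
#if 0
static void kqueue_demo(int fd)
{
    int     kq = kqueue();
    struct kevent change;
    struct kevent result[10];
    struct timespec ts = {1, 0};		/* one-second timeout */
    int     n;

    EV_SET(&change, fd, EVFILT_READ, EV_ADD, 0, 0, 0);
    (void) kevent(kq, &change, 1, (struct kevent *) 0, 0, (struct timespec *) 0);
    n = kevent(kq, (struct kevent *) 0, 0, result, 10, &ts);
    /* n < 0: error; n == 0: timeout; n > 0: result[0..n-1] are ready. */
}
#endif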
 /*
  * Solaris /dev/poll does not support application context, so we have to
  * maintain our own. This has the benefit of avoiding an expensive system
  * call just to change a call-back function or argument.
  *
  * Solaris /dev/poll does have a way to query if a specific descriptor is
  * registered. However, we maintain a descriptor mask anyway because a) it
  * avoids having to make an expensive system call to find out if something
  * is registered, b) some EVENTS_STYLE_MUMBLE implementations need a
  * descriptor bitmask anyway and c) we use the bitmask already to implement
  * sanity checks.
  */
#if (EVENTS_STYLE == EVENTS_STYLE_DEVPOLL)
#include <sys/devpoll.h>
#include <fcntl.h>

 /*
  * Macros to initialize the kernel-based filter; see event_init().
  */
static int event_pollfd;		/* handle to file descriptor set */

#define EVENT_REG_INIT_HANDLE(er, n) do { \
	er = event_pollfd = open("/dev/poll", O_RDWR); \
	if (event_pollfd >= 0) close_on_exec(event_pollfd, CLOSE_ON_EXEC); \
    } while (0)
#define EVENT_REG_INIT_TEXT	"open /dev/poll"

 /*
  * Macros to update the kernel-based filter; see event_enable_read(),
  * event_enable_write() and event_disable_readwrite().
  */
#define EVENT_REG_FD_OP(er, fh, ev) do { \
	struct pollfd dummy; \
	dummy.fd = (fh); \
	dummy.events = (ev); \
	(er) = write(event_pollfd, (char *) &dummy, \
		     sizeof(dummy)) != sizeof(dummy) ? -1 : 0; \
    } while (0)

#define EVENT_REG_ADD_READ(e, f)	EVENT_REG_FD_OP((e), (f), POLLIN)
#define EVENT_REG_ADD_WRITE(e, f)	EVENT_REG_FD_OP((e), (f), POLLOUT)
#define EVENT_REG_ADD_TEXT		"write /dev/poll"

#define EVENT_REG_DEL_BOTH(e, f)	EVENT_REG_FD_OP((e), (f), POLLREMOVE)
#define EVENT_REG_DEL_TEXT		"write /dev/poll"

 /*
  * Macros to retrieve event buffers from the kernel; see event_loop().
  */
typedef struct pollfd EVENT_BUFFER;

#define EVENT_BUFFER_READ(event_count, event_buf, buflen, delay) do { \
	struct dvpoll dvpoll; \
	dvpoll.dp_fds = (event_buf); \
	dvpoll.dp_nfds = (buflen); \
	dvpoll.dp_timeout = (delay) < 0 ? -1 : (delay) * 1000; \
	(event_count) = ioctl(event_pollfd, DP_POLL, &dvpoll); \
    } while (0)
#define EVENT_BUFFER_READ_TEXT	"ioctl DP_POLL"

 /*
  * Macros to process event buffers from the kernel; see event_loop().
  */
#define EVENT_GET_FD(bp)	((bp)->fd)
#define EVENT_GET_TYPE(bp)	((bp)->revents)
#define EVENT_TEST_READ(bp)	(EVENT_GET_TYPE(bp) & POLLIN)
#define EVENT_TEST_WRITE(bp)	(EVENT_GET_TYPE(bp) & POLLOUT)

#endif
 /*
  * Linux epoll supports no system call to find out what descriptors are
  * registered in the kernel-based filter. To implement our own sanity checks
  * we maintain our own descriptor bitmask.
  *
  * Linux epoll does support application context pointers. Unfortunately,
  * changing that information would cost a system call, and some of the
  * competitors don't support application context. To keep the implementation
  * simple we maintain our own table with call-back information.
  *
  * Linux epoll silently unregisters a descriptor from its filter when the
  * descriptor is closed, so our information could get out of sync with the
  * kernel. But that will never happen, because we have to meticulously
  * unregister a file descriptor before it is closed, to avoid errors on
  * systems that are built with EVENTS_STYLE == EVENTS_STYLE_SELECT.
  */
#if (EVENTS_STYLE == EVENTS_STYLE_EPOLL)
#include <sys/epoll.h>

 /*
  * Macros to initialize the kernel-based filter; see event_init().
  */
static int event_epollfd;		/* epoll handle */

#define EVENT_REG_INIT_HANDLE(er, n) do { \
	er = event_epollfd = epoll_create(n); \
	if (event_epollfd >= 0) close_on_exec(event_epollfd, CLOSE_ON_EXEC); \
    } while (0)
#define EVENT_REG_INIT_TEXT	"epoll_create"

 /*
  * Macros to update the kernel-based filter; see event_enable_read(),
  * event_enable_write() and event_disable_readwrite().
  */
#define EVENT_REG_FD_OP(er, fh, ev, op) do { \
	struct epoll_event dummy; \
	dummy.events = (ev); \
	dummy.data.fd = (fh); \
	(er) = epoll_ctl(event_epollfd, (op), (fh), &dummy); \
    } while (0)

#define EVENT_REG_ADD_OP(e, f, ev)	EVENT_REG_FD_OP((e), (f), (ev), EPOLL_CTL_ADD)
#define EVENT_REG_ADD_READ(e, f)	EVENT_REG_ADD_OP((e), (f), EPOLLIN)
#define EVENT_REG_ADD_WRITE(e, f)	EVENT_REG_ADD_OP((e), (f), EPOLLOUT)
#define EVENT_REG_ADD_TEXT		"epoll_ctl EPOLL_CTL_ADD"

#define EVENT_REG_DEL_OP(e, f, ev)	EVENT_REG_FD_OP((e), (f), (ev), EPOLL_CTL_DEL)
#define EVENT_REG_DEL_READ(e, f)	EVENT_REG_DEL_OP((e), (f), EPOLLIN)
#define EVENT_REG_DEL_WRITE(e, f)	EVENT_REG_DEL_OP((e), (f), EPOLLOUT)
#define EVENT_REG_DEL_TEXT		"epoll_ctl EPOLL_CTL_DEL"

 /*
  * Macros to retrieve event buffers from the kernel; see event_loop().
  */
typedef struct epoll_event EVENT_BUFFER;

#define EVENT_BUFFER_READ(event_count, event_buf, buflen, delay) do { \
	(event_count) = epoll_wait(event_epollfd, (event_buf), (buflen), \
				   (delay) < 0 ? -1 : (delay) * 1000); \
    } while (0)
#define EVENT_BUFFER_READ_TEXT	"epoll_wait"

 /*
  * Macros to process event buffers from the kernel; see event_loop().
  */
#define EVENT_GET_FD(bp)	((bp)->data.fd)
#define EVENT_GET_TYPE(bp)	((bp)->events)
#define EVENT_TEST_READ(bp)	(EVENT_GET_TYPE(bp) & EPOLLIN)
#define EVENT_TEST_WRITE(bp)	(EVENT_GET_TYPE(bp) & EPOLLOUT)

#endif
 /*
  * Timer events. Timer requests are kept sorted, in a circular list. We use
  * the RING abstraction, so we get to use a couple ugly macros.
  */
typedef struct EVENT_TIMER EVENT_TIMER;

struct EVENT_TIMER {
    time_t  when;			/* when event is wanted */
    EVENT_NOTIFY_TIME callback;		/* callback function */
    char   *context;			/* callback context */
    RING    ring;			/* linkage */
};

static RING event_timer_head;		/* timer queue head */

#define RING_TO_TIMER(r) \
	((EVENT_TIMER *) ((char *) (r) - offsetof(EVENT_TIMER, ring)))

#define FOREACH_QUEUE_ENTRY(entry, head) \
	for (entry = ring_succ(head); entry != (head); entry = ring_succ(entry))

#define FIRST_TIMER(head) \
	(ring_succ(head) != (head) ? RING_TO_TIMER(ring_succ(head)) : 0)
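 /*
  * Example (illustration only): RING_TO_TIMER() is the usual "container of"
  * trick. Given a pointer to the embedded ring member, subtracting the
  * member's offset recovers the enclosing EVENT_TIMER, so one generic RING
  * list can link application-specific structures.
  */
#if 0
static void ring_to_timer_demo(void)
{
    EVENT_TIMER demo_timer;
    RING   *r = &demo_timer.ring;
    EVENT_TIMER *tp = RING_TO_TIMER(r);		/* tp == &demo_timer */

    (void) tp;
}
#endif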
 /*
  * Other private data structures.
  */
static time_t event_present;		/* cached time of day */

#define EVENT_INIT_NEEDED()	(event_present == 0)
/* event_init - set up tables and such */

static void event_init(void)
{
    EVENT_FDTABLE *fdp;
    int     err;

    if (!EVENT_INIT_NEEDED())
	msg_panic("event_init: repeated call");

    /*
     * Initialize the file descriptor masks and the call-back table. Where
     * possible we extend these data structures on the fly. With select(2)
     * based implementations we can only handle FD_SETSIZE open files.
     */
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
    if ((event_fdlimit = open_limit(FD_SETSIZE)) < 0)
	msg_fatal("unable to determine open file limit");
#else
    if ((event_fdlimit = open_limit(INT_MAX)) < 0)
	msg_fatal("unable to determine open file limit");
#endif
    if (event_fdlimit < FD_SETSIZE / 2 && event_fdlimit < 256)
	msg_warn("could allocate space for only %d open files", event_fdlimit);
    event_fdslots = EVENT_ALLOC_INCR;
    event_fdtable = (EVENT_FDTABLE *)
	mymalloc(sizeof(EVENT_FDTABLE) * event_fdslots);
    for (fdp = event_fdtable; fdp < event_fdtable + event_fdslots; fdp++) {
	fdp->callback = 0;
	fdp->context = 0;
    }

    /*
     * Initialize the I/O event request masks.
     */
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
    EVENT_MASK_ZERO(&event_rmask);
    EVENT_MASK_ZERO(&event_wmask);
    EVENT_MASK_ZERO(&event_xmask);
#else
    EVENT_MASK_ALLOC(&event_rmask, event_fdslots);
    EVENT_MASK_ALLOC(&event_wmask, event_fdslots);
    EVENT_MASK_ALLOC(&event_xmask, event_fdslots);

    /*
     * Initialize the kernel-based filter.
     */
    EVENT_REG_INIT_HANDLE(err, event_fdslots);
    if (err < 0)
	msg_fatal("%s: %m", EVENT_REG_INIT_TEXT);
#endif

    /*
     * Initialize timer stuff.
     */
    ring_init(&event_timer_head);
    (void) time(&event_present);

    /*
     * Avoid an infinite initialization loop.
     */
    if (EVENT_INIT_NEEDED())
	msg_panic("event_init: unable to initialize");
}
/* event_extend - make room for more descriptor slots */

static void event_extend(int fd)
{
    const char *myname = "event_extend";
    int     old_slots = event_fdslots;
    int     new_slots = (event_fdslots > fd / 2 ?
			 2 * old_slots : fd + EVENT_ALLOC_INCR);
    EVENT_FDTABLE *fdp;
    int     err;

    if (msg_verbose > 2)
	msg_info("%s: fd %d", myname, fd);
    event_fdtable = (EVENT_FDTABLE *)
	myrealloc((char *) event_fdtable, sizeof(EVENT_FDTABLE) * new_slots);
    event_fdslots = new_slots;
    for (fdp = event_fdtable + old_slots;
	 fdp < event_fdtable + new_slots; fdp++) {
	fdp->callback = 0;
	fdp->context = 0;
    }

    /*
     * Initialize the I/O event request masks.
     */
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
    EVENT_MASK_REALLOC(&event_rmask, new_slots);
    EVENT_MASK_REALLOC(&event_wmask, new_slots);
    EVENT_MASK_REALLOC(&event_xmask, new_slots);
#endif
#ifdef EVENT_REG_UPD_HANDLE
    EVENT_REG_UPD_HANDLE(err, new_slots);
    if (err < 0)
	msg_fatal("%s: %s: %m", myname, EVENT_REG_UPD_TEXT);
#endif
}
/* event_time - look up cached time of day */

time_t  event_time(void)
{
    if (EVENT_INIT_NEEDED())
	event_init();

    return (event_present);
}
/* event_drain - loop until all pending events are done */

void    event_drain(int time_limit)
{
    EVENT_MASK zero_mask;
    time_t  max_time;

    if (EVENT_INIT_NEEDED())
	return;

#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
    EVENT_MASK_ZERO(&zero_mask);
#else
    EVENT_MASK_ALLOC(&zero_mask, event_fdslots);
#endif
    (void) time(&event_present);
    max_time = event_present + time_limit;
    while (event_present < max_time
	   && (event_timer_head.pred != &event_timer_head
	       || memcmp(&zero_mask, &event_xmask,
			 EVENT_MASK_BYTE_COUNT(&zero_mask)) != 0)) {
	event_loop(1);
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
	if (EVENT_MASK_BYTE_COUNT(&zero_mask)
	    != EVENT_MASK_BYTES_NEEDED(event_fdslots))
	    EVENT_MASK_REALLOC(&zero_mask, event_fdslots);
#endif
    }
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
    EVENT_MASK_FREE(&zero_mask);
#endif
}
/* event_enable_read - enable read events */

void    event_enable_read(int fd, EVENT_NOTIFY_RDWR callback, char *context)
{
    const char *myname = "event_enable_read";
    EVENT_FDTABLE *fdp;
    int     err;

    if (EVENT_INIT_NEEDED())
	event_init();

    /*
     * Sanity checks.
     */
    if (fd < 0 || fd >= event_fdlimit)
	msg_panic("%s: bad file descriptor: %d", myname, fd);

    if (msg_verbose > 2)
	msg_info("%s: fd %d", myname, fd);

    if (fd >= event_fdslots)
	event_extend(fd);

    /*
     * Disallow mixed (i.e. read and write) requests on the same descriptor.
     */
    if (EVENT_MASK_ISSET(fd, &event_wmask))
	msg_panic("%s: fd %d: read/write I/O request", myname, fd);

    /*
     * Postfix 2.4 allows multiple event_enable_read() calls on the same
     * descriptor without requiring event_disable_readwrite() calls between
     * them. With kernel-based filters (kqueue, /dev/poll, epoll) it's
     * wasteful to make system calls when we change only application
     * call-back information. It has a noticeable effect on smtp-source
     * performance.
     */
    if (EVENT_MASK_ISSET(fd, &event_rmask) == 0) {
	EVENT_MASK_SET(fd, &event_xmask);
	EVENT_MASK_SET(fd, &event_rmask);
	if (event_max_fd < fd)
	    event_max_fd = fd;
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
	EVENT_REG_ADD_READ(err, fd);
	if (err < 0)
	    msg_fatal("%s: %s: %m", myname, EVENT_REG_ADD_TEXT);
#endif
    }
    fdp = event_fdtable + fd;
    if (fdp->callback != callback || fdp->context != context) {
	fdp->callback = callback;
	fdp->context = context;
    }
}
/* event_enable_write - enable write events */

void    event_enable_write(int fd, EVENT_NOTIFY_RDWR callback, char *context)
{
    const char *myname = "event_enable_write";
    EVENT_FDTABLE *fdp;
    int     err;

    if (EVENT_INIT_NEEDED())
	event_init();

    /*
     * Sanity checks.
     */
    if (fd < 0 || fd >= event_fdlimit)
	msg_panic("%s: bad file descriptor: %d", myname, fd);

    if (msg_verbose > 2)
	msg_info("%s: fd %d", myname, fd);

    if (fd >= event_fdslots)
	event_extend(fd);

    /*
     * Disallow mixed (i.e. read and write) requests on the same descriptor.
     */
    if (EVENT_MASK_ISSET(fd, &event_rmask))
	msg_panic("%s: fd %d: read/write I/O request", myname, fd);

    /*
     * Postfix 2.4 allows multiple event_enable_write() calls on the same
     * descriptor without requiring event_disable_readwrite() calls between
     * them. With kernel-based filters (kqueue, /dev/poll, epoll) it's
     * incredibly wasteful to make unregister and register system calls when
     * we change only application call-back information. It has a noticeable
     * effect on smtp-source performance.
     */
    if (EVENT_MASK_ISSET(fd, &event_wmask) == 0) {
	EVENT_MASK_SET(fd, &event_xmask);
	EVENT_MASK_SET(fd, &event_wmask);
	if (event_max_fd < fd)
	    event_max_fd = fd;
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
	EVENT_REG_ADD_WRITE(err, fd);
	if (err < 0)
	    msg_fatal("%s: %s: %m", myname, EVENT_REG_ADD_TEXT);
#endif
    }
    fdp = event_fdtable + fd;
    if (fdp->callback != callback || fdp->context != context) {
	fdp->callback = callback;
	fdp->context = context;
    }
}
/* event_disable_readwrite - disable request for read or write events */

void    event_disable_readwrite(int fd)
{
    const char *myname = "event_disable_readwrite";
    EVENT_FDTABLE *fdp;
    int     err;

    if (EVENT_INIT_NEEDED())
	event_init();

    /*
     * Sanity checks.
     */
    if (fd < 0 || fd >= event_fdlimit)
	msg_panic("%s: bad file descriptor: %d", myname, fd);

    if (msg_verbose > 2)
	msg_info("%s: fd %d", myname, fd);

    /*
     * Don't complain when there is nothing to cancel. The request may have
     * been canceled from another thread.
     */
    if (fd >= event_fdslots)
	return;
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
#ifdef EVENT_REG_DEL_BOTH
    /* XXX Can't seem to disable READ and WRITE events selectively. */
    if (EVENT_MASK_ISSET(fd, &event_rmask)
	|| EVENT_MASK_ISSET(fd, &event_wmask)) {
	EVENT_REG_DEL_BOTH(err, fd);
	if (err < 0)
	    msg_fatal("%s: %s: %m", myname, EVENT_REG_DEL_TEXT);
    }
#else
    if (EVENT_MASK_ISSET(fd, &event_rmask)) {
	EVENT_REG_DEL_READ(err, fd);
	if (err < 0)
	    msg_fatal("%s: %s: %m", myname, EVENT_REG_DEL_TEXT);
    } else if (EVENT_MASK_ISSET(fd, &event_wmask)) {
	EVENT_REG_DEL_WRITE(err, fd);
	if (err < 0)
	    msg_fatal("%s: %s: %m", myname, EVENT_REG_DEL_TEXT);
    }
#endif					/* EVENT_REG_DEL_BOTH */
#endif					/* != EVENTS_STYLE_SELECT */
    EVENT_MASK_CLR(fd, &event_xmask);
    EVENT_MASK_CLR(fd, &event_rmask);
    EVENT_MASK_CLR(fd, &event_wmask);
    fdp = event_fdtable + fd;
    fdp->callback = 0;
    fdp->context = 0;
}
/* event_request_timer - (re)set timer */

time_t  event_request_timer(EVENT_NOTIFY_TIME callback, char *context, int delay)
{
    const char *myname = "event_request_timer";
    RING   *ring;
    EVENT_TIMER *timer;

    if (EVENT_INIT_NEEDED())
	event_init();

    /*
     * Sanity checks.
     */
    if (delay < 0)
	msg_panic("%s: invalid delay: %d", myname, delay);

    /*
     * Make sure we schedule this event at the right time.
     */
    time(&event_present);

    /*
     * See if they are resetting an existing timer request. If so, take the
     * request away from the timer queue so that it can be inserted at the
     * right place.
     */
    FOREACH_QUEUE_ENTRY(ring, &event_timer_head) {
	timer = RING_TO_TIMER(ring);
	if (timer->callback == callback && timer->context == context) {
	    timer->when = event_present + delay;
	    ring_detach(ring);
	    if (msg_verbose > 2)
		msg_info("%s: reset 0x%lx 0x%lx %d", myname,
			 (long) callback, (long) context, delay);
	    break;
	}
    }

    /*
     * If not found, schedule a new timer request.
     */
    if (ring == &event_timer_head) {
	timer = (EVENT_TIMER *) mymalloc(sizeof(EVENT_TIMER));
	timer->when = event_present + delay;
	timer->callback = callback;
	timer->context = context;
	if (msg_verbose > 2)
	    msg_info("%s: set 0x%lx 0x%lx %d", myname,
		     (long) callback, (long) context, delay);
    }

    /*
     * Insert the request at the right place. Timer requests are kept sorted
     * to reduce lookup overhead in the event loop.
     */
    FOREACH_QUEUE_ENTRY(ring, &event_timer_head)
	if (timer->when < RING_TO_TIMER(ring)->when)
	    break;
    ring_prepend(ring, &timer->ring);

    return (timer->when);
}
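 /*
  * Example (illustration only): because only one timer can be pending per
  * (callback, context) pair, a periodic timer is built by re-arming the
  * request from inside the callback itself. The 10-second interval below is
  * arbitrary.
  */
#if 0
static void every_10_seconds(int event, char *context)
{
    /* ... do the periodic work here ... */
    event_request_timer(every_10_seconds, context, 10);	/* re-arm */
}
#endif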
/* event_cancel_timer - cancel timer */

int     event_cancel_timer(EVENT_NOTIFY_TIME callback, char *context)
{
    const char *myname = "event_cancel_timer";
    RING   *ring;
    EVENT_TIMER *timer;
    int     time_left = -1;

    if (EVENT_INIT_NEEDED())
	event_init();

    /*
     * See if they are canceling an existing timer request. Do not complain
     * when the request is not found. It might have been canceled from some
     * other thread.
     */
    FOREACH_QUEUE_ENTRY(ring, &event_timer_head) {
	timer = RING_TO_TIMER(ring);
	if (timer->callback == callback && timer->context == context) {
	    if ((time_left = timer->when - event_present) < 0)
		time_left = 0;
	    ring_detach(ring);
	    myfree((char *) timer);
	    break;
	}
    }
    if (msg_verbose > 2)
	msg_info("%s: 0x%lx 0x%lx %d", myname,
		 (long) callback, (long) context, time_left);
    return (time_left);
}
/* event_loop - wait for the next event */

void    event_loop(int delay)
{
    const char *myname = "event_loop";
    static int nested;

#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
    fd_set  rmask;
    fd_set  wmask;
    fd_set  xmask;
    struct timeval tv;
    struct timeval *tvp;
    int     new_max_fd;

#else
    EVENT_BUFFER event_buf[100];
    EVENT_BUFFER *bp;

#endif
    int     event_count;
    EVENT_TIMER *timer;
    int     fd;
    EVENT_FDTABLE *fdp;
    int     select_delay;

    if (EVENT_INIT_NEEDED())
	event_init();

    /*
     * XXX Also print the select() masks?
     */
    if (msg_verbose > 2) {
	RING   *ring;

	FOREACH_QUEUE_ENTRY(ring, &event_timer_head) {
	    timer = RING_TO_TIMER(ring);
	    msg_info("%s: time left %3d for 0x%lx 0x%lx", myname,
		     (int) (timer->when - event_present),
		     (long) timer->callback, (long) timer->context);
	}
    }

    /*
     * Find out when the next timer would go off. Timer requests are sorted.
     * If any timer is scheduled, adjust the delay appropriately.
     */
    if ((timer = FIRST_TIMER(&event_timer_head)) != 0) {
	event_present = time((time_t *) 0);
	if ((select_delay = timer->when - event_present) < 0) {
	    select_delay = 0;
	} else if (delay >= 0 && select_delay > delay) {
	    select_delay = delay;
	}
    } else {
	select_delay = delay;
    }
    if (msg_verbose > 2)
	msg_info("event_loop: select_delay %d", select_delay);

    /*
     * Negative delay means: wait until something happens. Zero delay means:
     * poll. Positive delay means: wait at most this long.
     */
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
    if (select_delay < 0) {
	tvp = 0;
    } else {
	tvp = &tv;
	tv.tv_usec = 0;
	tv.tv_sec = select_delay;
    }

    /*
     * Pause until the next event happens. When select() has a problem, don't
     * go into a tight loop. Allow select() to be interrupted due to the
     * arrival of a signal.
     */
    rmask = event_rmask;
    wmask = event_wmask;
    xmask = event_xmask;

    event_count = select(event_max_fd + 1, &rmask, &wmask, &xmask, tvp);
    if (event_count < 0) {
	if (errno != EINTR)
	    msg_fatal("event_loop: select: %m");
	return;
    }
#else
    EVENT_BUFFER_READ(event_count, event_buf,
		      sizeof(event_buf) / sizeof(event_buf[0]),
		      select_delay);
    if (event_count < 0) {
	if (errno != EINTR)
	    msg_fatal("event_loop: " EVENT_BUFFER_READ_TEXT ": %m");
	return;
    }
#endif

    /*
     * Before entering the application call-back routines, make sure we
     * aren't being called from a call-back routine. Doing so would make us
     * vulnerable to all kinds of race conditions.
     */
    if (nested++ > 0)
	msg_panic("event_loop: recursive call");

    /*
     * Deliver timer events. Requests are sorted: we can stop when we reach
     * the future or the list end. Allow the application to update the timer
     * queue while it is being called back. To this end, we repeatedly pop
     * the first request off the timer queue before delivering the event to
     * the application.
     */
    event_present = time((time_t *) 0);

    while ((timer = FIRST_TIMER(&event_timer_head)) != 0) {
	if (timer->when > event_present)
	    break;
	ring_detach(&timer->ring);		/* first this */
	if (msg_verbose > 2)
	    msg_info("%s: timer 0x%lx 0x%lx", myname,
		     (long) timer->callback, (long) timer->context);
	timer->callback(EVENT_TIME, timer->context);	/* then this */
	myfree((char *) timer);
    }

    /*
     * Deliver I/O events. Allow the application to cancel event requests
     * while it is being called back. To this end, we keep an eye on the
     * contents of event_xmask, so that we deliver only events that are still
     * wanted. We do not change the event request masks. It is up to the
     * application to determine when a read or write is complete.
     */
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
    if (event_count > 0) {
	for (new_max_fd = 0, fd = 0; fd <= event_max_fd; fd++) {
	    if (FD_ISSET(fd, &event_xmask)) {
		new_max_fd = fd;
		/* In case event_fdtable is updated. */
		fdp = event_fdtable + fd;
		if (FD_ISSET(fd, &xmask)) {
		    if (msg_verbose > 2)
			msg_info("%s: exception fd=%d act=0x%lx 0x%lx", myname,
				 fd, (long) fdp->callback, (long) fdp->context);
		    fdp->callback(EVENT_XCPT, fdp->context);
		} else if (FD_ISSET(fd, &wmask)) {
		    if (msg_verbose > 2)
			msg_info("%s: write fd=%d act=0x%lx 0x%lx", myname,
				 fd, (long) fdp->callback, (long) fdp->context);
		    fdp->callback(EVENT_WRITE, fdp->context);
		} else if (FD_ISSET(fd, &rmask)) {
		    if (msg_verbose > 2)
			msg_info("%s: read fd=%d act=0x%lx 0x%lx", myname,
				 fd, (long) fdp->callback, (long) fdp->context);
		    fdp->callback(EVENT_READ, fdp->context);
		}
	    }
	}
	event_max_fd = new_max_fd;
    }
#else
    for (bp = event_buf; bp < event_buf + event_count; bp++) {
	fd = EVENT_GET_FD(bp);
	if (fd < 0 || fd > event_max_fd)
	    msg_panic("%s: bad file descriptor: %d", myname, fd);
	if (EVENT_MASK_ISSET(fd, &event_xmask)) {
	    fdp = event_fdtable + fd;
	    if (EVENT_TEST_READ(bp)) {
		if (msg_verbose > 2)
		    msg_info("%s: read fd=%d act=0x%lx 0x%lx", myname,
			     fd, (long) fdp->callback, (long) fdp->context);
		fdp->callback(EVENT_READ, fdp->context);
	    } else if (EVENT_TEST_WRITE(bp)) {
		if (msg_verbose > 2)
		    msg_info("%s: write fd=%d act=0x%lx 0x%lx", myname,
			     fd, (long) fdp->callback,
			     (long) fdp->context);
		fdp->callback(EVENT_WRITE, fdp->context);
	    } else {
		if (msg_verbose > 2)
		    msg_info("%s: other fd=%d act=0x%lx 0x%lx", myname,
			     fd, (long) fdp->callback, (long) fdp->context);
		fdp->callback(EVENT_XCPT, fdp->context);
	    }
	}
    }
#endif
    nested--;
}
#ifdef TEST

 /*
  * Proof-of-concept test program for the event manager. Schedule a series of
  * events at one-second intervals and let them happen, while echoing any
  * lines read from stdin.
  */
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>

/* timer_event - display event */

static void timer_event(int unused_event, char *context)
{
    printf("%ld: %s\n", (long) event_present, context);
    fflush(stdout);
}

/* echo - echo text received on stdin */

static void echo(int unused_event, char *unused_context)
{
    char    buf[BUFSIZ];

    if (fgets(buf, sizeof(buf), stdin) == 0)
	exit(0);
    printf("Result: %s", buf);
}

int     main(int argc, char **argv)
{
    if (argv[1])
	msg_verbose = atoi(argv[1]);
    event_request_timer(timer_event, "3 first", 3);
    event_request_timer(timer_event, "3 second", 3);
    event_request_timer(timer_event, "4 first", 4);
    event_request_timer(timer_event, "4 second", 4);
    event_request_timer(timer_event, "2 first", 2);
    event_request_timer(timer_event, "2 second", 2);
    event_request_timer(timer_event, "1 first", 1);
    event_request_timer(timer_event, "1 second", 1);
    event_request_timer(timer_event, "0 first", 0);
    event_request_timer(timer_event, "0 second", 0);
    event_enable_read(fileno(stdin), echo, (char *) 0);
    event_drain(10);
    exit(0);
}

#endif