Correct PPTP server firewall rules chain.
[tomato/davidwu.git] / release / src / router / libevent / epoll.c
blob4c5c3da01a5bb2a8c4ee0a5267ebf66db589d10d
/*
 * Copyright 2000-2007 Niels Provos <provos@citi.umich.edu>
 * Copyright 2007-2012 Niels Provos, Nick Mathewson
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27 #include "event2/event-config.h"
29 #include <stdint.h>
30 #include <sys/types.h>
31 #include <sys/resource.h>
32 #ifdef _EVENT_HAVE_SYS_TIME_H
33 #include <sys/time.h>
34 #endif
35 #include <sys/queue.h>
36 #include <sys/epoll.h>
37 #include <signal.h>
38 #include <limits.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 #include <errno.h>
44 #ifdef _EVENT_HAVE_FCNTL_H
45 #include <fcntl.h>
46 #endif
48 #include "event-internal.h"
49 #include "evsignal-internal.h"
50 #include "event2/thread.h"
51 #include "evthread-internal.h"
52 #include "log-internal.h"
53 #include "evmap-internal.h"
54 #include "changelist-internal.h"
/* Per-event_base state of the epoll backend. */
struct epollop {
	struct epoll_event *events;	/* result buffer handed to epoll_wait() */
	int nevents;			/* capacity of `events`, in entries */
	int epfd;			/* descriptor obtained from epoll_create() */
};
/* Forward declarations of the backend entry points, so that the eventop
 * tables can reference them before their definitions. */
static void *epoll_init(struct event_base *);
static int epoll_dispatch(struct event_base *, struct timeval *);
static void epoll_dealloc(struct event_base *);
66 static const struct eventop epollops_changelist = {
67 "epoll (with changelist)",
68 epoll_init,
69 event_changelist_add,
70 event_changelist_del,
71 epoll_dispatch,
72 epoll_dealloc,
73 1, /* need reinit */
74 EV_FEATURE_ET|EV_FEATURE_O1,
75 EVENT_CHANGELIST_FDINFO_SIZE
79 static int epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
80 short old, short events, void *p);
81 static int epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
82 short old, short events, void *p);
84 const struct eventop epollops = {
85 "epoll",
86 epoll_init,
87 epoll_nochangelist_add,
88 epoll_nochangelist_del,
89 epoll_dispatch,
90 epoll_dealloc,
91 1, /* need reinit */
92 EV_FEATURE_ET|EV_FEATURE_O1,
/* Number of entries in the epoll_wait() result buffer allocated by
 * epoll_init(). */
#define INITIAL_NEVENT 32
/* epoll_dispatch() stops doubling the result buffer once it holds this many
 * entries. */
#define MAX_NEVENT 4096
/* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
 * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be
 * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
 * largest number of msec we can support here is 2147482.  Let's
 * round that down by 47 seconds.
 *
 * The resulting cap is 35 minutes, i.e. 2100000 msec.
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
107 static void *
108 epoll_init(struct event_base *base)
110 int epfd;
111 struct epollop *epollop;
113 /* Initialize the kernel queue. (The size field is ignored since
114 * 2.6.8.) */
115 if ((epfd = epoll_create(32000)) == -1) {
116 if (errno != ENOSYS)
117 event_warn("epoll_create");
118 return (NULL);
121 evutil_make_socket_closeonexec(epfd);
123 if (!(epollop = mm_calloc(1, sizeof(struct epollop)))) {
124 close(epfd);
125 return (NULL);
128 epollop->epfd = epfd;
130 /* Initialize fields */
131 epollop->events = mm_calloc(INITIAL_NEVENT, sizeof(struct epoll_event));
132 if (epollop->events == NULL) {
133 mm_free(epollop);
134 close(epfd);
135 return (NULL);
137 epollop->nevents = INITIAL_NEVENT;
139 if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
140 ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
141 evutil_getenv("EVENT_EPOLL_USE_CHANGELIST") != NULL))
142 base->evsel = &epollops_changelist;
144 evsig_init(base);
146 return (epollop);
149 static const char *
150 change_to_string(int change)
152 change &= (EV_CHANGE_ADD|EV_CHANGE_DEL);
153 if (change == EV_CHANGE_ADD) {
154 return "add";
155 } else if (change == EV_CHANGE_DEL) {
156 return "del";
157 } else if (change == 0) {
158 return "none";
159 } else {
160 return "???";
/*
 * Return a printable name for an epoll_ctl() operation code: "ADD", "DEL"
 * or "MOD", and "???" for any other value.
 */
static const char *
epoll_op_to_string(int op)
{
	return op == EPOLL_CTL_ADD?"ADD":
	    op == EPOLL_CTL_DEL?"DEL":
	    op == EPOLL_CTL_MOD?"MOD":
	    "???";
}
173 static int
174 epoll_apply_one_change(struct event_base *base,
175 struct epollop *epollop,
176 const struct event_change *ch)
178 struct epoll_event epev;
179 int op, events = 0;
181 if (1) {
182 /* The logic here is a little tricky. If we had no events set
183 on the fd before, we need to set op="ADD" and set
184 events=the events we want to add. If we had any events set
185 on the fd before, and we want any events to remain on the
186 fd, we need to say op="MOD" and set events=the events we
187 want to remain. But if we want to delete the last event,
188 we say op="DEL" and set events=the remaining events. What
189 fun!
192 /* TODO: Turn this into a switch or a table lookup. */
194 if ((ch->read_change & EV_CHANGE_ADD) ||
195 (ch->write_change & EV_CHANGE_ADD)) {
196 /* If we are adding anything at all, we'll want to do
197 * either an ADD or a MOD. */
198 events = 0;
199 op = EPOLL_CTL_ADD;
200 if (ch->read_change & EV_CHANGE_ADD) {
201 events |= EPOLLIN;
202 } else if (ch->read_change & EV_CHANGE_DEL) {
204 } else if (ch->old_events & EV_READ) {
205 events |= EPOLLIN;
207 if (ch->write_change & EV_CHANGE_ADD) {
208 events |= EPOLLOUT;
209 } else if (ch->write_change & EV_CHANGE_DEL) {
211 } else if (ch->old_events & EV_WRITE) {
212 events |= EPOLLOUT;
214 if ((ch->read_change|ch->write_change) & EV_ET)
215 events |= EPOLLET;
217 if (ch->old_events) {
218 /* If MOD fails, we retry as an ADD, and if
219 * ADD fails we will retry as a MOD. So the
220 * only hard part here is to guess which one
221 * will work. As a heuristic, we'll try
222 * MOD first if we think there were old
223 * events and ADD if we think there were none.
225 * We can be wrong about the MOD if the file
226 * has in fact been closed and re-opened.
228 * We can be wrong about the ADD if the
229 * the fd has been re-created with a dup()
230 * of the same file that it was before.
232 op = EPOLL_CTL_MOD;
234 } else if ((ch->read_change & EV_CHANGE_DEL) ||
235 (ch->write_change & EV_CHANGE_DEL)) {
236 /* If we're deleting anything, we'll want to do a MOD
237 * or a DEL. */
238 op = EPOLL_CTL_DEL;
240 if (ch->read_change & EV_CHANGE_DEL) {
241 if (ch->write_change & EV_CHANGE_DEL) {
242 events = EPOLLIN|EPOLLOUT;
243 } else if (ch->old_events & EV_WRITE) {
244 events = EPOLLOUT;
245 op = EPOLL_CTL_MOD;
246 } else {
247 events = EPOLLIN;
249 } else if (ch->write_change & EV_CHANGE_DEL) {
250 if (ch->old_events & EV_READ) {
251 events = EPOLLIN;
252 op = EPOLL_CTL_MOD;
253 } else {
254 events = EPOLLOUT;
259 if (!events)
260 return 0;
262 memset(&epev, 0, sizeof(epev));
263 epev.data.fd = ch->fd;
264 epev.events = events;
265 if (epoll_ctl(epollop->epfd, op, ch->fd, &epev) == -1) {
266 if (op == EPOLL_CTL_MOD && errno == ENOENT) {
267 /* If a MOD operation fails with ENOENT, the
268 * fd was probably closed and re-opened. We
269 * should retry the operation as an ADD.
271 if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, ch->fd, &epev) == -1) {
272 event_warn("Epoll MOD(%d) on %d retried as ADD; that failed too",
273 (int)epev.events, ch->fd);
274 return -1;
275 } else {
276 event_debug(("Epoll MOD(%d) on %d retried as ADD; succeeded.",
277 (int)epev.events,
278 ch->fd));
280 } else if (op == EPOLL_CTL_ADD && errno == EEXIST) {
281 /* If an ADD operation fails with EEXIST,
282 * either the operation was redundant (as with a
283 * precautionary add), or we ran into a fun
284 * kernel bug where using dup*() to duplicate the
285 * same file into the same fd gives you the same epitem
286 * rather than a fresh one. For the second case,
287 * we must retry with MOD. */
288 if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
289 event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
290 (int)epev.events, ch->fd);
291 return -1;
292 } else {
293 event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
294 (int)epev.events,
295 ch->fd));
297 } else if (op == EPOLL_CTL_DEL &&
298 (errno == ENOENT || errno == EBADF ||
299 errno == EPERM)) {
300 /* If a delete fails with one of these errors,
301 * that's fine too: we closed the fd before we
302 * got around to calling epoll_dispatch. */
303 event_debug(("Epoll DEL(%d) on fd %d gave %s: DEL was unnecessary.",
304 (int)epev.events,
305 ch->fd,
306 strerror(errno)));
307 } else {
308 event_warn("Epoll %s(%d) on fd %d failed. Old events were %d; read change was %d (%s); write change was %d (%s)",
309 epoll_op_to_string(op),
310 (int)epev.events,
311 ch->fd,
312 ch->old_events,
313 ch->read_change,
314 change_to_string(ch->read_change),
315 ch->write_change,
316 change_to_string(ch->write_change));
317 return -1;
319 } else {
320 event_debug(("Epoll %s(%d) on fd %d okay. [old events were %d; read change was %d; write change was %d]",
321 epoll_op_to_string(op),
322 (int)epev.events,
323 (int)ch->fd,
324 ch->old_events,
325 ch->read_change,
326 ch->write_change));
329 return 0;
332 static int
333 epoll_apply_changes(struct event_base *base)
335 struct event_changelist *changelist = &base->changelist;
336 struct epollop *epollop = base->evbase;
337 struct event_change *ch;
339 int r = 0;
340 int i;
342 for (i = 0; i < changelist->n_changes; ++i) {
343 ch = &changelist->changes[i];
344 if (epoll_apply_one_change(base, epollop, ch) < 0)
345 r = -1;
348 return (r);
351 static int
352 epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
353 short old, short events, void *p)
355 struct event_change ch;
356 ch.fd = fd;
357 ch.old_events = old;
358 ch.read_change = ch.write_change = 0;
359 if (events & EV_WRITE)
360 ch.write_change = EV_CHANGE_ADD |
361 (events & EV_ET);
362 if (events & EV_READ)
363 ch.read_change = EV_CHANGE_ADD |
364 (events & EV_ET);
366 return epoll_apply_one_change(base, base->evbase, &ch);
369 static int
370 epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
371 short old, short events, void *p)
373 struct event_change ch;
374 ch.fd = fd;
375 ch.old_events = old;
376 ch.read_change = ch.write_change = 0;
377 if (events & EV_WRITE)
378 ch.write_change = EV_CHANGE_DEL;
379 if (events & EV_READ)
380 ch.read_change = EV_CHANGE_DEL;
382 return epoll_apply_one_change(base, base->evbase, &ch);
385 static int
386 epoll_dispatch(struct event_base *base, struct timeval *tv)
388 struct epollop *epollop = base->evbase;
389 struct epoll_event *events = epollop->events;
390 int i, res;
391 long timeout = -1;
393 if (tv != NULL) {
394 timeout = evutil_tv_to_msec(tv);
395 if (timeout < 0 || timeout > MAX_EPOLL_TIMEOUT_MSEC) {
396 /* Linux kernels can wait forever if the timeout is
397 * too big; see comment on MAX_EPOLL_TIMEOUT_MSEC. */
398 timeout = MAX_EPOLL_TIMEOUT_MSEC;
402 epoll_apply_changes(base);
403 event_changelist_remove_all(&base->changelist, base);
405 EVBASE_RELEASE_LOCK(base, th_base_lock);
407 res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
409 EVBASE_ACQUIRE_LOCK(base, th_base_lock);
411 if (res == -1) {
412 if (errno != EINTR) {
413 event_warn("epoll_wait");
414 return (-1);
417 return (0);
420 event_debug(("%s: epoll_wait reports %d", __func__, res));
421 EVUTIL_ASSERT(res <= epollop->nevents);
423 for (i = 0; i < res; i++) {
424 int what = events[i].events;
425 short ev = 0;
427 if (what & (EPOLLHUP|EPOLLERR)) {
428 ev = EV_READ | EV_WRITE;
429 } else {
430 if (what & EPOLLIN)
431 ev |= EV_READ;
432 if (what & EPOLLOUT)
433 ev |= EV_WRITE;
436 if (!ev)
437 continue;
439 evmap_io_active(base, events[i].data.fd, ev | EV_ET);
442 if (res == epollop->nevents && epollop->nevents < MAX_NEVENT) {
443 /* We used all of the event space this time. We should
444 be ready for more events next time. */
445 int new_nevents = epollop->nevents * 2;
446 struct epoll_event *new_events;
448 new_events = mm_realloc(epollop->events,
449 new_nevents * sizeof(struct epoll_event));
450 if (new_events) {
451 epollop->events = new_events;
452 epollop->nevents = new_nevents;
456 return (0);
460 static void
461 epoll_dealloc(struct event_base *base)
463 struct epollop *epollop = base->evbase;
465 evsig_dealloc(base);
466 if (epollop->events)
467 mm_free(epollop->events);
468 if (epollop->epfd >= 0)
469 close(epollop->epfd);
471 memset(epollop, 0, sizeof(struct epollop));
472 mm_free(epollop);