1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/tools/flip_server/epoll_server.h"
7 #include <stdlib.h> // for abort
8 #include <errno.h> // for errno and strerror_r
14 #include "base/logging.h"
15 #include "base/timer.h"
17 // Design notes: An efficient implementation of ready list has the following
18 // desirable properties:
20 // A. O(1) insertion into/removal from the list in any location.
21 // B. Once the callback is found by hash lookup using the fd, the lookup of
22 // corresponding entry in the list is O(1).
23 // C. Safe insertion into/removal from the list during list iteration. (The
24 // ready list's purpose is to enable completely event driven I/O model.
25 // Thus, all the interesting bits happen in the callback. It is critical
26 // to not place any restriction on the API during list iteration.)
28 // The current implementation achieves these goals with the following design:
30 // - The ready list is constructed as a doubly linked list to enable O(1)
31 // insertion/removal (see man 3 queue).
32 // - The forward and backward links are directly embedded inside the
33 // CBAndEventMask struct. This enables O(1) lookup in the list for a given
34 // callback. (Technically, we could've used std::list of hash_set::iterator,
35 // and keep a list::iterator in CBAndEventMask to achieve the same effect.
36 // However, iterators have two problems: no way to portably invalidate them,
37 // and no way to tell whether an iterator is singular or not. The only way to
38 // overcome these issues is to keep bools in both places, but that throws off
39 // memory alignment (up to 7 wasted bytes for each bool). The extra level of
40 // indirection will also likely be less cache friendly. Direct manipulation
41 // of link pointers makes it easier to retrieve the CBAndEventMask from the
42 // list, easier to check whether a CBAndEventMask is in the list, uses less
43 // memory (save 32 bytes/fd), and does not affect cache usage (we need to
44 // read in the struct to use the callback anyway).)
45 // - Embed the fd directly into CBAndEventMask and switch to using hash_set.
46 // This removes the need to store hash_map::iterator in the list just so that
47 // we can get both the fd and the callback.
48 // - The ready list is "one shot": each entry is removed before OnEvent is
49 // called. This removes the mutation-while-iterating problem.
50 // - Use two lists to keep track of callbacks. The ready_list_ is the one used
51 // for registration. Before iteration, the ready_list_ is swapped into the
52 // tmp_list_. Once iteration is done, tmp_list_ will be empty, and
53 // ready_list_ will have all the new ready fds.
55 // The size we use for buffers passed to strerror_r
56 static const int kErrorBufferSize
= 256;
60 // Clears the pipe and returns. Used for waking the epoll server up.
61 class ReadPipeCallback
: public EpollCallbackInterface
{
63 void OnEvent(int fd
, EpollEvent
* event
) {
64 DCHECK(event
->in_events
== EPOLLIN
);
67 // Read until the pipe is empty.
68 while (data_read
> 0) {
69 data_read
= read(fd
, &data
, sizeof(data
));
72 void OnShutdown(EpollServer
*eps
, int fd
) {}
73 void OnRegistration(EpollServer
*, int, int) {}
74 void OnModification(int, int) {} // COV_NF_LINE
75 void OnUnregistration(int, bool) {} // COV_NF_LINE
78 ////////////////////////////////////////////////////////////////////////////////
79 ////////////////////////////////////////////////////////////////////////////////
81 EpollServer::EpollServer()
82 : epoll_fd_(epoll_create(1024)),
84 recorded_now_in_us_(0),
86 wake_cb_(new ReadPipeCallback
),
89 in_wait_for_events_and_execute_callbacks_(false),
91 // ensure that the epoll_fd_ is valid.
92 CHECK_NE(epoll_fd_
, -1);
93 LIST_INIT(&ready_list_
);
94 LIST_INIT(&tmp_list_
);
97 if (pipe(pipe_fds
) < 0) {
98 // Unfortunately, it is impossible to test any such initialization in
99 // a constructor (as virtual methods do not yet work).
100 // This -could- be solved by moving initialization to an outside
102 int saved_errno
= errno
;
103 char buf
[kErrorBufferSize
];
104 LOG(FATAL
) << "Error " << saved_errno
105 << " in pipe(): " << strerror_r(saved_errno
, buf
, sizeof(buf
));
107 read_fd_
= pipe_fds
[0];
108 write_fd_
= pipe_fds
[1];
109 RegisterFD(read_fd_
, wake_cb_
.get(), EPOLLIN
);
112 void EpollServer::CleanupFDToCBMap() {
113 FDToCBMap::iterator cb_iter
= cb_map_
.begin();
114 while (cb_iter
!= cb_map_
.end()) {
115 int fd
= cb_iter
->fd
;
116 CB
* cb
= cb_iter
->cb
;
118 cb_iter
->in_use
= true;
120 cb
->OnShutdown(this, fd
);
123 cb_map_
.erase(cb_iter
);
124 cb_iter
= cb_map_
.begin();
128 void EpollServer::CleanupTimeToAlarmCBMap() {
129 TimeToAlarmCBMap::iterator erase_it
;
131 // Call OnShutdown() on alarms. Note that the structure of the loop
132 // is similar to the structure of loop in the function HandleAlarms()
133 for (TimeToAlarmCBMap::iterator i
= alarm_map_
.begin();
134 i
!= alarm_map_
.end();
136 // Note that OnShutdown() can call UnregisterAlarm() on
137 // other iterators. OnShutdown() should not call UnregisterAlarm()
138 // on self because by definition the iterator is not valid any more.
139 i
->second
->OnShutdown(this);
142 alarm_map_
.erase(erase_it
);
146 EpollServer::~EpollServer() {
147 DCHECK_EQ(in_shutdown_
, false);
149 #ifdef EPOLL_SERVER_EVENT_TRACING
150 LOG(INFO
) << "\n" << event_recorder_
;
152 VLOG(2) << "Shutting down epoll server ";
155 LIST_INIT(&ready_list_
);
156 LIST_INIT(&tmp_list_
);
158 CleanupTimeToAlarmCBMap();
165 // Whether a CBAndEventMask is on the ready list is determined by a non-NULL
166 // le_prev pointer (le_next being NULL indicates end of list).
167 inline void EpollServer::AddToReadyList(CBAndEventMask
* cb_and_mask
) {
168 if (cb_and_mask
->entry
.le_prev
== NULL
) {
169 LIST_INSERT_HEAD(&ready_list_
, cb_and_mask
, entry
);
174 inline void EpollServer::RemoveFromReadyList(
175 const CBAndEventMask
& cb_and_mask
) {
176 if (cb_and_mask
.entry
.le_prev
!= NULL
) {
177 LIST_REMOVE(&cb_and_mask
, entry
);
178 // Clean up all the ready list states. Don't bother with the other fields
179 // as they are initialized when the CBAndEventMask is added to the ready
180 // list. This saves a few cycles in the inner loop.
181 cb_and_mask
.entry
.le_prev
= NULL
;
183 if (ready_list_size_
== 0) {
184 DCHECK(ready_list_
.lh_first
== NULL
);
185 DCHECK(tmp_list_
.lh_first
== NULL
);
190 void EpollServer::RegisterFD(int fd
, CB
* cb
, int event_mask
) {
192 VLOG(3) << "RegisterFD fd=" << fd
<< " event_mask=" << event_mask
;
193 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
194 if (cb_map_
.end() != fd_i
) {
195 // do we just abort, or do we just unregister the other guy?
196 // for now, lets just unregister the other guy.
198 // unregister any callback that may already be registered for this FD.
199 CB
* other_cb
= fd_i
->cb
;
201 // Must remove from the ready list before erasing.
202 RemoveFromReadyList(*fd_i
);
203 other_cb
->OnUnregistration(fd
, true);
204 ModFD(fd
, event_mask
);
206 // already unregistered, so just recycle the node.
207 AddFD(fd
, event_mask
);
210 fd_i
->event_mask
= event_mask
;
211 fd_i
->events_to_fake
= 0;
213 AddFD(fd
, event_mask
);
214 cb_map_
.insert(CBAndEventMask(cb
, event_mask
, fd
));
218 // set the FD to be non-blocking.
221 cb
->OnRegistration(this, fd
, event_mask
);
224 int EpollServer::GetFlags(int fd
) {
225 return fcntl(fd
, F_GETFL
, 0);
228 void EpollServer::SetNonblocking(int fd
) {
229 int flags
= GetFlags(fd
);
231 int saved_errno
= errno
;
232 char buf
[kErrorBufferSize
];
233 LOG(FATAL
) << "Error " << saved_errno
234 << " doing fcntl(" << fd
<< ", F_GETFL, 0): "
235 << strerror_r(saved_errno
, buf
, sizeof(buf
));
237 if (!(flags
& O_NONBLOCK
)) {
238 int saved_flags
= flags
;
239 flags
= SetFlags(fd
, flags
| O_NONBLOCK
);
242 int saved_errno
= errno
;
243 char buf
[kErrorBufferSize
];
244 LOG(FATAL
) << "Error " << saved_errno
245 << " doing fcntl(" << fd
<< ", F_SETFL, " << saved_flags
<< "): "
246 << strerror_r(saved_errno
, buf
, sizeof(buf
));
251 int EpollServer::epoll_wait_impl(int epfd
,
252 struct epoll_event
* events
,
255 return epoll_wait(epfd
, events
, max_events
, timeout_in_ms
);
258 void EpollServer::RegisterFDForWrite(int fd
, CB
* cb
) {
259 RegisterFD(fd
, cb
, EPOLLOUT
);
262 void EpollServer::RegisterFDForReadWrite(int fd
, CB
* cb
) {
263 RegisterFD(fd
, cb
, EPOLLIN
| EPOLLOUT
);
266 void EpollServer::RegisterFDForRead(int fd
, CB
* cb
) {
267 RegisterFD(fd
, cb
, EPOLLIN
);
270 void EpollServer::UnregisterFD(int fd
) {
271 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
272 if (cb_map_
.end() == fd_i
|| fd_i
->cb
== NULL
) {
273 // Doesn't exist in server, or has gone through UnregisterFD once and still
274 // inside the callchain of OnEvent.
277 #ifdef EPOLL_SERVER_EVENT_TRACING
278 event_recorder_
.RecordUnregistration(fd
);
281 // Since the links are embedded within the struct, we must remove it from the
282 // list before erasing it from the hash_set.
283 RemoveFromReadyList(*fd_i
);
285 cb
->OnUnregistration(fd
, false);
286 // fd_i->cb is NULL if that fd is unregistered inside the callchain of
287 // OnEvent. Since the EpollServer needs a valid CBAndEventMask after OnEvent
288 // returns in order to add it to the ready list, we cannot have UnregisterFD
289 // erase the entry if it is in use. Thus, a NULL fd_i->cb is used as a
290 // condition that tells the EpollServer that this entry is unused at a later
295 // Remove all trace of the registration, and just keep the node alive long
296 // enough so the code that calls OnEvent doesn't have to worry about
297 // figuring out whether the CBAndEventMask is valid or not.
299 fd_i
->event_mask
= 0;
300 fd_i
->events_to_fake
= 0;
304 void EpollServer::ModifyCallback(int fd
, int event_mask
) {
305 ModifyFD(fd
, ~0, event_mask
);
308 void EpollServer::StopRead(int fd
) {
309 ModifyFD(fd
, EPOLLIN
, 0);
312 void EpollServer::StartRead(int fd
) {
313 ModifyFD(fd
, 0, EPOLLIN
);
316 void EpollServer::StopWrite(int fd
) {
317 ModifyFD(fd
, EPOLLOUT
, 0);
320 void EpollServer::StartWrite(int fd
) {
321 ModifyFD(fd
, 0, EPOLLOUT
);
324 void EpollServer::HandleEvent(int fd
, int event_mask
) {
325 #ifdef EPOLL_SERVER_EVENT_TRACING
326 event_recorder_
.RecordEpollEvent(fd
, event_mask
);
328 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
329 if (fd_i
== cb_map_
.end() || fd_i
->cb
== NULL
) {
331 // This could occur if epoll() returns a set of events, and
332 // while processing event A (earlier) we removed the callback
333 // for event B (and are now processing event B).
336 fd_i
->events_asserted
= event_mask
;
337 CBAndEventMask
* cb_and_mask
= const_cast<CBAndEventMask
*>(&*fd_i
);
338 AddToReadyList(cb_and_mask
);
341 class TrueFalseGuard
{
343 explicit TrueFalseGuard(bool* guarded_bool
) : guarded_bool_(guarded_bool
) {
344 DCHECK(guarded_bool_
!= NULL
);
345 DCHECK(*guarded_bool_
== false);
346 *guarded_bool_
= true;
349 *guarded_bool_
= false;
355 void EpollServer::WaitForEventsAndExecuteCallbacks() {
356 if (in_wait_for_events_and_execute_callbacks_
) {
358 "Attempting to call WaitForEventsAndExecuteCallbacks"
359 " when an ancestor to the current function is already"
360 " WaitForEventsAndExecuteCallbacks!";
361 // The line below is actually tested, but in coverage mode,
363 return; // COV_NF_LINE
365 TrueFalseGuard
recursion_guard(&in_wait_for_events_and_execute_callbacks_
);
366 if (alarm_map_
.empty()) {
367 // no alarms, this is business as usual.
368 WaitForEventsAndCallHandleEvents(timeout_in_us_
,
371 recorded_now_in_us_
= 0;
375 // store the 'now'. If we recomputed 'now' every iteration
376 // down below, then we might never exit that loop-- any
377 // long-running alarms might install other long-running
378 // alarms, etc. By storing it here now, we ensure that
379 // a more reasonable amount of work is done here.
380 int64 now_in_us
= NowInUsec();
382 // Get the first timeout from the alarm_map where it is
383 // stored in absolute time.
384 int64 next_alarm_time_in_us
= alarm_map_
.begin()->first
;
385 VLOG(4) << "next_alarm_time = " << next_alarm_time_in_us
386 << " now = " << now_in_us
387 << " timeout_in_us = " << timeout_in_us_
;
389 int64 wait_time_in_us
;
390 int64 alarm_timeout_in_us
= next_alarm_time_in_us
- now_in_us
;
392 // If the next alarm is sooner than the default timeout, or if there is no
393 // timeout (timeout_in_us_ == -1), wake up when the alarm should fire.
394 // Otherwise use the default timeout.
395 if (alarm_timeout_in_us
< timeout_in_us_
|| timeout_in_us_
< 0) {
396 wait_time_in_us
= std::max(alarm_timeout_in_us
, static_cast<int64
>(0));
398 wait_time_in_us
= timeout_in_us_
;
401 VLOG(4) << "wait_time_in_us = " << wait_time_in_us
;
405 WaitForEventsAndCallHandleEvents(wait_time_in_us
,
408 CallAndReregisterAlarmEvents();
409 recorded_now_in_us_
= 0;
412 void EpollServer::SetFDReady(int fd
, int events_to_fake
) {
413 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
414 if (cb_map_
.end() != fd_i
&& fd_i
->cb
!= NULL
) {
415 // This const_cast is necessary for LIST_HEAD_INSERT to work. Declaring
416 // entry mutable is insufficient because LIST_HEAD_INSERT assigns the
417 // forward pointer of the list head to the current cb_and_mask, and the
418 // compiler complains that it can't assign a const T* to a T*.
419 CBAndEventMask
* cb_and_mask
= const_cast<CBAndEventMask
*>(&*fd_i
);
420 // Note that there is no clearly correct behavior here when
421 // cb_and_mask->events_to_fake != 0 and this function is called.
422 // Of the two operations:
423 // cb_and_mask->events_to_fake = events_to_fake
424 // cb_and_mask->events_to_fake |= events_to_fake
425 // the first was picked because it discourages users from calling
426 // SetFDReady repeatedly to build up the correct event set as it is more
427 // efficient to call SetFDReady once with the correct, final mask.
428 cb_and_mask
->events_to_fake
= events_to_fake
;
429 AddToReadyList(cb_and_mask
);
433 void EpollServer::SetFDNotReady(int fd
) {
434 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
435 if (cb_map_
.end() != fd_i
) {
436 RemoveFromReadyList(*fd_i
);
440 bool EpollServer::IsFDReady(int fd
) const {
441 FDToCBMap::const_iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
442 return (cb_map_
.end() != fd_i
&&
444 fd_i
->entry
.le_prev
!= NULL
);
447 void EpollServer::VerifyReadyList() const {
449 CBAndEventMask
* cur
= ready_list_
.lh_first
;
450 for (; cur
; cur
= cur
->entry
.le_next
) {
453 for (cur
= tmp_list_
.lh_first
; cur
; cur
= cur
->entry
.le_next
) {
456 CHECK_EQ(ready_list_size_
, count
) << "Ready list size does not match count";
459 void EpollServer::RegisterAlarm(int64 timeout_time_in_us
, AlarmCB
* ac
) {
461 if (ContainsAlarm(ac
)) {
462 LOG(FATAL
) << "Alarm already exists " << ac
;
464 VLOG(4) << "RegisteringAlarm at : " << timeout_time_in_us
;
466 TimeToAlarmCBMap::iterator alarm_iter
=
467 alarm_map_
.insert(std::make_pair(timeout_time_in_us
, ac
));
469 all_alarms_
.insert(ac
);
470 // Pass the iterator to the EpollAlarmCallbackInterface.
471 ac
->OnRegistration(alarm_iter
, this);
474 // Unregister a specific alarm callback: iterator_token must be a
475 // valid iterator. The caller must ensure the validity of the iterator.
476 void EpollServer::UnregisterAlarm(const AlarmRegToken
& iterator_token
) {
477 AlarmCB
* cb
= iterator_token
->second
;
478 alarm_map_
.erase(iterator_token
);
479 all_alarms_
.erase(cb
);
480 cb
->OnUnregistration();
483 int EpollServer::NumFDsRegistered() const {
484 DCHECK(cb_map_
.size() >= 1);
485 // Omit the internal FD (read_fd_)
486 return cb_map_
.size() - 1;
489 void EpollServer::Wake() {
490 char data
= 'd'; // 'd' is for data. It's good enough for me.
491 int rv
= write(write_fd_
, &data
, 1);
495 int64
EpollServer::NowInUsec() const {
496 return base::Time::Now().ToInternalValue();
499 int64
EpollServer::ApproximateNowInUsec() const {
500 if (recorded_now_in_us_
!= 0) {
501 return recorded_now_in_us_
;
503 return this->NowInUsec();
506 std::string
EpollServer::EventMaskToString(int event_mask
) {
508 if (event_mask
& EPOLLIN
) s
+= "EPOLLIN ";
509 if (event_mask
& EPOLLPRI
) s
+= "EPOLLPRI ";
510 if (event_mask
& EPOLLOUT
) s
+= "EPOLLOUT ";
511 if (event_mask
& EPOLLRDNORM
) s
+= "EPOLLRDNORM ";
512 if (event_mask
& EPOLLRDBAND
) s
+= "EPOLLRDBAND ";
513 if (event_mask
& EPOLLWRNORM
) s
+= "EPOLLWRNORM ";
514 if (event_mask
& EPOLLWRBAND
) s
+= "EPOLLWRBAND ";
515 if (event_mask
& EPOLLMSG
) s
+= "EPOLLMSG ";
516 if (event_mask
& EPOLLERR
) s
+= "EPOLLERR ";
517 if (event_mask
& EPOLLHUP
) s
+= "EPOLLHUP ";
518 if (event_mask
& EPOLLONESHOT
) s
+= "EPOLLONESHOT ";
519 if (event_mask
& EPOLLET
) s
+= "EPOLLET ";
523 void EpollServer::LogStateOnCrash() {
524 LOG(ERROR
) << "----------------------Epoll Server---------------------------";
525 LOG(ERROR
) << "Epoll server " << this << " polling on fd " << epoll_fd_
;
526 LOG(ERROR
) << "timeout_in_us_: " << timeout_in_us_
;
528 // Log sessions with alarms.
529 LOG(ERROR
) << alarm_map_
.size() << " alarms registered.";
530 for (TimeToAlarmCBMap::iterator it
= alarm_map_
.begin();
531 it
!= alarm_map_
.end();
534 alarms_reregistered_and_should_be_skipped_
.find(it
->second
)
535 != alarms_reregistered_and_should_be_skipped_
.end();
536 LOG(ERROR
) << "Alarm " << it
->second
<< " registered at time " << it
->first
537 << " and should be skipped = " << skipped
;
540 LOG(ERROR
) << cb_map_
.size() << " fd callbacks registered.";
541 for (FDToCBMap::iterator it
= cb_map_
.begin();
544 LOG(ERROR
) << "fd: " << it
->fd
<< " with mask " << it
->event_mask
545 << " registered with cb: " << it
->cb
;
547 LOG(ERROR
) << "----------------------/Epoll Server--------------------------";
552 ////////////////////////////////////////////////////////////////////////////////
553 ////////////////////////////////////////////////////////////////////////////////
555 void EpollServer::DelFD(int fd
) const {
556 struct epoll_event ee
;
557 memset(&ee
, 0, sizeof(ee
));
558 #ifdef EPOLL_SERVER_EVENT_TRACING
559 event_recorder_
.RecordFDMaskEvent(fd
, 0, "DelFD");
561 if (epoll_ctl(epoll_fd_
, EPOLL_CTL_DEL
, fd
, &ee
)) {
562 int saved_errno
= errno
;
563 char buf
[kErrorBufferSize
];
564 LOG(FATAL
) << "Epoll set removal error for fd " << fd
<< ": "
565 << strerror_r(saved_errno
, buf
, sizeof(buf
));
569 ////////////////////////////////////////
571 void EpollServer::AddFD(int fd
, int event_mask
) const {
572 struct epoll_event ee
;
573 memset(&ee
, 0, sizeof(ee
));
574 ee
.events
= event_mask
| EPOLLERR
| EPOLLHUP
;
576 #ifdef EPOLL_SERVER_EVENT_TRACING
577 event_recorder_
.RecordFDMaskEvent(fd
, ee
.events
, "AddFD");
579 if (epoll_ctl(epoll_fd_
, EPOLL_CTL_ADD
, fd
, &ee
)) {
580 int saved_errno
= errno
;
581 char buf
[kErrorBufferSize
];
582 LOG(FATAL
) << "Epoll set insertion error for fd " << fd
<< ": "
583 << strerror_r(saved_errno
, buf
, sizeof(buf
));
587 ////////////////////////////////////////
589 void EpollServer::ModFD(int fd
, int event_mask
) const {
590 struct epoll_event ee
;
591 memset(&ee
, 0, sizeof(ee
));
592 ee
.events
= event_mask
| EPOLLERR
| EPOLLHUP
;
594 #ifdef EPOLL_SERVER_EVENT_TRACING
595 event_recorder_
.RecordFDMaskEvent(fd
, ee
.events
, "ModFD");
597 VLOG(3) << "modifying fd= " << fd
<< " "
598 << EventMaskToString(ee
.events
);
599 if (epoll_ctl(epoll_fd_
, EPOLL_CTL_MOD
, fd
, &ee
)) {
600 int saved_errno
= errno
;
601 char buf
[kErrorBufferSize
];
602 LOG(FATAL
) << "Epoll set modification error for fd " << fd
<< ": "
603 << strerror_r(saved_errno
, buf
, sizeof(buf
));
607 ////////////////////////////////////////
609 void EpollServer::ModifyFD(int fd
, int remove_event
, int add_event
) {
610 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
611 if (cb_map_
.end() == fd_i
) {
612 VLOG(2) << "Didn't find the fd " << fd
<< "in internal structures";
616 if (fd_i
->cb
!= NULL
) {
617 int & event_mask
= fd_i
->event_mask
;
618 VLOG(3) << "fd= " << fd
619 << " event_mask before: " << EventMaskToString(event_mask
);
620 event_mask
&= ~remove_event
;
621 event_mask
|= add_event
;
623 VLOG(3) << " event_mask after: " << EventMaskToString(event_mask
);
625 ModFD(fd
, event_mask
);
627 fd_i
->cb
->OnModification(fd
, event_mask
);
631 void EpollServer::WaitForEventsAndCallHandleEvents(int64 timeout_in_us
,
632 struct epoll_event events
[],
634 if (timeout_in_us
== 0 || ready_list_
.lh_first
!= NULL
) {
635 // If ready list is not empty, then don't sleep at all.
637 } else if (timeout_in_us
< 0) {
638 LOG(INFO
) << "Negative epoll timeout: " << timeout_in_us
639 << "us; epoll will wait forever for events.";
640 // If timeout_in_us is < 0 we are supposed to Wait forever. This means we
641 // should set timeout_in_us to -1000 so we will
642 // Wait(-1000/1000) == Wait(-1) == Wait forever.
643 timeout_in_us
= -1000;
645 // If timeout is specified, and the ready list is empty.
646 if (timeout_in_us
< 1000) {
647 timeout_in_us
= 1000;
650 const int timeout_in_ms
= timeout_in_us
/ 1000;
651 int nfds
= epoll_wait_impl(epoll_fd_
,
655 VLOG(3) << "nfds=" << nfds
;
657 #ifdef EPOLL_SERVER_EVENT_TRACING
658 event_recorder_
.RecordEpollWaitEvent(timeout_in_ms
, nfds
);
661 // If you're wondering why the NowInUsec() is recorded here, the answer is
662 // simple: If we did it before the epoll_wait_impl, then the max error for
663 // the ApproximateNowInUs() call would be as large as the maximum length of
664 // epoll_wait, which can be arbitrarily long. Since this would make
665 // ApproximateNowInUs() worthless, we instead record the time -after- we've
666 // done epoll_wait, which guarantees that the maximum error is the amount of
667 // time it takes to process all the events generated by epoll_wait.
668 recorded_now_in_us_
= NowInUsec();
670 for (int i
= 0; i
< nfds
; ++i
) {
671 int event_mask
= events
[i
].events
;
672 int fd
= events
[i
].data
.fd
;
673 HandleEvent(fd
, event_mask
);
675 } else if (nfds
< 0) {
676 // Catch interrupted syscall and just ignore it and move on.
677 if (errno
!= EINTR
&& errno
!= 0) {
678 int saved_errno
= errno
;
679 char buf
[kErrorBufferSize
];
680 LOG(FATAL
) << "Error " << saved_errno
<< " in epoll_wait: "
681 << strerror_r(saved_errno
, buf
, sizeof(buf
));
685 // Now run through the ready list.
686 if (ready_list_
.lh_first
) {
687 CallReadyListCallbacks();
691 void EpollServer::CallReadyListCallbacks() {
692 // Check pre-conditions.
693 DCHECK(tmp_list_
.lh_first
== NULL
);
694 // Swap out the ready_list_ into the tmp_list_ before traversing the list to
695 // enable SetFDReady() to just push new items into the ready_list_.
696 std::swap(ready_list_
.lh_first
, tmp_list_
.lh_first
);
697 if (tmp_list_
.lh_first
) {
698 tmp_list_
.lh_first
->entry
.le_prev
= &tmp_list_
.lh_first
;
699 EpollEvent
event(0, false);
700 while (tmp_list_
.lh_first
!= NULL
) {
701 DCHECK_GT(ready_list_size_
, 0);
702 CBAndEventMask
* cb_and_mask
= tmp_list_
.lh_first
;
703 RemoveFromReadyList(*cb_and_mask
);
705 event
.out_ready_mask
= 0;
707 cb_and_mask
->events_asserted
| cb_and_mask
->events_to_fake
;
708 // TODO(fenix): get rid of the two separate fields in cb_and_mask.
709 cb_and_mask
->events_asserted
= 0;
710 cb_and_mask
->events_to_fake
= 0;
712 // OnEvent() may call UnRegister, so we set in_use, here. Any
713 // UnRegister call will now simply set the cb to NULL instead of
714 // invalidating the cb_and_mask object (by deleting the object in the
715 // map to which cb_and_mask refers)
716 TrueFalseGuard
in_use_guard(&(cb_and_mask
->in_use
));
717 cb_and_mask
->cb
->OnEvent(cb_and_mask
->fd
, &event
);
720 // Since OnEvent may have called UnregisterFD, we must check here that
721 // the callback is still valid. If it isn't, then UnregisterFD *was*
722 // called, and we should now get rid of the object.
723 if (cb_and_mask
->cb
== NULL
) {
724 cb_map_
.erase(*cb_and_mask
);
725 } else if (event
.out_ready_mask
!= 0) {
726 cb_and_mask
->events_to_fake
= event
.out_ready_mask
;
727 AddToReadyList(cb_and_mask
);
731 DCHECK(tmp_list_
.lh_first
== NULL
);
734 const int EpollServer::kMinimumEffectiveAlarmQuantum
= 1000;
736 // Alarms may be up to kMinimumEffectiveAlarmQuantum -1 us late.
737 inline int64
EpollServer::DoRoundingOnNow(int64 now_in_us
) const {
738 now_in_us
/= kMinimumEffectiveAlarmQuantum
;
739 now_in_us
*= kMinimumEffectiveAlarmQuantum
;
740 now_in_us
+= (2 * kMinimumEffectiveAlarmQuantum
- 1);
744 void EpollServer::CallAndReregisterAlarmEvents() {
745 int64 now_in_us
= recorded_now_in_us_
;
746 DCHECK_NE(0, recorded_now_in_us_
);
747 now_in_us
= DoRoundingOnNow(now_in_us
);
749 TimeToAlarmCBMap::iterator erase_it
;
752 for (TimeToAlarmCBMap::iterator i
= alarm_map_
.begin();
753 i
!= alarm_map_
.end();
755 if (i
->first
> now_in_us
) {
758 AlarmCB
* cb
= i
->second
;
759 // Execute the OnAlarm() only if we did not register
760 // it in this loop itself.
761 const bool added_in_this_round
=
762 alarms_reregistered_and_should_be_skipped_
.find(cb
)
763 != alarms_reregistered_and_should_be_skipped_
.end();
764 if (added_in_this_round
) {
768 all_alarms_
.erase(cb
);
769 const int64 new_timeout_time_in_us
= cb
->OnAlarm();
773 alarm_map_
.erase(erase_it
);
775 if (new_timeout_time_in_us
> 0) {
776 // We add to hash_set only if the new timeout is <= now_in_us.
777 // if timeout is > now_in_us then we have no fear that this alarm
778 // can be reexecuted in this loop, and hence we do not need to
779 // worry about a recursive loop.
780 DVLOG(3) << "Reregistering alarm "
782 << " " << new_timeout_time_in_us
784 if (new_timeout_time_in_us
<= now_in_us
) {
785 alarms_reregistered_and_should_be_skipped_
.insert(cb
);
787 RegisterAlarm(new_timeout_time_in_us
, cb
);
790 alarms_reregistered_and_should_be_skipped_
.clear();
793 EpollAlarm::EpollAlarm() : eps_(NULL
), registered_(false) {
796 EpollAlarm::~EpollAlarm() {
797 UnregisterIfRegistered();
800 int64
EpollAlarm::OnAlarm() {
805 void EpollAlarm::OnRegistration(const EpollServer::AlarmRegToken
& token
,
807 DCHECK_EQ(false, registered_
);
814 void EpollAlarm::OnUnregistration() {
818 void EpollAlarm::OnShutdown(EpollServer
* eps
) {
823 // If the alarm was registered, unregister it.
824 void EpollAlarm::UnregisterIfRegistered() {
828 eps_
->UnregisterAlarm(token_
);