1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/tools/flip_server/epoll_server.h"
7 #include <stdlib.h> // for abort
8 #include <errno.h> // for errno and strerror_r
11 #include <unistd.h> // For read, pipe, close and write.
15 #include "base/logging.h"
16 #include "base/timer/timer.h"
18 // Design notes: An efficient implementation of ready list has the following
19 // desirable properties:
21 // A. O(1) insertion into/removal from the list in any location.
22 // B. Once the callback is found by hash lookup using the fd, the lookup of
23 // corresponding entry in the list is O(1).
24 // C. Safe insertion into/removal from the list during list iteration. (The
25 // ready list's purpose is to enable completely event driven I/O model.
26 // Thus, all the interesting bits happen in the callback. It is critical
27 // to not place any restriction on the API during list iteration.
29 // The current implementation achieves these goals with the following design:
31 // - The ready list is constructed as a doubly linked list to enable O(1)
32 // insertion/removal (see man 3 queue).
33 // - The forward and backward links are directly embedded inside the
34 // CBAndEventMask struct. This enables O(1) lookup in the list for a given
35 // callback. (Techincally, we could've used std::list of hash_set::iterator,
36 // and keep a list::iterator in CBAndEventMask to achieve the same effect.
37 // However, iterators have two problems: no way to portably invalidate them,
38 // and no way to tell whether an iterator is singular or not. The only way to
39 // overcome these issues is to keep bools in both places, but that throws off
40 // memory alignment (up to 7 wasted bytes for each bool). The extra level of
41 // indirection will also likely be less cache friendly. Direct manipulation
42 // of link pointers makes it easier to retrieve the CBAndEventMask from the
43 // list, easier to check whether an CBAndEventMask is in the list, uses less
44 // memory (save 32 bytes/fd), and does not affect cache usage (we need to
45 // read in the struct to use the callback anyway).)
46 // - Embed the fd directly into CBAndEventMask and switch to using hash_set.
47 // This removes the need to store hash_map::iterator in the list just so that
48 // we can get both the fd and the callback.
49 // - The ready list is "one shot": each entry is removed before OnEvent is
50 // called. This removes the mutation-while-iterating problem.
51 // - Use two lists to keep track of callbacks. The ready_list_ is the one used
52 // for registration. Before iteration, the ready_list_ is swapped into the
53 // tmp_list_. Once iteration is done, tmp_list_ will be empty, and
54 // ready_list_ will have all the new ready fds.
56 // The size we use for buffers passed to strerror_r
57 static const int kErrorBufferSize
= 256;
61 // Clears the pipe and returns. Used for waking the epoll server up.
62 class ReadPipeCallback
: public EpollCallbackInterface
{
64 virtual void OnEvent(int fd
, EpollEvent
* event
) OVERRIDE
{
65 DCHECK(event
->in_events
== EPOLLIN
);
68 // Read until the pipe is empty.
69 while (data_read
> 0) {
70 data_read
= read(fd
, &data
, sizeof(data
));
73 virtual void OnShutdown(EpollServer
*eps
, int fd
) OVERRIDE
{}
74 virtual void OnRegistration(EpollServer
*, int, int) OVERRIDE
{}
75 virtual void OnModification(int, int) OVERRIDE
{} // COV_NF_LINE
76 virtual void OnUnregistration(int, bool) OVERRIDE
{} // COV_NF_LINE
79 ////////////////////////////////////////////////////////////////////////////////
80 ////////////////////////////////////////////////////////////////////////////////
82 EpollServer::EpollServer()
83 : epoll_fd_(epoll_create(1024)),
85 recorded_now_in_us_(0),
87 wake_cb_(new ReadPipeCallback
),
90 in_wait_for_events_and_execute_callbacks_(false),
92 // ensure that the epoll_fd_ is valid.
93 CHECK_NE(epoll_fd_
, -1);
94 LIST_INIT(&ready_list_
);
95 LIST_INIT(&tmp_list_
);
98 if (pipe(pipe_fds
) < 0) {
99 // Unfortunately, it is impossible to test any such initialization in
100 // a constructor (as virtual methods do not yet work).
101 // This -could- be solved by moving initialization to an outside
103 int saved_errno
= errno
;
104 char buf
[kErrorBufferSize
];
105 LOG(FATAL
) << "Error " << saved_errno
106 << " in pipe(): " << strerror_r(saved_errno
, buf
, sizeof(buf
));
108 read_fd_
= pipe_fds
[0];
109 write_fd_
= pipe_fds
[1];
110 RegisterFD(read_fd_
, wake_cb_
.get(), EPOLLIN
);
113 void EpollServer::CleanupFDToCBMap() {
114 FDToCBMap::iterator cb_iter
= cb_map_
.begin();
115 while (cb_iter
!= cb_map_
.end()) {
116 int fd
= cb_iter
->fd
;
117 CB
* cb
= cb_iter
->cb
;
119 cb_iter
->in_use
= true;
121 cb
->OnShutdown(this, fd
);
124 cb_map_
.erase(cb_iter
);
125 cb_iter
= cb_map_
.begin();
129 void EpollServer::CleanupTimeToAlarmCBMap() {
130 TimeToAlarmCBMap::iterator erase_it
;
132 // Call OnShutdown() on alarms. Note that the structure of the loop
133 // is similar to the structure of loop in the function HandleAlarms()
134 for (TimeToAlarmCBMap::iterator i
= alarm_map_
.begin();
135 i
!= alarm_map_
.end();
137 // Note that OnShutdown() can call UnregisterAlarm() on
138 // other iterators. OnShutdown() should not call UnregisterAlarm()
139 // on self because by definition the iterator is not valid any more.
140 i
->second
->OnShutdown(this);
143 alarm_map_
.erase(erase_it
);
147 EpollServer::~EpollServer() {
148 DCHECK_EQ(in_shutdown_
, false);
150 #ifdef EPOLL_SERVER_EVENT_TRACING
151 LOG(INFO
) << "\n" << event_recorder_
;
153 VLOG(2) << "Shutting down epoll server ";
156 LIST_INIT(&ready_list_
);
157 LIST_INIT(&tmp_list_
);
159 CleanupTimeToAlarmCBMap();
166 // Whether a CBAandEventMask is on the ready list is determined by a non-NULL
167 // le_prev pointer (le_next being NULL indicates end of list).
168 inline void EpollServer::AddToReadyList(CBAndEventMask
* cb_and_mask
) {
169 if (cb_and_mask
->entry
.le_prev
== NULL
) {
170 LIST_INSERT_HEAD(&ready_list_
, cb_and_mask
, entry
);
175 inline void EpollServer::RemoveFromReadyList(
176 const CBAndEventMask
& cb_and_mask
) {
177 if (cb_and_mask
.entry
.le_prev
!= NULL
) {
178 LIST_REMOVE(&cb_and_mask
, entry
);
179 // Clean up all the ready list states. Don't bother with the other fields
180 // as they are initialized when the CBAandEventMask is added to the ready
181 // list. This saves a few cycles in the inner loop.
182 cb_and_mask
.entry
.le_prev
= NULL
;
184 if (ready_list_size_
== 0) {
185 DCHECK(ready_list_
.lh_first
== NULL
);
186 DCHECK(tmp_list_
.lh_first
== NULL
);
191 void EpollServer::RegisterFD(int fd
, CB
* cb
, int event_mask
) {
193 VLOG(3) << "RegisterFD fd=" << fd
<< " event_mask=" << event_mask
;
194 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
195 if (cb_map_
.end() != fd_i
) {
196 // do we just abort, or do we just unregister the other guy?
197 // for now, lets just unregister the other guy.
199 // unregister any callback that may already be registered for this FD.
200 CB
* other_cb
= fd_i
->cb
;
202 // Must remove from the ready list before erasing.
203 RemoveFromReadyList(*fd_i
);
204 other_cb
->OnUnregistration(fd
, true);
205 ModFD(fd
, event_mask
);
207 // already unregistered, so just recycle the node.
208 AddFD(fd
, event_mask
);
211 fd_i
->event_mask
= event_mask
;
212 fd_i
->events_to_fake
= 0;
214 AddFD(fd
, event_mask
);
215 cb_map_
.insert(CBAndEventMask(cb
, event_mask
, fd
));
219 // set the FD to be non-blocking.
222 cb
->OnRegistration(this, fd
, event_mask
);
225 int EpollServer::GetFlags(int fd
) {
226 return fcntl(fd
, F_GETFL
, 0);
229 void EpollServer::SetNonblocking(int fd
) {
230 int flags
= GetFlags(fd
);
232 int saved_errno
= errno
;
233 char buf
[kErrorBufferSize
];
234 LOG(FATAL
) << "Error " << saved_errno
235 << " doing fcntl(" << fd
<< ", F_GETFL, 0): "
236 << strerror_r(saved_errno
, buf
, sizeof(buf
));
238 if (!(flags
& O_NONBLOCK
)) {
239 int saved_flags
= flags
;
240 flags
= SetFlags(fd
, flags
| O_NONBLOCK
);
243 int saved_errno
= errno
;
244 char buf
[kErrorBufferSize
];
245 LOG(FATAL
) << "Error " << saved_errno
246 << " doing fcntl(" << fd
<< ", F_SETFL, " << saved_flags
<< "): "
247 << strerror_r(saved_errno
, buf
, sizeof(buf
));
252 int EpollServer::epoll_wait_impl(int epfd
,
253 struct epoll_event
* events
,
256 return epoll_wait(epfd
, events
, max_events
, timeout_in_ms
);
259 void EpollServer::RegisterFDForWrite(int fd
, CB
* cb
) {
260 RegisterFD(fd
, cb
, EPOLLOUT
);
263 void EpollServer::RegisterFDForReadWrite(int fd
, CB
* cb
) {
264 RegisterFD(fd
, cb
, EPOLLIN
| EPOLLOUT
);
267 void EpollServer::RegisterFDForRead(int fd
, CB
* cb
) {
268 RegisterFD(fd
, cb
, EPOLLIN
);
271 void EpollServer::UnregisterFD(int fd
) {
272 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
273 if (cb_map_
.end() == fd_i
|| fd_i
->cb
== NULL
) {
274 // Doesn't exist in server, or has gone through UnregisterFD once and still
275 // inside the callchain of OnEvent.
278 #ifdef EPOLL_SERVER_EVENT_TRACING
279 event_recorder_
.RecordUnregistration(fd
);
282 // Since the links are embedded within the struct, we must remove it from the
283 // list before erasing it from the hash_set.
284 RemoveFromReadyList(*fd_i
);
286 cb
->OnUnregistration(fd
, false);
287 // fd_i->cb is NULL if that fd is unregistered inside the callchain of
288 // OnEvent. Since the EpollServer needs a valid CBAndEventMask after OnEvent
289 // returns in order to add it to the ready list, we cannot have UnregisterFD
290 // erase the entry if it is in use. Thus, a NULL fd_i->cb is used as a
291 // condition that tells the EpollServer that this entry is unused at a later
296 // Remove all trace of the registration, and just keep the node alive long
297 // enough so the code that calls OnEvent doesn't have to worry about
298 // figuring out whether the CBAndEventMask is valid or not.
300 fd_i
->event_mask
= 0;
301 fd_i
->events_to_fake
= 0;
305 void EpollServer::ModifyCallback(int fd
, int event_mask
) {
306 ModifyFD(fd
, ~0, event_mask
);
309 void EpollServer::StopRead(int fd
) {
310 ModifyFD(fd
, EPOLLIN
, 0);
313 void EpollServer::StartRead(int fd
) {
314 ModifyFD(fd
, 0, EPOLLIN
);
317 void EpollServer::StopWrite(int fd
) {
318 ModifyFD(fd
, EPOLLOUT
, 0);
321 void EpollServer::StartWrite(int fd
) {
322 ModifyFD(fd
, 0, EPOLLOUT
);
325 void EpollServer::HandleEvent(int fd
, int event_mask
) {
326 #ifdef EPOLL_SERVER_EVENT_TRACING
327 event_recorder_
.RecordEpollEvent(fd
, event_mask
);
329 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
330 if (fd_i
== cb_map_
.end() || fd_i
->cb
== NULL
) {
332 // This could occur if epoll() returns a set of events, and
333 // while processing event A (earlier) we removed the callback
334 // for event B (and are now processing event B).
337 fd_i
->events_asserted
= event_mask
;
338 CBAndEventMask
* cb_and_mask
= const_cast<CBAndEventMask
*>(&*fd_i
);
339 AddToReadyList(cb_and_mask
);
342 class TrueFalseGuard
{
344 explicit TrueFalseGuard(bool* guarded_bool
) : guarded_bool_(guarded_bool
) {
345 DCHECK(guarded_bool_
!= NULL
);
346 DCHECK(*guarded_bool_
== false);
347 *guarded_bool_
= true;
350 *guarded_bool_
= false;
356 void EpollServer::WaitForEventsAndExecuteCallbacks() {
357 if (in_wait_for_events_and_execute_callbacks_
) {
359 "Attempting to call WaitForEventsAndExecuteCallbacks"
360 " when an ancestor to the current function is already"
361 " WaitForEventsAndExecuteCallbacks!";
362 // The line below is actually tested, but in coverage mode,
364 return; // COV_NF_LINE
366 TrueFalseGuard
recursion_guard(&in_wait_for_events_and_execute_callbacks_
);
367 if (alarm_map_
.empty()) {
368 // no alarms, this is business as usual.
369 WaitForEventsAndCallHandleEvents(timeout_in_us_
,
372 recorded_now_in_us_
= 0;
376 // store the 'now'. If we recomputed 'now' every iteration
377 // down below, then we might never exit that loop-- any
378 // long-running alarms might install other long-running
379 // alarms, etc. By storing it here now, we ensure that
380 // a more reasonable amount of work is done here.
381 int64 now_in_us
= NowInUsec();
383 // Get the first timeout from the alarm_map where it is
384 // stored in absolute time.
385 int64 next_alarm_time_in_us
= alarm_map_
.begin()->first
;
386 VLOG(4) << "next_alarm_time = " << next_alarm_time_in_us
387 << " now = " << now_in_us
388 << " timeout_in_us = " << timeout_in_us_
;
390 int64 wait_time_in_us
;
391 int64 alarm_timeout_in_us
= next_alarm_time_in_us
- now_in_us
;
393 // If the next alarm is sooner than the default timeout, or if there is no
394 // timeout (timeout_in_us_ == -1), wake up when the alarm should fire.
395 // Otherwise use the default timeout.
396 if (alarm_timeout_in_us
< timeout_in_us_
|| timeout_in_us_
< 0) {
397 wait_time_in_us
= std::max(alarm_timeout_in_us
, static_cast<int64
>(0));
399 wait_time_in_us
= timeout_in_us_
;
402 VLOG(4) << "wait_time_in_us = " << wait_time_in_us
;
406 WaitForEventsAndCallHandleEvents(wait_time_in_us
,
409 CallAndReregisterAlarmEvents();
410 recorded_now_in_us_
= 0;
413 void EpollServer::SetFDReady(int fd
, int events_to_fake
) {
414 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
415 if (cb_map_
.end() != fd_i
&& fd_i
->cb
!= NULL
) {
416 // This const_cast is necessary for LIST_HEAD_INSERT to work. Declaring
417 // entry mutable is insufficient because LIST_HEAD_INSERT assigns the
418 // forward pointer of the list head to the current cb_and_mask, and the
419 // compiler complains that it can't assign a const T* to a T*.
420 CBAndEventMask
* cb_and_mask
= const_cast<CBAndEventMask
*>(&*fd_i
);
421 // Note that there is no clearly correct behavior here when
422 // cb_and_mask->events_to_fake != 0 and this function is called.
423 // Of the two operations:
424 // cb_and_mask->events_to_fake = events_to_fake
425 // cb_and_mask->events_to_fake |= events_to_fake
426 // the first was picked because it discourages users from calling
427 // SetFDReady repeatedly to build up the correct event set as it is more
428 // efficient to call SetFDReady once with the correct, final mask.
429 cb_and_mask
->events_to_fake
= events_to_fake
;
430 AddToReadyList(cb_and_mask
);
434 void EpollServer::SetFDNotReady(int fd
) {
435 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
436 if (cb_map_
.end() != fd_i
) {
437 RemoveFromReadyList(*fd_i
);
441 bool EpollServer::IsFDReady(int fd
) const {
442 FDToCBMap::const_iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
443 return (cb_map_
.end() != fd_i
&&
445 fd_i
->entry
.le_prev
!= NULL
);
448 void EpollServer::VerifyReadyList() const {
450 CBAndEventMask
* cur
= ready_list_
.lh_first
;
451 for (; cur
; cur
= cur
->entry
.le_next
) {
454 for (cur
= tmp_list_
.lh_first
; cur
; cur
= cur
->entry
.le_next
) {
457 CHECK_EQ(ready_list_size_
, count
) << "Ready list size does not match count";
460 void EpollServer::RegisterAlarm(int64 timeout_time_in_us
, AlarmCB
* ac
) {
462 if (ContainsAlarm(ac
)) {
463 LOG(FATAL
) << "Alarm already exists " << ac
;
465 VLOG(4) << "RegisteringAlarm at : " << timeout_time_in_us
;
467 TimeToAlarmCBMap::iterator alarm_iter
=
468 alarm_map_
.insert(std::make_pair(timeout_time_in_us
, ac
));
470 all_alarms_
.insert(ac
);
471 // Pass the iterator to the EpollAlarmCallbackInterface.
472 ac
->OnRegistration(alarm_iter
, this);
475 // Unregister a specific alarm callback: iterator_token must be a
476 // valid iterator. The caller must ensure the validity of the iterator.
477 void EpollServer::UnregisterAlarm(const AlarmRegToken
& iterator_token
) {
478 AlarmCB
* cb
= iterator_token
->second
;
479 alarm_map_
.erase(iterator_token
);
480 all_alarms_
.erase(cb
);
481 cb
->OnUnregistration();
484 int EpollServer::NumFDsRegistered() const {
485 DCHECK(cb_map_
.size() >= 1);
486 // Omit the internal FD (read_fd_)
487 return cb_map_
.size() - 1;
490 void EpollServer::Wake() {
491 char data
= 'd'; // 'd' is for data. It's good enough for me.
492 int rv
= write(write_fd_
, &data
, 1);
496 int64
EpollServer::NowInUsec() const {
497 return base::Time::Now().ToInternalValue();
500 int64
EpollServer::ApproximateNowInUsec() const {
501 if (recorded_now_in_us_
!= 0) {
502 return recorded_now_in_us_
;
504 return this->NowInUsec();
507 std::string
EpollServer::EventMaskToString(int event_mask
) {
509 if (event_mask
& EPOLLIN
) s
+= "EPOLLIN ";
510 if (event_mask
& EPOLLPRI
) s
+= "EPOLLPRI ";
511 if (event_mask
& EPOLLOUT
) s
+= "EPOLLOUT ";
512 if (event_mask
& EPOLLRDNORM
) s
+= "EPOLLRDNORM ";
513 if (event_mask
& EPOLLRDBAND
) s
+= "EPOLLRDBAND ";
514 if (event_mask
& EPOLLWRNORM
) s
+= "EPOLLWRNORM ";
515 if (event_mask
& EPOLLWRBAND
) s
+= "EPOLLWRBAND ";
516 if (event_mask
& EPOLLMSG
) s
+= "EPOLLMSG ";
517 if (event_mask
& EPOLLERR
) s
+= "EPOLLERR ";
518 if (event_mask
& EPOLLHUP
) s
+= "EPOLLHUP ";
519 if (event_mask
& EPOLLONESHOT
) s
+= "EPOLLONESHOT ";
520 if (event_mask
& EPOLLET
) s
+= "EPOLLET ";
524 void EpollServer::LogStateOnCrash() {
525 LOG(ERROR
) << "----------------------Epoll Server---------------------------";
526 LOG(ERROR
) << "Epoll server " << this << " polling on fd " << epoll_fd_
;
527 LOG(ERROR
) << "timeout_in_us_: " << timeout_in_us_
;
529 // Log sessions with alarms.
530 LOG(ERROR
) << alarm_map_
.size() << " alarms registered.";
531 for (TimeToAlarmCBMap::iterator it
= alarm_map_
.begin();
532 it
!= alarm_map_
.end();
535 alarms_reregistered_and_should_be_skipped_
.find(it
->second
)
536 != alarms_reregistered_and_should_be_skipped_
.end();
537 LOG(ERROR
) << "Alarm " << it
->second
<< " registered at time " << it
->first
538 << " and should be skipped = " << skipped
;
541 LOG(ERROR
) << cb_map_
.size() << " fd callbacks registered.";
542 for (FDToCBMap::iterator it
= cb_map_
.begin();
545 LOG(ERROR
) << "fd: " << it
->fd
<< " with mask " << it
->event_mask
546 << " registered with cb: " << it
->cb
;
548 LOG(ERROR
) << "----------------------/Epoll Server--------------------------";
553 ////////////////////////////////////////////////////////////////////////////////
554 ////////////////////////////////////////////////////////////////////////////////
556 void EpollServer::DelFD(int fd
) const {
557 struct epoll_event ee
;
558 memset(&ee
, 0, sizeof(ee
));
559 #ifdef EPOLL_SERVER_EVENT_TRACING
560 event_recorder_
.RecordFDMaskEvent(fd
, 0, "DelFD");
562 if (epoll_ctl(epoll_fd_
, EPOLL_CTL_DEL
, fd
, &ee
)) {
563 int saved_errno
= errno
;
564 char buf
[kErrorBufferSize
];
565 LOG(FATAL
) << "Epoll set removal error for fd " << fd
<< ": "
566 << strerror_r(saved_errno
, buf
, sizeof(buf
));
570 ////////////////////////////////////////
572 void EpollServer::AddFD(int fd
, int event_mask
) const {
573 struct epoll_event ee
;
574 memset(&ee
, 0, sizeof(ee
));
575 ee
.events
= event_mask
| EPOLLERR
| EPOLLHUP
;
577 #ifdef EPOLL_SERVER_EVENT_TRACING
578 event_recorder_
.RecordFDMaskEvent(fd
, ee
.events
, "AddFD");
580 if (epoll_ctl(epoll_fd_
, EPOLL_CTL_ADD
, fd
, &ee
)) {
581 int saved_errno
= errno
;
582 char buf
[kErrorBufferSize
];
583 LOG(FATAL
) << "Epoll set insertion error for fd " << fd
<< ": "
584 << strerror_r(saved_errno
, buf
, sizeof(buf
));
588 ////////////////////////////////////////
590 void EpollServer::ModFD(int fd
, int event_mask
) const {
591 struct epoll_event ee
;
592 memset(&ee
, 0, sizeof(ee
));
593 ee
.events
= event_mask
| EPOLLERR
| EPOLLHUP
;
595 #ifdef EPOLL_SERVER_EVENT_TRACING
596 event_recorder_
.RecordFDMaskEvent(fd
, ee
.events
, "ModFD");
598 VLOG(3) << "modifying fd= " << fd
<< " "
599 << EventMaskToString(ee
.events
);
600 if (epoll_ctl(epoll_fd_
, EPOLL_CTL_MOD
, fd
, &ee
)) {
601 int saved_errno
= errno
;
602 char buf
[kErrorBufferSize
];
603 LOG(FATAL
) << "Epoll set modification error for fd " << fd
<< ": "
604 << strerror_r(saved_errno
, buf
, sizeof(buf
));
608 ////////////////////////////////////////
610 void EpollServer::ModifyFD(int fd
, int remove_event
, int add_event
) {
611 FDToCBMap::iterator fd_i
= cb_map_
.find(CBAndEventMask(NULL
, 0, fd
));
612 if (cb_map_
.end() == fd_i
) {
613 VLOG(2) << "Didn't find the fd " << fd
<< "in internal structures";
617 if (fd_i
->cb
!= NULL
) {
618 int & event_mask
= fd_i
->event_mask
;
619 VLOG(3) << "fd= " << fd
620 << " event_mask before: " << EventMaskToString(event_mask
);
621 event_mask
&= ~remove_event
;
622 event_mask
|= add_event
;
624 VLOG(3) << " event_mask after: " << EventMaskToString(event_mask
);
626 ModFD(fd
, event_mask
);
628 fd_i
->cb
->OnModification(fd
, event_mask
);
632 void EpollServer::WaitForEventsAndCallHandleEvents(int64 timeout_in_us
,
633 struct epoll_event events
[],
635 if (timeout_in_us
== 0 || ready_list_
.lh_first
!= NULL
) {
636 // If ready list is not empty, then don't sleep at all.
638 } else if (timeout_in_us
< 0) {
639 LOG(INFO
) << "Negative epoll timeout: " << timeout_in_us
640 << "us; epoll will wait forever for events.";
641 // If timeout_in_us is < 0 we are supposed to Wait forever. This means we
642 // should set timeout_in_us to -1000 so we will
643 // Wait(-1000/1000) == Wait(-1) == Wait forever.
644 timeout_in_us
= -1000;
646 // If timeout is specified, and the ready list is empty.
647 if (timeout_in_us
< 1000) {
648 timeout_in_us
= 1000;
651 const int timeout_in_ms
= timeout_in_us
/ 1000;
652 int nfds
= epoll_wait_impl(epoll_fd_
,
656 VLOG(3) << "nfds=" << nfds
;
658 #ifdef EPOLL_SERVER_EVENT_TRACING
659 event_recorder_
.RecordEpollWaitEvent(timeout_in_ms
, nfds
);
662 // If you're wondering why the NowInUsec() is recorded here, the answer is
663 // simple: If we did it before the epoll_wait_impl, then the max error for
664 // the ApproximateNowInUs() call would be as large as the maximum length of
665 // epoll_wait, which can be arbitrarily long. Since this would make
666 // ApproximateNowInUs() worthless, we instead record the time -after- we've
667 // done epoll_wait, which guarantees that the maximum error is the amount of
668 // time it takes to process all the events generated by epoll_wait.
669 recorded_now_in_us_
= NowInUsec();
671 for (int i
= 0; i
< nfds
; ++i
) {
672 int event_mask
= events
[i
].events
;
673 int fd
= events
[i
].data
.fd
;
674 HandleEvent(fd
, event_mask
);
676 } else if (nfds
< 0) {
677 // Catch interrupted syscall and just ignore it and move on.
678 if (errno
!= EINTR
&& errno
!= 0) {
679 int saved_errno
= errno
;
680 char buf
[kErrorBufferSize
];
681 LOG(FATAL
) << "Error " << saved_errno
<< " in epoll_wait: "
682 << strerror_r(saved_errno
, buf
, sizeof(buf
));
686 // Now run through the ready list.
687 if (ready_list_
.lh_first
) {
688 CallReadyListCallbacks();
692 void EpollServer::CallReadyListCallbacks() {
693 // Check pre-conditions.
694 DCHECK(tmp_list_
.lh_first
== NULL
);
695 // Swap out the ready_list_ into the tmp_list_ before traversing the list to
696 // enable SetFDReady() to just push new items into the ready_list_.
697 std::swap(ready_list_
.lh_first
, tmp_list_
.lh_first
);
698 if (tmp_list_
.lh_first
) {
699 tmp_list_
.lh_first
->entry
.le_prev
= &tmp_list_
.lh_first
;
700 EpollEvent
event(0, false);
701 while (tmp_list_
.lh_first
!= NULL
) {
702 DCHECK_GT(ready_list_size_
, 0);
703 CBAndEventMask
* cb_and_mask
= tmp_list_
.lh_first
;
704 RemoveFromReadyList(*cb_and_mask
);
706 event
.out_ready_mask
= 0;
708 cb_and_mask
->events_asserted
| cb_and_mask
->events_to_fake
;
709 // TODO(fenix): get rid of the two separate fields in cb_and_mask.
710 cb_and_mask
->events_asserted
= 0;
711 cb_and_mask
->events_to_fake
= 0;
713 // OnEvent() may call UnRegister, so we set in_use, here. Any
714 // UnRegister call will now simply set the cb to NULL instead of
715 // invalidating the cb_and_mask object (by deleting the object in the
716 // map to which cb_and_mask refers)
717 TrueFalseGuard
in_use_guard(&(cb_and_mask
->in_use
));
718 cb_and_mask
->cb
->OnEvent(cb_and_mask
->fd
, &event
);
721 // Since OnEvent may have called UnregisterFD, we must check here that
722 // the callback is still valid. If it isn't, then UnregisterFD *was*
723 // called, and we should now get rid of the object.
724 if (cb_and_mask
->cb
== NULL
) {
725 cb_map_
.erase(*cb_and_mask
);
726 } else if (event
.out_ready_mask
!= 0) {
727 cb_and_mask
->events_to_fake
= event
.out_ready_mask
;
728 AddToReadyList(cb_and_mask
);
732 DCHECK(tmp_list_
.lh_first
== NULL
);
735 void EpollServer::CallAndReregisterAlarmEvents() {
736 int64 now_in_us
= recorded_now_in_us_
;
737 DCHECK_NE(0, recorded_now_in_us_
);
739 TimeToAlarmCBMap::iterator erase_it
;
742 for (TimeToAlarmCBMap::iterator i
= alarm_map_
.begin();
743 i
!= alarm_map_
.end();
745 if (i
->first
> now_in_us
) {
748 AlarmCB
* cb
= i
->second
;
749 // Execute the OnAlarm() only if we did not register
750 // it in this loop itself.
751 const bool added_in_this_round
=
752 alarms_reregistered_and_should_be_skipped_
.find(cb
)
753 != alarms_reregistered_and_should_be_skipped_
.end();
754 if (added_in_this_round
) {
758 all_alarms_
.erase(cb
);
759 const int64 new_timeout_time_in_us
= cb
->OnAlarm();
763 alarm_map_
.erase(erase_it
);
765 if (new_timeout_time_in_us
> 0) {
766 // We add to hash_set only if the new timeout is <= now_in_us.
767 // if timeout is > now_in_us then we have no fear that this alarm
768 // can be reexecuted in this loop, and hence we do not need to
769 // worry about a recursive loop.
770 DVLOG(3) << "Reregistering alarm "
772 << " " << new_timeout_time_in_us
774 if (new_timeout_time_in_us
<= now_in_us
) {
775 alarms_reregistered_and_should_be_skipped_
.insert(cb
);
777 RegisterAlarm(new_timeout_time_in_us
, cb
);
780 alarms_reregistered_and_should_be_skipped_
.clear();
783 EpollAlarm::EpollAlarm() : eps_(NULL
), registered_(false) {
786 EpollAlarm::~EpollAlarm() {
787 UnregisterIfRegistered();
790 int64
EpollAlarm::OnAlarm() {
795 void EpollAlarm::OnRegistration(const EpollServer::AlarmRegToken
& token
,
797 DCHECK_EQ(false, registered_
);
804 void EpollAlarm::OnUnregistration() {
808 void EpollAlarm::OnShutdown(EpollServer
* eps
) {
813 // If the alarm was registered, unregister it.
814 void EpollAlarm::UnregisterIfRegistered() {
818 eps_
->UnregisterAlarm(token_
);