QUIC - enable persisting of QUICServerInfo (server config) to disk
[chromium-blink-merge.git] / net / socket / tcp_socket_libevent.cc
blob444e3c04231ce3f5279e56dad7e43d557d7bc86e
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/socket/tcp_socket.h"
7 #include <errno.h>
8 #include <netinet/tcp.h>
9 #include <sys/socket.h>
11 #include "base/bind.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/posix/eintr_wrapper.h"
16 #include "net/base/address_list.h"
17 #include "net/base/connection_type_histograms.h"
18 #include "net/base/io_buffer.h"
19 #include "net/base/ip_endpoint.h"
20 #include "net/base/net_errors.h"
21 #include "net/base/net_util.h"
22 #include "net/base/network_change_notifier.h"
23 #include "net/socket/socket_libevent.h"
24 #include "net/socket/socket_net_log_params.h"
26 // If we don't have a definition for TCPI_OPT_SYN_DATA, create one.
27 #ifndef TCPI_OPT_SYN_DATA
28 #define TCPI_OPT_SYN_DATA 32
29 #endif
31 namespace net {
33 namespace {
// Toggles the TCP_NODELAY option on |fd| (see `man 7 tcp`). By default the
// kernel may hold back a small write for up to 200ms hoping to fill a packet;
// passing |no_delay| = true tells it to transmit without waiting, and false
// restores the buffering. Returns true iff setsockopt() succeeded.
bool SetTCPNoDelay(int fd, bool no_delay) {
  int flag = 0;
  if (no_delay)
    flag = 1;
  return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(flag)) == 0;
}
45 // SetTCPKeepAlive sets SO_KEEPALIVE.
46 bool SetTCPKeepAlive(int fd, bool enable, int delay) {
47 int on = enable ? 1 : 0;
48 if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on))) {
49 PLOG(ERROR) << "Failed to set SO_KEEPALIVE on fd: " << fd;
50 return false;
53 // If we disabled TCP keep alive, our work is done here.
54 if (!enable)
55 return true;
57 #if defined(OS_LINUX) || defined(OS_ANDROID)
58 // Set seconds until first TCP keep alive.
59 if (setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, &delay, sizeof(delay))) {
60 PLOG(ERROR) << "Failed to set TCP_KEEPIDLE on fd: " << fd;
61 return false;
63 // Set seconds between TCP keep alives.
64 if (setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, &delay, sizeof(delay))) {
65 PLOG(ERROR) << "Failed to set TCP_KEEPINTVL on fd: " << fd;
66 return false;
68 #endif
69 return true;
72 } // namespace
74 //-----------------------------------------------------------------------------
76 TCPSocketLibevent::TCPSocketLibevent(NetLog* net_log,
77 const NetLog::Source& source)
78 : use_tcp_fastopen_(IsTCPFastOpenEnabled()),
79 tcp_fastopen_connected_(false),
80 fast_open_status_(FAST_OPEN_STATUS_UNKNOWN),
81 logging_multiple_connect_attempts_(false),
82 net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
83 net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
84 source.ToEventParametersCallback());
87 TCPSocketLibevent::~TCPSocketLibevent() {
88 net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
89 if (tcp_fastopen_connected_) {
90 UMA_HISTOGRAM_ENUMERATION("Net.TcpFastOpenSocketConnection",
91 fast_open_status_, FAST_OPEN_MAX_VALUE);
95 int TCPSocketLibevent::Open(AddressFamily family) {
96 DCHECK(!socket_);
97 socket_.reset(new SocketLibevent);
98 int rv = socket_->Open(ConvertAddressFamily(family));
99 if (rv != OK)
100 socket_.reset();
101 return rv;
104 int TCPSocketLibevent::AdoptConnectedSocket(int socket_fd,
105 const IPEndPoint& peer_address) {
106 DCHECK(!socket_);
108 SockaddrStorage storage;
109 if (!peer_address.ToSockAddr(storage.addr, &storage.addr_len) &&
110 // For backward compatibility, allows the empty address.
111 !(peer_address == IPEndPoint())) {
112 return ERR_ADDRESS_INVALID;
115 socket_.reset(new SocketLibevent);
116 int rv = socket_->AdoptConnectedSocket(socket_fd, storage);
117 if (rv != OK)
118 socket_.reset();
119 return rv;
122 int TCPSocketLibevent::Bind(const IPEndPoint& address) {
123 DCHECK(socket_);
125 SockaddrStorage storage;
126 if (!address.ToSockAddr(storage.addr, &storage.addr_len))
127 return ERR_ADDRESS_INVALID;
129 return socket_->Bind(storage);
132 int TCPSocketLibevent::Listen(int backlog) {
133 DCHECK(socket_);
134 return socket_->Listen(backlog);
137 int TCPSocketLibevent::Accept(scoped_ptr<TCPSocketLibevent>* tcp_socket,
138 IPEndPoint* address,
139 const CompletionCallback& callback) {
140 DCHECK(tcp_socket);
141 DCHECK(!callback.is_null());
142 DCHECK(socket_);
143 DCHECK(!accept_socket_);
145 net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);
147 int rv = socket_->Accept(
148 &accept_socket_,
149 base::Bind(&TCPSocketLibevent::AcceptCompleted,
150 base::Unretained(this), tcp_socket, address, callback));
151 if (rv != ERR_IO_PENDING)
152 rv = HandleAcceptCompleted(tcp_socket, address, rv);
153 return rv;
156 int TCPSocketLibevent::Connect(const IPEndPoint& address,
157 const CompletionCallback& callback) {
158 DCHECK(socket_);
160 if (!logging_multiple_connect_attempts_)
161 LogConnectBegin(AddressList(address));
163 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
164 CreateNetLogIPEndPointCallback(&address));
166 SockaddrStorage storage;
167 if (!address.ToSockAddr(storage.addr, &storage.addr_len))
168 return ERR_ADDRESS_INVALID;
170 if (use_tcp_fastopen_) {
171 // With TCP FastOpen, we pretend that the socket is connected.
172 DCHECK(!tcp_fastopen_connected_);
173 socket_->SetPeerAddress(storage);
174 return OK;
177 int rv = socket_->Connect(storage,
178 base::Bind(&TCPSocketLibevent::ConnectCompleted,
179 base::Unretained(this), callback));
180 if (rv != ERR_IO_PENDING)
181 rv = HandleConnectCompleted(rv);
182 return rv;
185 bool TCPSocketLibevent::IsConnected() const {
186 if (!socket_)
187 return false;
189 if (use_tcp_fastopen_ && !tcp_fastopen_connected_ &&
190 socket_->HasPeerAddress()) {
191 // With TCP FastOpen, we pretend that the socket is connected.
192 // This allows GetPeerAddress() to return peer_address_.
193 return true;
196 return socket_->IsConnected();
199 bool TCPSocketLibevent::IsConnectedAndIdle() const {
200 // TODO(wtc): should we also handle the TCP FastOpen case here,
201 // as we do in IsConnected()?
202 return socket_ && socket_->IsConnectedAndIdle();
205 int TCPSocketLibevent::Read(IOBuffer* buf,
206 int buf_len,
207 const CompletionCallback& callback) {
208 DCHECK(socket_);
209 DCHECK(!callback.is_null());
211 int rv = socket_->Read(
212 buf, buf_len,
213 base::Bind(&TCPSocketLibevent::ReadCompleted,
214 // Grab a reference to |buf| so that ReadCompleted() can still
215 // use it when Read() completes, as otherwise, this transfers
216 // ownership of buf to socket.
217 base::Unretained(this), make_scoped_refptr(buf), callback));
218 if (rv >= 0)
219 RecordFastOpenStatus();
220 if (rv != ERR_IO_PENDING)
221 rv = HandleReadCompleted(buf, rv);
222 return rv;
225 int TCPSocketLibevent::Write(IOBuffer* buf,
226 int buf_len,
227 const CompletionCallback& callback) {
228 DCHECK(socket_);
229 DCHECK(!callback.is_null());
231 CompletionCallback write_callback =
232 base::Bind(&TCPSocketLibevent::WriteCompleted,
233 // Grab a reference to |buf| so that WriteCompleted() can still
234 // use it when Write() completes, as otherwise, this transfers
235 // ownership of buf to socket.
236 base::Unretained(this), make_scoped_refptr(buf), callback);
237 int rv;
238 if (use_tcp_fastopen_ && !tcp_fastopen_connected_) {
239 rv = TcpFastOpenWrite(buf, buf_len, write_callback);
240 } else {
241 rv = socket_->Write(buf, buf_len, write_callback);
244 if (rv != ERR_IO_PENDING)
245 rv = HandleWriteCompleted(buf, rv);
246 return rv;
249 int TCPSocketLibevent::GetLocalAddress(IPEndPoint* address) const {
250 DCHECK(address);
252 if (!socket_)
253 return ERR_SOCKET_NOT_CONNECTED;
255 SockaddrStorage storage;
256 int rv = socket_->GetLocalAddress(&storage);
257 if (rv != OK)
258 return rv;
260 if (!address->FromSockAddr(storage.addr, storage.addr_len))
261 return ERR_ADDRESS_INVALID;
263 return OK;
266 int TCPSocketLibevent::GetPeerAddress(IPEndPoint* address) const {
267 DCHECK(address);
269 if (!IsConnected())
270 return ERR_SOCKET_NOT_CONNECTED;
272 SockaddrStorage storage;
273 int rv = socket_->GetPeerAddress(&storage);
274 if (rv != OK)
275 return rv;
277 if (!address->FromSockAddr(storage.addr, storage.addr_len))
278 return ERR_ADDRESS_INVALID;
280 return OK;
283 int TCPSocketLibevent::SetDefaultOptionsForServer() {
284 DCHECK(socket_);
285 return SetAddressReuse(true);
288 void TCPSocketLibevent::SetDefaultOptionsForClient() {
289 DCHECK(socket_);
291 // This mirrors the behaviour on Windows. See the comment in
292 // tcp_socket_win.cc after searching for "NODELAY".
293 // If SetTCPNoDelay fails, we don't care.
294 SetTCPNoDelay(socket_->socket_fd(), true);
296 // TCP keep alive wakes up the radio, which is expensive on mobile. Do not
297 // enable it there. It's useful to prevent TCP middleboxes from timing out
298 // connection mappings. Packets for timed out connection mappings at
299 // middleboxes will either lead to:
300 // a) Middleboxes sending TCP RSTs. It's up to higher layers to check for this
301 // and retry. The HTTP network transaction code does this.
302 // b) Middleboxes just drop the unrecognized TCP packet. This leads to the TCP
303 // stack retransmitting packets per TCP stack retransmission timeouts, which
304 // are very high (on the order of seconds). Given the number of
305 // retransmissions required before killing the connection, this can lead to
306 // tens of seconds or even minutes of delay, depending on OS.
307 #if !defined(OS_ANDROID) && !defined(OS_IOS)
308 const int kTCPKeepAliveSeconds = 45;
310 SetTCPKeepAlive(socket_->socket_fd(), true, kTCPKeepAliveSeconds);
311 #endif
314 int TCPSocketLibevent::SetAddressReuse(bool allow) {
315 DCHECK(socket_);
317 // SO_REUSEADDR is useful for server sockets to bind to a recently unbound
318 // port. When a socket is closed, the end point changes its state to TIME_WAIT
319 // and wait for 2 MSL (maximum segment lifetime) to ensure the remote peer
320 // acknowledges its closure. For server sockets, it is usually safe to
321 // bind to a TIME_WAIT end point immediately, which is a widely adopted
322 // behavior.
324 // Note that on *nix, SO_REUSEADDR does not enable the TCP socket to bind to
325 // an end point that is already bound by another socket. To do that one must
326 // set SO_REUSEPORT instead. This option is not provided on Linux prior
327 // to 3.9.
329 // SO_REUSEPORT is provided in MacOS X and iOS.
330 int boolean_value = allow ? 1 : 0;
331 int rv = setsockopt(socket_->socket_fd(), SOL_SOCKET, SO_REUSEADDR,
332 &boolean_value, sizeof(boolean_value));
333 if (rv < 0)
334 return MapSystemError(errno);
335 return OK;
338 int TCPSocketLibevent::SetReceiveBufferSize(int32 size) {
339 DCHECK(socket_);
340 int rv = setsockopt(socket_->socket_fd(), SOL_SOCKET, SO_RCVBUF,
341 reinterpret_cast<const char*>(&size), sizeof(size));
342 return (rv == 0) ? OK : MapSystemError(errno);
345 int TCPSocketLibevent::SetSendBufferSize(int32 size) {
346 DCHECK(socket_);
347 int rv = setsockopt(socket_->socket_fd(), SOL_SOCKET, SO_SNDBUF,
348 reinterpret_cast<const char*>(&size), sizeof(size));
349 return (rv == 0) ? OK : MapSystemError(errno);
352 bool TCPSocketLibevent::SetKeepAlive(bool enable, int delay) {
353 DCHECK(socket_);
354 return SetTCPKeepAlive(socket_->socket_fd(), enable, delay);
357 bool TCPSocketLibevent::SetNoDelay(bool no_delay) {
358 DCHECK(socket_);
359 return SetTCPNoDelay(socket_->socket_fd(), no_delay);
362 void TCPSocketLibevent::Close() {
363 socket_.reset();
364 tcp_fastopen_connected_ = false;
365 fast_open_status_ = FAST_OPEN_STATUS_UNKNOWN;
368 bool TCPSocketLibevent::UsingTCPFastOpen() const {
369 return use_tcp_fastopen_;
372 bool TCPSocketLibevent::IsValid() const {
373 return socket_ != NULL && socket_->socket_fd() != kInvalidSocket;
376 void TCPSocketLibevent::StartLoggingMultipleConnectAttempts(
377 const AddressList& addresses) {
378 if (!logging_multiple_connect_attempts_) {
379 logging_multiple_connect_attempts_ = true;
380 LogConnectBegin(addresses);
381 } else {
382 NOTREACHED();
386 void TCPSocketLibevent::EndLoggingMultipleConnectAttempts(int net_error) {
387 if (logging_multiple_connect_attempts_) {
388 LogConnectEnd(net_error);
389 logging_multiple_connect_attempts_ = false;
390 } else {
391 NOTREACHED();
395 void TCPSocketLibevent::AcceptCompleted(
396 scoped_ptr<TCPSocketLibevent>* tcp_socket,
397 IPEndPoint* address,
398 const CompletionCallback& callback,
399 int rv) {
400 DCHECK_NE(ERR_IO_PENDING, rv);
401 callback.Run(HandleAcceptCompleted(tcp_socket, address, rv));
404 int TCPSocketLibevent::HandleAcceptCompleted(
405 scoped_ptr<TCPSocketLibevent>* tcp_socket,
406 IPEndPoint* address,
407 int rv) {
408 if (rv == OK)
409 rv = BuildTcpSocketLibevent(tcp_socket, address);
411 if (rv == OK) {
412 net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
413 CreateNetLogIPEndPointCallback(address));
414 } else {
415 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, rv);
418 return rv;
421 int TCPSocketLibevent::BuildTcpSocketLibevent(
422 scoped_ptr<TCPSocketLibevent>* tcp_socket,
423 IPEndPoint* address) {
424 DCHECK(accept_socket_);
426 SockaddrStorage storage;
427 if (accept_socket_->GetPeerAddress(&storage) != OK ||
428 !address->FromSockAddr(storage.addr, storage.addr_len)) {
429 accept_socket_.reset();
430 return ERR_ADDRESS_INVALID;
433 tcp_socket->reset(new TCPSocketLibevent(net_log_.net_log(),
434 net_log_.source()));
435 (*tcp_socket)->socket_.reset(accept_socket_.release());
436 return OK;
439 void TCPSocketLibevent::ConnectCompleted(const CompletionCallback& callback,
440 int rv) const {
441 DCHECK_NE(ERR_IO_PENDING, rv);
442 callback.Run(HandleConnectCompleted(rv));
445 int TCPSocketLibevent::HandleConnectCompleted(int rv) const {
446 // Log the end of this attempt (and any OS error it threw).
447 if (rv != OK) {
448 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
449 NetLog::IntegerCallback("os_error", errno));
450 } else {
451 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
454 // Give a more specific error when the user is offline.
455 if (rv == ERR_ADDRESS_UNREACHABLE && NetworkChangeNotifier::IsOffline())
456 rv = ERR_INTERNET_DISCONNECTED;
458 if (!logging_multiple_connect_attempts_)
459 LogConnectEnd(rv);
461 return rv;
464 void TCPSocketLibevent::LogConnectBegin(const AddressList& addresses) const {
465 base::StatsCounter connects("tcp.connect");
466 connects.Increment();
468 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
469 addresses.CreateNetLogCallback());
472 void TCPSocketLibevent::LogConnectEnd(int net_error) const {
473 if (net_error != OK) {
474 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
475 return;
478 UpdateConnectionTypeHistograms(CONNECTION_ANY);
480 SockaddrStorage storage;
481 int rv = socket_->GetLocalAddress(&storage);
482 if (rv != OK) {
483 PLOG(ERROR) << "GetLocalAddress() [rv: " << rv << "] error: ";
484 NOTREACHED();
485 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
486 return;
489 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT,
490 CreateNetLogSourceAddressCallback(storage.addr,
491 storage.addr_len));
494 void TCPSocketLibevent::ReadCompleted(const scoped_refptr<IOBuffer>& buf,
495 const CompletionCallback& callback,
496 int rv) {
497 DCHECK_NE(ERR_IO_PENDING, rv);
498 // Records fast open status regardless of error in asynchronous case.
499 // TODO(rdsmith,jri): Change histogram name to indicate it could be called on
500 // error.
501 RecordFastOpenStatus();
502 callback.Run(HandleReadCompleted(buf, rv));
505 int TCPSocketLibevent::HandleReadCompleted(IOBuffer* buf, int rv) {
506 if (rv < 0) {
507 net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR,
508 CreateNetLogSocketErrorCallback(rv, errno));
509 return rv;
512 base::StatsCounter read_bytes("tcp.read_bytes");
513 read_bytes.Add(rv);
514 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv,
515 buf->data());
516 return rv;
519 void TCPSocketLibevent::WriteCompleted(const scoped_refptr<IOBuffer>& buf,
520 const CompletionCallback& callback,
521 int rv) const {
522 DCHECK_NE(ERR_IO_PENDING, rv);
523 callback.Run(HandleWriteCompleted(buf, rv));
526 int TCPSocketLibevent::HandleWriteCompleted(IOBuffer* buf, int rv) const {
527 if (rv < 0) {
528 net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
529 CreateNetLogSocketErrorCallback(rv, errno));
530 return rv;
533 base::StatsCounter write_bytes("tcp.write_bytes");
534 write_bytes.Add(rv);
535 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
536 buf->data());
537 return rv;
540 int TCPSocketLibevent::TcpFastOpenWrite(
541 IOBuffer* buf,
542 int buf_len,
543 const CompletionCallback& callback) {
544 SockaddrStorage storage;
545 int rv = socket_->GetPeerAddress(&storage);
546 if (rv != OK)
547 return rv;
549 int flags = 0x20000000; // Magic flag to enable TCP_FASTOPEN.
550 #if defined(OS_LINUX)
551 // sendto() will fail with EPIPE when the system doesn't support TCP Fast
552 // Open. Theoretically that shouldn't happen since the caller should check
553 // for system support on startup, but users may dynamically disable TCP Fast
554 // Open via sysctl.
555 flags |= MSG_NOSIGNAL;
556 #endif // defined(OS_LINUX)
557 rv = HANDLE_EINTR(sendto(socket_->socket_fd(),
558 buf->data(),
559 buf_len,
560 flags,
561 storage.addr,
562 storage.addr_len));
563 tcp_fastopen_connected_ = true;
565 if (rv >= 0) {
566 fast_open_status_ = FAST_OPEN_FAST_CONNECT_RETURN;
567 return rv;
570 DCHECK_NE(EPIPE, errno);
572 // If errno == EINPROGRESS, that means the kernel didn't have a cookie
573 // and would block. The kernel is internally doing a connect() though.
574 // Remap EINPROGRESS to EAGAIN so we treat this the same as our other
575 // asynchronous cases. Note that the user buffer has not been copied to
576 // kernel space.
577 if (errno == EINPROGRESS) {
578 rv = ERR_IO_PENDING;
579 } else {
580 rv = MapSystemError(errno);
583 if (rv != ERR_IO_PENDING) {
584 fast_open_status_ = FAST_OPEN_ERROR;
585 return rv;
588 fast_open_status_ = FAST_OPEN_SLOW_CONNECT_RETURN;
589 return socket_->WaitForWrite(buf, buf_len, callback);
592 void TCPSocketLibevent::RecordFastOpenStatus() {
593 if (use_tcp_fastopen_ &&
594 (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN ||
595 fast_open_status_ == FAST_OPEN_SLOW_CONNECT_RETURN)) {
596 DCHECK_NE(FAST_OPEN_STATUS_UNKNOWN, fast_open_status_);
597 bool getsockopt_success(false);
598 bool server_acked_data(false);
599 #if defined(TCP_INFO)
600 // Probe to see the if the socket used TCP Fast Open.
601 tcp_info info;
602 socklen_t info_len = sizeof(tcp_info);
603 getsockopt_success =
604 getsockopt(socket_->socket_fd(), IPPROTO_TCP, TCP_INFO,
605 &info, &info_len) == 0 &&
606 info_len == sizeof(tcp_info);
607 server_acked_data = getsockopt_success &&
608 (info.tcpi_options & TCPI_OPT_SYN_DATA);
609 #endif
610 if (getsockopt_success) {
611 if (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN) {
612 fast_open_status_ = (server_acked_data ? FAST_OPEN_SYN_DATA_ACK :
613 FAST_OPEN_SYN_DATA_NACK);
614 } else {
615 fast_open_status_ = (server_acked_data ? FAST_OPEN_NO_SYN_DATA_ACK :
616 FAST_OPEN_NO_SYN_DATA_NACK);
618 } else {
619 fast_open_status_ = (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN ?
620 FAST_OPEN_SYN_DATA_FAILED :
621 FAST_OPEN_NO_SYN_DATA_FAILED);
626 } // namespace net