1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/socket/tcp_socket.h"
8 #include <netinet/tcp.h>
9 #include <sys/socket.h>
11 #include "base/bind.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/posix/eintr_wrapper.h"
16 #include "base/task_runner_util.h"
17 #include "base/threading/worker_pool.h"
18 #include "net/base/address_list.h"
19 #include "net/base/connection_type_histograms.h"
20 #include "net/base/io_buffer.h"
21 #include "net/base/ip_endpoint.h"
22 #include "net/base/net_errors.h"
23 #include "net/base/net_util.h"
24 #include "net/base/network_change_notifier.h"
25 #include "net/socket/socket_libevent.h"
26 #include "net/socket/socket_net_log_params.h"
// If we don't have a definition for TCPI_OPT_SYN_DATA, create one.
// The value is tested against tcp_info::tcpi_options when probing whether
// the peer acknowledged data sent in the SYN.
#ifndef TCPI_OPT_SYN_DATA
#define TCPI_OPT_SYN_DATA 32
#endif
// True if OS supports TCP FastOpen.
bool g_tcp_fastopen_supported = false;
// True if TCP FastOpen is user-enabled for all connections.
// TODO(jri): Change global variable to param in HttpNetworkSession::Params.
bool g_tcp_fastopen_user_enabled = false;
// SetTCPNoDelay turns on/off buffering in the kernel. By default, TCP sockets
// will wait up to 200ms for more data to complete a packet before transmitting.
// After calling this function, the kernel will not wait. See TCP_NODELAY in
// `man 7 tcp`.
//
// |fd| is the socket descriptor; |no_delay| selects whether TCP_NODELAY is
// set (true) or cleared (false). Returns true iff setsockopt() succeeded.
bool SetTCPNoDelay(int fd, bool no_delay) {
  int on = no_delay ? 1 : 0;
  int error = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
  return error == 0;
}
53 // SetTCPKeepAlive sets SO_KEEPALIVE.
54 bool SetTCPKeepAlive(int fd
, bool enable
, int delay
) {
55 int on
= enable
? 1 : 0;
56 if (setsockopt(fd
, SOL_SOCKET
, SO_KEEPALIVE
, &on
, sizeof(on
))) {
57 PLOG(ERROR
) << "Failed to set SO_KEEPALIVE on fd: " << fd
;
61 // If we disabled TCP keep alive, our work is done here.
65 #if defined(OS_LINUX) || defined(OS_ANDROID)
66 // Set seconds until first TCP keep alive.
67 if (setsockopt(fd
, SOL_TCP
, TCP_KEEPIDLE
, &delay
, sizeof(delay
))) {
68 PLOG(ERROR
) << "Failed to set TCP_KEEPIDLE on fd: " << fd
;
71 // Set seconds between TCP keep alives.
72 if (setsockopt(fd
, SOL_TCP
, TCP_KEEPINTVL
, &delay
, sizeof(delay
))) {
73 PLOG(ERROR
) << "Failed to set TCP_KEEPINTVL on fd: " << fd
;
#if defined(OS_LINUX) || defined(OS_ANDROID)
// Checks if the kernel supports TCP FastOpen.
//
// Reads /proc/sys/net/ipv4/tcp_fastopen and returns true only when the read
// succeeds and the first character of the contents is '1'.
bool SystemSupportsTCPFastOpen() {
  const base::FilePath::CharType kTCPFastOpenProcFilePath[] =
      "/proc/sys/net/ipv4/tcp_fastopen";
  std::string system_supports_tcp_fastopen;
  if (!base::ReadFileToString(base::FilePath(kTCPFastOpenProcFilePath),
                              &system_supports_tcp_fastopen)) {
    return false;
  }
  // The read from /proc should return '1' if TCP FastOpen is enabled in the OS.
  if (system_supports_tcp_fastopen.empty() ||
      (system_supports_tcp_fastopen[0] != '1')) {
    return false;
  }
  return true;
}

// Stores the outcome of the support probe (|system_supported|) together with
// the user's preference (|user_enabled|) in the file-level flags.
void RegisterTCPFastOpenIntentAndSupport(bool user_enabled,
                                         bool system_supported) {
  g_tcp_fastopen_supported = system_supported;
  g_tcp_fastopen_user_enabled = user_enabled;
}
#endif
107 //-----------------------------------------------------------------------------
109 bool IsTCPFastOpenSupported() {
110 return g_tcp_fastopen_supported
;
113 bool IsTCPFastOpenUserEnabled() {
114 return g_tcp_fastopen_user_enabled
;
// This is asynchronous because it needs to do file IO, and it isn't allowed to
// do that on the IO thread.
//
// On Linux and Android, posts SystemSupportsTCPFastOpen() to a worker-pool
// task runner and hands its result, together with |user_enabled|, to
// RegisterTCPFastOpenIntentAndSupport() as the reply. On other platforms this
// is a no-op.
void CheckSupportAndMaybeEnableTCPFastOpen(bool user_enabled) {
#if defined(OS_LINUX) || defined(OS_ANDROID)
  base::PostTaskAndReplyWithResult(
      base::WorkerPool::GetTaskRunner(/*task_is_slow=*/false).get(),
      FROM_HERE,
      base::Bind(SystemSupportsTCPFastOpen),
      base::Bind(RegisterTCPFastOpenIntentAndSupport, user_enabled));
#endif
}
129 TCPSocketLibevent::TCPSocketLibevent(NetLog
* net_log
,
130 const NetLog::Source
& source
)
131 : use_tcp_fastopen_(false),
132 tcp_fastopen_connected_(false),
133 fast_open_status_(FAST_OPEN_STATUS_UNKNOWN
),
134 logging_multiple_connect_attempts_(false),
135 net_log_(BoundNetLog::Make(net_log
, NetLog::SOURCE_SOCKET
)) {
136 net_log_
.BeginEvent(NetLog::TYPE_SOCKET_ALIVE
,
137 source
.ToEventParametersCallback());
140 TCPSocketLibevent::~TCPSocketLibevent() {
141 net_log_
.EndEvent(NetLog::TYPE_SOCKET_ALIVE
);
142 if (tcp_fastopen_connected_
) {
143 UMA_HISTOGRAM_ENUMERATION("Net.TcpFastOpenSocketConnection",
144 fast_open_status_
, FAST_OPEN_MAX_VALUE
);
148 int TCPSocketLibevent::Open(AddressFamily family
) {
150 socket_
.reset(new SocketLibevent
);
151 int rv
= socket_
->Open(ConvertAddressFamily(family
));
157 int TCPSocketLibevent::AdoptConnectedSocket(int socket_fd
,
158 const IPEndPoint
& peer_address
) {
161 SockaddrStorage storage
;
162 if (!peer_address
.ToSockAddr(storage
.addr
, &storage
.addr_len
) &&
163 // For backward compatibility, allows the empty address.
164 !(peer_address
== IPEndPoint())) {
165 return ERR_ADDRESS_INVALID
;
168 socket_
.reset(new SocketLibevent
);
169 int rv
= socket_
->AdoptConnectedSocket(socket_fd
, storage
);
175 int TCPSocketLibevent::Bind(const IPEndPoint
& address
) {
178 SockaddrStorage storage
;
179 if (!address
.ToSockAddr(storage
.addr
, &storage
.addr_len
))
180 return ERR_ADDRESS_INVALID
;
182 return socket_
->Bind(storage
);
185 int TCPSocketLibevent::Listen(int backlog
) {
187 return socket_
->Listen(backlog
);
190 int TCPSocketLibevent::Accept(scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
192 const CompletionCallback
& callback
) {
194 DCHECK(!callback
.is_null());
196 DCHECK(!accept_socket_
);
198 net_log_
.BeginEvent(NetLog::TYPE_TCP_ACCEPT
);
200 int rv
= socket_
->Accept(
202 base::Bind(&TCPSocketLibevent::AcceptCompleted
,
203 base::Unretained(this), tcp_socket
, address
, callback
));
204 if (rv
!= ERR_IO_PENDING
)
205 rv
= HandleAcceptCompleted(tcp_socket
, address
, rv
);
209 int TCPSocketLibevent::Connect(const IPEndPoint
& address
,
210 const CompletionCallback
& callback
) {
213 if (!logging_multiple_connect_attempts_
)
214 LogConnectBegin(AddressList(address
));
216 net_log_
.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
,
217 CreateNetLogIPEndPointCallback(&address
));
219 SockaddrStorage storage
;
220 if (!address
.ToSockAddr(storage
.addr
, &storage
.addr_len
))
221 return ERR_ADDRESS_INVALID
;
223 if (use_tcp_fastopen_
) {
224 // With TCP FastOpen, we pretend that the socket is connected.
225 DCHECK(!tcp_fastopen_connected_
);
226 socket_
->SetPeerAddress(storage
);
230 int rv
= socket_
->Connect(storage
,
231 base::Bind(&TCPSocketLibevent::ConnectCompleted
,
232 base::Unretained(this), callback
));
233 if (rv
!= ERR_IO_PENDING
)
234 rv
= HandleConnectCompleted(rv
);
238 bool TCPSocketLibevent::IsConnected() const {
242 if (use_tcp_fastopen_
&& !tcp_fastopen_connected_
&&
243 socket_
->HasPeerAddress()) {
244 // With TCP FastOpen, we pretend that the socket is connected.
245 // This allows GetPeerAddress() to return peer_address_.
249 return socket_
->IsConnected();
252 bool TCPSocketLibevent::IsConnectedAndIdle() const {
253 // TODO(wtc): should we also handle the TCP FastOpen case here,
254 // as we do in IsConnected()?
255 return socket_
&& socket_
->IsConnectedAndIdle();
258 int TCPSocketLibevent::Read(IOBuffer
* buf
,
260 const CompletionCallback
& callback
) {
262 DCHECK(!callback
.is_null());
264 int rv
= socket_
->Read(
266 base::Bind(&TCPSocketLibevent::ReadCompleted
,
267 // Grab a reference to |buf| so that ReadCompleted() can still
268 // use it when Read() completes, as otherwise, this transfers
269 // ownership of buf to socket.
270 base::Unretained(this), make_scoped_refptr(buf
), callback
));
272 RecordFastOpenStatus();
273 if (rv
!= ERR_IO_PENDING
)
274 rv
= HandleReadCompleted(buf
, rv
);
278 int TCPSocketLibevent::Write(IOBuffer
* buf
,
280 const CompletionCallback
& callback
) {
282 DCHECK(!callback
.is_null());
284 CompletionCallback write_callback
=
285 base::Bind(&TCPSocketLibevent::WriteCompleted
,
286 // Grab a reference to |buf| so that WriteCompleted() can still
287 // use it when Write() completes, as otherwise, this transfers
288 // ownership of buf to socket.
289 base::Unretained(this), make_scoped_refptr(buf
), callback
);
291 if (use_tcp_fastopen_
&& !tcp_fastopen_connected_
) {
292 rv
= TcpFastOpenWrite(buf
, buf_len
, write_callback
);
294 rv
= socket_
->Write(buf
, buf_len
, write_callback
);
297 if (rv
!= ERR_IO_PENDING
)
298 rv
= HandleWriteCompleted(buf
, rv
);
302 int TCPSocketLibevent::GetLocalAddress(IPEndPoint
* address
) const {
306 return ERR_SOCKET_NOT_CONNECTED
;
308 SockaddrStorage storage
;
309 int rv
= socket_
->GetLocalAddress(&storage
);
313 if (!address
->FromSockAddr(storage
.addr
, storage
.addr_len
))
314 return ERR_ADDRESS_INVALID
;
319 int TCPSocketLibevent::GetPeerAddress(IPEndPoint
* address
) const {
323 return ERR_SOCKET_NOT_CONNECTED
;
325 SockaddrStorage storage
;
326 int rv
= socket_
->GetPeerAddress(&storage
);
330 if (!address
->FromSockAddr(storage
.addr
, storage
.addr_len
))
331 return ERR_ADDRESS_INVALID
;
336 int TCPSocketLibevent::SetDefaultOptionsForServer() {
338 return SetAddressReuse(true);
341 void TCPSocketLibevent::SetDefaultOptionsForClient() {
344 // This mirrors the behaviour on Windows. See the comment in
345 // tcp_socket_win.cc after searching for "NODELAY".
346 // If SetTCPNoDelay fails, we don't care.
347 SetTCPNoDelay(socket_
->socket_fd(), true);
349 // TCP keep alive wakes up the radio, which is expensive on mobile. Do not
350 // enable it there. It's useful to prevent TCP middleboxes from timing out
351 // connection mappings. Packets for timed out connection mappings at
352 // middleboxes will either lead to:
353 // a) Middleboxes sending TCP RSTs. It's up to higher layers to check for this
354 // and retry. The HTTP network transaction code does this.
355 // b) Middleboxes just drop the unrecognized TCP packet. This leads to the TCP
356 // stack retransmitting packets per TCP stack retransmission timeouts, which
357 // are very high (on the order of seconds). Given the number of
358 // retransmissions required before killing the connection, this can lead to
359 // tens of seconds or even minutes of delay, depending on OS.
360 #if !defined(OS_ANDROID) && !defined(OS_IOS)
361 const int kTCPKeepAliveSeconds
= 45;
363 SetTCPKeepAlive(socket_
->socket_fd(), true, kTCPKeepAliveSeconds
);
367 int TCPSocketLibevent::SetAddressReuse(bool allow
) {
370 // SO_REUSEADDR is useful for server sockets to bind to a recently unbound
371 // port. When a socket is closed, the end point changes its state to TIME_WAIT
372 // and wait for 2 MSL (maximum segment lifetime) to ensure the remote peer
373 // acknowledges its closure. For server sockets, it is usually safe to
374 // bind to a TIME_WAIT end point immediately, which is a widely adopted
377 // Note that on *nix, SO_REUSEADDR does not enable the TCP socket to bind to
378 // an end point that is already bound by another socket. To do that one must
379 // set SO_REUSEPORT instead. This option is not provided on Linux prior
382 // SO_REUSEPORT is provided in MacOS X and iOS.
383 int boolean_value
= allow
? 1 : 0;
384 int rv
= setsockopt(socket_
->socket_fd(), SOL_SOCKET
, SO_REUSEADDR
,
385 &boolean_value
, sizeof(boolean_value
));
387 return MapSystemError(errno
);
391 int TCPSocketLibevent::SetReceiveBufferSize(int32 size
) {
393 int rv
= setsockopt(socket_
->socket_fd(), SOL_SOCKET
, SO_RCVBUF
,
394 reinterpret_cast<const char*>(&size
), sizeof(size
));
395 return (rv
== 0) ? OK
: MapSystemError(errno
);
398 int TCPSocketLibevent::SetSendBufferSize(int32 size
) {
400 int rv
= setsockopt(socket_
->socket_fd(), SOL_SOCKET
, SO_SNDBUF
,
401 reinterpret_cast<const char*>(&size
), sizeof(size
));
402 return (rv
== 0) ? OK
: MapSystemError(errno
);
405 bool TCPSocketLibevent::SetKeepAlive(bool enable
, int delay
) {
407 return SetTCPKeepAlive(socket_
->socket_fd(), enable
, delay
);
410 bool TCPSocketLibevent::SetNoDelay(bool no_delay
) {
412 return SetTCPNoDelay(socket_
->socket_fd(), no_delay
);
415 void TCPSocketLibevent::Close() {
417 tcp_fastopen_connected_
= false;
418 fast_open_status_
= FAST_OPEN_STATUS_UNKNOWN
;
421 bool TCPSocketLibevent::UsingTCPFastOpen() const {
422 return use_tcp_fastopen_
;
425 void TCPSocketLibevent::EnableTCPFastOpenIfSupported() {
426 if (IsTCPFastOpenSupported())
427 use_tcp_fastopen_
= true;
430 bool TCPSocketLibevent::IsValid() const {
431 return socket_
!= NULL
&& socket_
->socket_fd() != kInvalidSocket
;
434 void TCPSocketLibevent::StartLoggingMultipleConnectAttempts(
435 const AddressList
& addresses
) {
436 if (!logging_multiple_connect_attempts_
) {
437 logging_multiple_connect_attempts_
= true;
438 LogConnectBegin(addresses
);
444 void TCPSocketLibevent::EndLoggingMultipleConnectAttempts(int net_error
) {
445 if (logging_multiple_connect_attempts_
) {
446 LogConnectEnd(net_error
);
447 logging_multiple_connect_attempts_
= false;
453 void TCPSocketLibevent::AcceptCompleted(
454 scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
456 const CompletionCallback
& callback
,
458 DCHECK_NE(ERR_IO_PENDING
, rv
);
459 callback
.Run(HandleAcceptCompleted(tcp_socket
, address
, rv
));
462 int TCPSocketLibevent::HandleAcceptCompleted(
463 scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
467 rv
= BuildTcpSocketLibevent(tcp_socket
, address
);
470 net_log_
.EndEvent(NetLog::TYPE_TCP_ACCEPT
,
471 CreateNetLogIPEndPointCallback(address
));
473 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT
, rv
);
479 int TCPSocketLibevent::BuildTcpSocketLibevent(
480 scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
481 IPEndPoint
* address
) {
482 DCHECK(accept_socket_
);
484 SockaddrStorage storage
;
485 if (accept_socket_
->GetPeerAddress(&storage
) != OK
||
486 !address
->FromSockAddr(storage
.addr
, storage
.addr_len
)) {
487 accept_socket_
.reset();
488 return ERR_ADDRESS_INVALID
;
491 tcp_socket
->reset(new TCPSocketLibevent(net_log_
.net_log(),
493 (*tcp_socket
)->socket_
.reset(accept_socket_
.release());
497 void TCPSocketLibevent::ConnectCompleted(const CompletionCallback
& callback
,
499 DCHECK_NE(ERR_IO_PENDING
, rv
);
500 callback
.Run(HandleConnectCompleted(rv
));
503 int TCPSocketLibevent::HandleConnectCompleted(int rv
) const {
504 // Log the end of this attempt (and any OS error it threw).
506 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
,
507 NetLog::IntegerCallback("os_error", errno
));
509 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
);
512 // Give a more specific error when the user is offline.
513 if (rv
== ERR_ADDRESS_UNREACHABLE
&& NetworkChangeNotifier::IsOffline())
514 rv
= ERR_INTERNET_DISCONNECTED
;
516 if (!logging_multiple_connect_attempts_
)
522 void TCPSocketLibevent::LogConnectBegin(const AddressList
& addresses
) const {
523 base::StatsCounter
connects("tcp.connect");
524 connects
.Increment();
526 net_log_
.BeginEvent(NetLog::TYPE_TCP_CONNECT
,
527 addresses
.CreateNetLogCallback());
530 void TCPSocketLibevent::LogConnectEnd(int net_error
) const {
531 if (net_error
!= OK
) {
532 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT
, net_error
);
536 UpdateConnectionTypeHistograms(CONNECTION_ANY
);
538 SockaddrStorage storage
;
539 int rv
= socket_
->GetLocalAddress(&storage
);
541 PLOG(ERROR
) << "GetLocalAddress() [rv: " << rv
<< "] error: ";
543 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT
, rv
);
547 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT
,
548 CreateNetLogSourceAddressCallback(storage
.addr
,
552 void TCPSocketLibevent::ReadCompleted(const scoped_refptr
<IOBuffer
>& buf
,
553 const CompletionCallback
& callback
,
555 DCHECK_NE(ERR_IO_PENDING
, rv
);
556 // Records TCP FastOpen status regardless of error in asynchronous case.
557 // TODO(rdsmith,jri): Change histogram name to indicate it could be called on
559 RecordFastOpenStatus();
560 callback
.Run(HandleReadCompleted(buf
.get(), rv
));
563 int TCPSocketLibevent::HandleReadCompleted(IOBuffer
* buf
, int rv
) {
565 net_log_
.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR
,
566 CreateNetLogSocketErrorCallback(rv
, errno
));
570 base::StatsCounter
read_bytes("tcp.read_bytes");
572 net_log_
.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED
, rv
,
577 void TCPSocketLibevent::WriteCompleted(const scoped_refptr
<IOBuffer
>& buf
,
578 const CompletionCallback
& callback
,
580 DCHECK_NE(ERR_IO_PENDING
, rv
);
581 callback
.Run(HandleWriteCompleted(buf
.get(), rv
));
584 int TCPSocketLibevent::HandleWriteCompleted(IOBuffer
* buf
, int rv
) const {
586 net_log_
.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR
,
587 CreateNetLogSocketErrorCallback(rv
, errno
));
591 base::StatsCounter
write_bytes("tcp.write_bytes");
593 net_log_
.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT
, rv
,
598 int TCPSocketLibevent::TcpFastOpenWrite(
601 const CompletionCallback
& callback
) {
602 SockaddrStorage storage
;
603 int rv
= socket_
->GetPeerAddress(&storage
);
607 int flags
= 0x20000000; // Magic flag to enable TCP_FASTOPEN.
608 #if defined(OS_LINUX) || defined(OS_ANDROID)
609 // sendto() will fail with EPIPE when the system doesn't support TCP Fast
610 // Open. Theoretically that shouldn't happen since the caller should check
611 // for system support on startup, but users may dynamically disable TCP Fast
613 flags
|= MSG_NOSIGNAL
;
614 #endif // defined(OS_LINUX) || defined(OS_ANDROID)
615 rv
= HANDLE_EINTR(sendto(socket_
->socket_fd(),
621 tcp_fastopen_connected_
= true;
624 fast_open_status_
= FAST_OPEN_FAST_CONNECT_RETURN
;
628 DCHECK_NE(EPIPE
, errno
);
630 // If errno == EINPROGRESS, that means the kernel didn't have a cookie
631 // and would block. The kernel is internally doing a connect() though.
632 // Remap EINPROGRESS to EAGAIN so we treat this the same as our other
633 // asynchronous cases. Note that the user buffer has not been copied to
635 if (errno
== EINPROGRESS
) {
638 rv
= MapSystemError(errno
);
641 if (rv
!= ERR_IO_PENDING
) {
642 fast_open_status_
= FAST_OPEN_ERROR
;
646 fast_open_status_
= FAST_OPEN_SLOW_CONNECT_RETURN
;
647 return socket_
->WaitForWrite(buf
, buf_len
, callback
);
650 void TCPSocketLibevent::RecordFastOpenStatus() {
651 if (use_tcp_fastopen_
&&
652 (fast_open_status_
== FAST_OPEN_FAST_CONNECT_RETURN
||
653 fast_open_status_
== FAST_OPEN_SLOW_CONNECT_RETURN
)) {
654 DCHECK_NE(FAST_OPEN_STATUS_UNKNOWN
, fast_open_status_
);
655 bool getsockopt_success(false);
656 bool server_acked_data(false);
657 #if defined(TCP_INFO)
658 // Probe to see the if the socket used TCP FastOpen.
660 socklen_t info_len
= sizeof(tcp_info
);
662 getsockopt(socket_
->socket_fd(), IPPROTO_TCP
, TCP_INFO
,
663 &info
, &info_len
) == 0 &&
664 info_len
== sizeof(tcp_info
);
665 server_acked_data
= getsockopt_success
&&
666 (info
.tcpi_options
& TCPI_OPT_SYN_DATA
);
668 if (getsockopt_success
) {
669 if (fast_open_status_
== FAST_OPEN_FAST_CONNECT_RETURN
) {
670 fast_open_status_
= (server_acked_data
? FAST_OPEN_SYN_DATA_ACK
:
671 FAST_OPEN_SYN_DATA_NACK
);
673 fast_open_status_
= (server_acked_data
? FAST_OPEN_NO_SYN_DATA_ACK
:
674 FAST_OPEN_NO_SYN_DATA_NACK
);
677 fast_open_status_
= (fast_open_status_
== FAST_OPEN_FAST_CONNECT_RETURN
?
678 FAST_OPEN_SYN_DATA_FAILED
:
679 FAST_OPEN_NO_SYN_DATA_FAILED
);