1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/socket/tcp_socket.h"
8 #include <netinet/tcp.h>
9 #include <sys/socket.h>
11 #include "base/bind.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/posix/eintr_wrapper.h"
16 #include "net/base/address_list.h"
17 #include "net/base/connection_type_histograms.h"
18 #include "net/base/io_buffer.h"
19 #include "net/base/ip_endpoint.h"
20 #include "net/base/net_errors.h"
21 #include "net/base/net_util.h"
22 #include "net/base/network_change_notifier.h"
23 #include "net/socket/socket_libevent.h"
24 #include "net/socket/socket_net_log_params.h"
26 // If we don't have a definition for TCPI_OPT_SYN_DATA, create one.
27 #ifndef TCPI_OPT_SYN_DATA
28 #define TCPI_OPT_SYN_DATA 32
// Sets or clears the TCP_NODELAY option on |fd|.
// |no_delay| selects the value written: true writes 1, false writes 0.
35 // SetTCPNoDelay turns on/off buffering in the kernel. By default, TCP sockets
36 // will wait up to 200ms for more data to complete a packet before transmitting.
37 // After calling this function, the kernel will not wait. See TCP_NODELAY in
39 bool SetTCPNoDelay(int fd
, bool no_delay
) {
// setsockopt() takes the option value as an int, so map the bool to 1/0.
40 int on
= no_delay
? 1 : 0;
// Apply the option at the IPPROTO_TCP level; the raw setsockopt() result is
// kept in |error|.
41 int error
= setsockopt(fd
, IPPROTO_TCP
, TCP_NODELAY
, &on
, sizeof(on
));
45 // SetTCPKeepAlive sets SO_KEEPALIVE.
// |fd| is the socket descriptor to configure, |enable| selects whether
// SO_KEEPALIVE is written as 1 or 0, and |delay| is the number of seconds
// used for the keep-alive timers configured below.
46 bool SetTCPKeepAlive(int fd
, bool enable
, int delay
) {
// setsockopt() takes the option value as an int, so map the bool to 1/0.
47 int on
= enable
? 1 : 0;
// setsockopt() returns non-zero on failure; PLOG appends the errno text.
48 if (setsockopt(fd
, SOL_SOCKET
, SO_KEEPALIVE
, &on
, sizeof(on
))) {
49 PLOG(ERROR
) << "Failed to set SO_KEEPALIVE on fd: " << fd
;
53 // If we disabled TCP keep alive, our work is done here.
// The timer options below are only compiled on Linux and Android.
57 #if defined(OS_LINUX) || defined(OS_ANDROID)
58 // Set seconds until first TCP keep alive.
59 if (setsockopt(fd
, SOL_TCP
, TCP_KEEPIDLE
, &delay
, sizeof(delay
))) {
60 PLOG(ERROR
) << "Failed to set TCP_KEEPIDLE on fd: " << fd
;
// Note that the same |delay| value is used for both the initial idle time
// (TCP_KEEPIDLE above) and the probe interval (TCP_KEEPINTVL below).
63 // Set seconds between TCP keep alives.
64 if (setsockopt(fd
, SOL_TCP
, TCP_KEEPINTVL
, &delay
, sizeof(delay
))) {
65 PLOG(ERROR
) << "Failed to set TCP_KEEPINTVL on fd: " << fd
;
74 //-----------------------------------------------------------------------------
// Constructs a socket wrapper that logs to |net_log|. |source| supplies the
// parameters attached to the TYPE_SOCKET_ALIVE begin event.
76 TCPSocketLibevent::TCPSocketLibevent(NetLog
* net_log
,
77 const NetLog::Source
& source
)
// TCP FastOpen usage is sampled once, at construction time, from
// IsTCPFastOpenEnabled().
78 : use_tcp_fastopen_(IsTCPFastOpenEnabled()),
79 tcp_fastopen_connected_(false),
80 fast_open_status_(FAST_OPEN_STATUS_UNKNOWN
),
81 logging_multiple_connect_attempts_(false),
// The bound log for this socket is created with the SOURCE_SOCKET type.
82 net_log_(BoundNetLog::Make(net_log
, NetLog::SOURCE_SOCKET
)) {
// Opens the TYPE_SOCKET_ALIVE event; the destructor emits the matching
// EndEvent.
83 net_log_
.BeginEvent(NetLog::TYPE_SOCKET_ALIVE
,
84 source
.ToEventParametersCallback());
// Closes the TYPE_SOCKET_ALIVE log event opened by the constructor and, when
// a TCP FastOpen connection was made, reports its final status to UMA.
87 TCPSocketLibevent::~TCPSocketLibevent() {
88 net_log_
.EndEvent(NetLog::TYPE_SOCKET_ALIVE
);
// The histogram is only recorded when tcp_fastopen_connected_ is set.
89 if (tcp_fastopen_connected_
) {
90 UMA_HISTOGRAM_ENUMERATION("Net.TcpFastOpenSocketConnection",
91 fast_open_status_
, FAST_OPEN_MAX_VALUE
);
// Creates the underlying SocketLibevent and opens it for |family|.
// |rv| holds the result of the underlying Open() call.
95 int TCPSocketLibevent::Open(AddressFamily family
) {
// Replaces whatever socket_ previously held with a fresh SocketLibevent.
97 socket_
.reset(new SocketLibevent
);
// ConvertAddressFamily() translates the net-layer family enum into the value
// passed to the underlying Open().
98 int rv
= socket_
->Open(ConvertAddressFamily(family
));
// Wraps the already-connected descriptor |socket_fd| in a new SocketLibevent.
// |peer_address| is converted to a sockaddr and handed to the underlying
// socket together with the descriptor.
// Returns ERR_ADDRESS_INVALID when |peer_address| cannot be converted and is
// not the default-constructed (empty) IPEndPoint.
104 int TCPSocketLibevent::AdoptConnectedSocket(int socket_fd
,
105 const IPEndPoint
& peer_address
) {
108 SockaddrStorage storage
;
// A failed conversion is tolerated only for the empty endpoint; in that case
// |storage| is passed on as-is.
109 if (!peer_address
.ToSockAddr(storage
.addr
, &storage
.addr_len
) &&
110 // For backward compatibility, allows the empty address.
111 !(peer_address
== IPEndPoint())) {
112 return ERR_ADDRESS_INVALID
;
// Replaces whatever socket_ previously held with a fresh SocketLibevent and
// lets it adopt the descriptor.
115 socket_
.reset(new SocketLibevent
);
116 int rv
= socket_
->AdoptConnectedSocket(socket_fd
, storage
);
// Binds the underlying socket to |address|.
// Returns ERR_ADDRESS_INVALID when |address| cannot be converted to a
// sockaddr; otherwise returns the result of the underlying Bind().
122 int TCPSocketLibevent::Bind(const IPEndPoint
& address
) {
125 SockaddrStorage storage
;
126 if (!address
.ToSockAddr(storage
.addr
, &storage
.addr_len
))
127 return ERR_ADDRESS_INVALID
;
129 return socket_
->Bind(storage
);
// Forwards |backlog| to the underlying socket's Listen() and returns its
// result unchanged.
132 int TCPSocketLibevent::Listen(int backlog
) {
134 return socket_
->Listen(backlog
);
// Starts accepting one incoming connection on the underlying socket.
// |tcp_socket| receives the wrapper for the accepted connection and
// |callback| is run when the accept finishes asynchronously.
// When the underlying Accept() does not return ERR_IO_PENDING, the result is
// post-processed by HandleAcceptCompleted() right away.
137 int TCPSocketLibevent::Accept(scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
139 const CompletionCallback
& callback
) {
141 DCHECK(!callback
.is_null());
// Only one accept may be outstanding: accept_socket_ must be empty here.
143 DCHECK(!accept_socket_
);
// The matching end event is emitted by HandleAcceptCompleted().
145 net_log_
.BeginEvent(NetLog::TYPE_TCP_ACCEPT
);
// NOTE(review): base::Unretained(this) does not keep this object alive for
// the bound callback; presumably safe because socket_ is owned by this
// object — confirm.
147 int rv
= socket_
->Accept(
149 base::Bind(&TCPSocketLibevent::AcceptCompleted
,
150 base::Unretained(this), tcp_socket
, address
, callback
));
// Synchronous completion (success or failure): finish the bookkeeping now
// instead of waiting for AcceptCompleted().
151 if (rv
!= ERR_IO_PENDING
)
152 rv
= HandleAcceptCompleted(tcp_socket
, address
, rv
);
// Starts a connection attempt to |address|. |callback| is run when the
// attempt finishes asynchronously.
// Returns ERR_ADDRESS_INVALID when |address| cannot be converted to a
// sockaddr. When the underlying Connect() does not return ERR_IO_PENDING, the
// result is post-processed by HandleConnectCompleted() right away.
156 int TCPSocketLibevent::Connect(const IPEndPoint
& address
,
157 const CompletionCallback
& callback
) {
// When the caller has not started a multi-attempt logging session, this
// single attempt opens the connect log event itself.
160 if (!logging_multiple_connect_attempts_
)
161 LogConnectBegin(AddressList(address
));
// Per-attempt event; it is closed in HandleConnectCompleted().
163 net_log_
.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
,
164 CreateNetLogIPEndPointCallback(&address
));
166 SockaddrStorage storage
;
167 if (!address
.ToSockAddr(storage
.addr
, &storage
.addr_len
))
168 return ERR_ADDRESS_INVALID
;
170 if (use_tcp_fastopen_
) {
171 // With TCP FastOpen, we pretend that the socket is connected.
172 DCHECK(!tcp_fastopen_connected_
);
// Only the peer address is recorded on the underlying socket here.
173 socket_
->SetPeerAddress(storage
);
// NOTE(review): base::Unretained(this) does not keep this object alive for
// the bound callback; presumably safe because socket_ is owned by this
// object — confirm.
177 int rv
= socket_
->Connect(storage
,
178 base::Bind(&TCPSocketLibevent::ConnectCompleted
,
179 base::Unretained(this), callback
));
// Synchronous completion: finish logging and error mapping now.
180 if (rv
!= ERR_IO_PENDING
)
181 rv
= HandleConnectCompleted(rv
);
// Reports whether the socket is connected. The TCP FastOpen case, where a
// peer address has been recorded but the FastOpen connection is not yet
// flagged as made, is handled separately from the regular query to the
// underlying socket.
185 bool TCPSocketLibevent::IsConnected() const {
// FastOpen is in use, the FastOpen connection has not been flagged yet, and
// the underlying socket already has a peer address recorded.
189 if (use_tcp_fastopen_
&& !tcp_fastopen_connected_
&&
190 socket_
->HasPeerAddress()) {
191 // With TCP FastOpen, we pretend that the socket is connected.
192 // This allows GetPeerAddress() to return peer_address_.
// Regular case: ask the underlying socket.
196 return socket_
->IsConnected();
// Returns true only when socket_ is set and the underlying socket reports
// itself as connected and idle. A null socket_ yields false.
199 bool TCPSocketLibevent::IsConnectedAndIdle() const {
200 // TODO(wtc): should we also handle the TCP FastOpen case here,
201 // as we do in IsConnected()?
202 return socket_
&& socket_
->IsConnectedAndIdle();
// Issues a read into |buf| on the underlying socket. |callback| is run when
// the read finishes asynchronously.
// When the underlying Read() does not return ERR_IO_PENDING, the result is
// post-processed by HandleReadCompleted() right away.
205 int TCPSocketLibevent::Read(IOBuffer
* buf
,
207 const CompletionCallback
& callback
) {
209 DCHECK(!callback
.is_null());
// NOTE(review): base::Unretained(this) does not keep this object alive for
// the bound callback; presumably safe because socket_ is owned by this
// object — confirm.
211 int rv
= socket_
->Read(
213 base::Bind(&TCPSocketLibevent::ReadCompleted
,
214 // Grab a reference to |buf| so that ReadCompleted() can still
215 // use it when Read() completes, as otherwise, this transfers
216 // ownership of buf to socket.
217 base::Unretained(this), make_scoped_refptr(buf
), callback
));
// The FastOpen status is recorded immediately after the read is issued,
// before |rv| is examined.
219 RecordFastOpenStatus();
// Synchronous completion: do the logging/accounting now.
220 if (rv
!= ERR_IO_PENDING
)
221 rv
= HandleReadCompleted(buf
, rv
);
// Issues a write from |buf| on the underlying socket. |callback| is run when
// the write finishes asynchronously.
// While TCP FastOpen is enabled and not yet connected, the write goes through
// TcpFastOpenWrite(); the other visible path is the underlying socket's
// Write().
// When the chosen path does not return ERR_IO_PENDING, the result is
// post-processed by HandleWriteCompleted() right away.
225 int TCPSocketLibevent::Write(IOBuffer
* buf
,
227 const CompletionCallback
& callback
) {
229 DCHECK(!callback
.is_null());
// The completion callback is built once and shared by both write paths.
// NOTE(review): base::Unretained(this) does not keep this object alive for
// the bound callback; presumably safe because socket_ is owned by this
// object — confirm.
231 CompletionCallback write_callback
=
232 base::Bind(&TCPSocketLibevent::WriteCompleted
,
233 // Grab a reference to |buf| so that WriteCompleted() can still
234 // use it when Write() completes, as otherwise, this transfers
235 // ownership of buf to socket.
236 base::Unretained(this), make_scoped_refptr(buf
), callback
);
// FastOpen path: used while FastOpen is enabled and not yet flagged as
// connected.
238 if (use_tcp_fastopen_
&& !tcp_fastopen_connected_
) {
239 rv
= TcpFastOpenWrite(buf
, buf_len
, write_callback
);
// Regular path: plain write on the underlying socket.
241 rv
= socket_
->Write(buf
, buf_len
, write_callback
);
// Synchronous completion: do the logging/accounting now.
244 if (rv
!= ERR_IO_PENDING
)
245 rv
= HandleWriteCompleted(buf
, rv
);
// Writes the local endpoint of the underlying socket into |address|.
// Visible failure results are ERR_SOCKET_NOT_CONNECTED and, when the sockaddr
// cannot be converted into an IPEndPoint, ERR_ADDRESS_INVALID.
249 int TCPSocketLibevent::GetLocalAddress(IPEndPoint
* address
) const {
// NOTE(review): the condition guarding this early return is not visible in
// this view — presumably a missing socket_; confirm.
253 return ERR_SOCKET_NOT_CONNECTED
;
255 SockaddrStorage storage
;
256 int rv
= socket_
->GetLocalAddress(&storage
);
// Convert the raw sockaddr into the caller's IPEndPoint.
260 if (!address
->FromSockAddr(storage
.addr
, storage
.addr_len
))
261 return ERR_ADDRESS_INVALID
;
// Writes the peer endpoint of the underlying socket into |address|.
// Visible failure results are ERR_SOCKET_NOT_CONNECTED and, when the sockaddr
// cannot be converted into an IPEndPoint, ERR_ADDRESS_INVALID.
266 int TCPSocketLibevent::GetPeerAddress(IPEndPoint
* address
) const {
// NOTE(review): the condition guarding this early return is not visible in
// this view — presumably a not-connected check; confirm.
270 return ERR_SOCKET_NOT_CONNECTED
;
272 SockaddrStorage storage
;
273 int rv
= socket_
->GetPeerAddress(&storage
);
// Convert the raw sockaddr into the caller's IPEndPoint.
277 if (!address
->FromSockAddr(storage
.addr
, storage
.addr_len
))
278 return ERR_ADDRESS_INVALID
;
// Applies the default server-side option: address reuse is enabled via
// SetAddressReuse(true), whose result is returned unchanged.
283 int TCPSocketLibevent::SetDefaultOptionsForServer() {
285 return SetAddressReuse(true);
// Applies the default client-side options to the underlying descriptor:
// TCP_NODELAY is turned on, and on platforms other than Android and iOS
// keep-alive is enabled with a 45 second delay. Both helper results are
// ignored.
288 void TCPSocketLibevent::SetDefaultOptionsForClient() {
291 // This mirrors the behaviour on Windows. See the comment in
292 // tcp_socket_win.cc after searching for "NODELAY".
293 // If SetTCPNoDelay fails, we don't care.
294 SetTCPNoDelay(socket_
->socket_fd(), true);
296 // TCP keep alive wakes up the radio, which is expensive on mobile. Do not
297 // enable it there. It's useful to prevent TCP middleboxes from timing out
298 // connection mappings. Packets for timed out connection mappings at
299 // middleboxes will either lead to:
300 // a) Middleboxes sending TCP RSTs. It's up to higher layers to check for this
301 // and retry. The HTTP network transaction code does this.
302 // b) Middleboxes just drop the unrecognized TCP packet. This leads to the TCP
303 // stack retransmitting packets per TCP stack retransmission timeouts, which
304 // are very high (on the order of seconds). Given the number of
305 // retransmissions required before killing the connection, this can lead to
306 // tens of seconds or even minutes of delay, depending on OS.
307 #if !defined(OS_ANDROID) && !defined(OS_IOS)
// The same value is passed as the single |delay| argument of
// SetTCPKeepAlive().
308 const int kTCPKeepAliveSeconds
= 45;
310 SetTCPKeepAlive(socket_
->socket_fd(), true, kTCPKeepAliveSeconds
);
// Sets or clears SO_REUSEADDR on the underlying descriptor according to
// |allow|. A failure is reported as MapSystemError(errno).
314 int TCPSocketLibevent::SetAddressReuse(bool allow
) {
317 // SO_REUSEADDR is useful for server sockets to bind to a recently unbound
318 // port. When a socket is closed, the end point changes its state to TIME_WAIT
319 // and wait for 2 MSL (maximum segment lifetime) to ensure the remote peer
320 // acknowledges its closure. For server sockets, it is usually safe to
321 // bind to a TIME_WAIT end point immediately, which is a widely adopted
324 // Note that on *nix, SO_REUSEADDR does not enable the TCP socket to bind to
325 // an end point that is already bound by another socket. To do that one must
326 // set SO_REUSEPORT instead. This option is not provided on Linux prior
329 // SO_REUSEPORT is provided in MacOS X and iOS.
// setsockopt() takes the option value as an int, so map the bool to 1/0.
330 int boolean_value
= allow
? 1 : 0;
331 int rv
= setsockopt(socket_
->socket_fd(), SOL_SOCKET
, SO_REUSEADDR
,
332 &boolean_value
, sizeof(boolean_value
));
// NOTE(review): the check on |rv| guarding this return is not visible in this
// view — presumably taken only when setsockopt() failed; confirm.
334 return MapSystemError(errno
);
// Sets SO_RCVBUF on the underlying descriptor to |size|.
// Returns OK when setsockopt() returns 0, otherwise MapSystemError(errno).
338 int TCPSocketLibevent::SetReceiveBufferSize(int32 size
) {
// The option value is passed by address; sizeof(size) tells the kernel how
// many bytes to read.
340 int rv
= setsockopt(socket_
->socket_fd(), SOL_SOCKET
, SO_RCVBUF
,
341 reinterpret_cast<const char*>(&size
), sizeof(size
));
342 return (rv
== 0) ? OK
: MapSystemError(errno
);
// Sets SO_SNDBUF on the underlying descriptor to |size|.
// Returns OK when setsockopt() returns 0, otherwise MapSystemError(errno).
345 int TCPSocketLibevent::SetSendBufferSize(int32 size
) {
// The option value is passed by address; sizeof(size) tells the kernel how
// many bytes to read.
347 int rv
= setsockopt(socket_
->socket_fd(), SOL_SOCKET
, SO_SNDBUF
,
348 reinterpret_cast<const char*>(&size
), sizeof(size
));
349 return (rv
== 0) ? OK
: MapSystemError(errno
);
// Forwards |enable| and |delay| to SetTCPKeepAlive() for the underlying
// descriptor and returns its result unchanged.
352 bool TCPSocketLibevent::SetKeepAlive(bool enable
, int delay
) {
354 return SetTCPKeepAlive(socket_
->socket_fd(), enable
, delay
);
// Forwards |no_delay| to SetTCPNoDelay() for the underlying descriptor and
// returns its result unchanged.
357 bool TCPSocketLibevent::SetNoDelay(bool no_delay
) {
359 return SetTCPNoDelay(socket_
->socket_fd(), no_delay
);
// Resets the TCP FastOpen bookkeeping to its initial values (the same values
// the constructor assigns).
// NOTE(review): the statement that releases socket_ is not visible in this
// view — confirm it precedes these resets.
362 void TCPSocketLibevent::Close() {
364 tcp_fastopen_connected_
= false;
365 fast_open_status_
= FAST_OPEN_STATUS_UNKNOWN
;
// Returns the use_tcp_fastopen_ flag, which the constructor initializes from
// IsTCPFastOpenEnabled().
368 bool TCPSocketLibevent::UsingTCPFastOpen() const {
369 return use_tcp_fastopen_
;
// Returns true when socket_ is set and its descriptor is not kInvalidSocket.
// The null check comes first, so socket_fd() is never called on a null
// socket_.
372 bool TCPSocketLibevent::IsValid() const {
373 return socket_
!= NULL
&& socket_
->socket_fd() != kInvalidSocket
;
// Begins a logging session that spans several connect attempts to
// |addresses|. While the flag is set, Connect() does not call
// LogConnectBegin() itself.
376 void TCPSocketLibevent::StartLoggingMultipleConnectAttempts(
377 const AddressList
& addresses
) {
// Only the first call of a session sets the flag and logs the begin event.
378 if (!logging_multiple_connect_attempts_
) {
379 logging_multiple_connect_attempts_
= true;
380 LogConnectBegin(addresses
);
// Ends a multi-attempt logging session, logging |net_error| as the overall
// connect result via LogConnectEnd().
386 void TCPSocketLibevent::EndLoggingMultipleConnectAttempts(int net_error
) {
// Only acts while a session is active; the flag is cleared after logging.
387 if (logging_multiple_connect_attempts_
) {
388 LogConnectEnd(net_error
);
389 logging_multiple_connect_attempts_
= false;
// Completion handler bound in Accept() for the asynchronous case. Runs the
// result through HandleAcceptCompleted() and passes the outcome to
// |callback|.
395 void TCPSocketLibevent::AcceptCompleted(
396 scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
398 const CompletionCallback
& callback
,
// A completion must carry a final result, never ERR_IO_PENDING.
400 DCHECK_NE(ERR_IO_PENDING
, rv
);
401 callback
.Run(HandleAcceptCompleted(tcp_socket
, address
, rv
));
// Shared post-processing for synchronous and asynchronous accepts: builds
// the wrapper for the accepted connection and closes the TYPE_TCP_ACCEPT log
// event opened in Accept().
// NOTE(review): the conditions selecting between the statements below are not
// visible in this view — presumably the wrapper is built only on success and
// the two end events are the success/failure alternatives; confirm.
404 int TCPSocketLibevent::HandleAcceptCompleted(
405 scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
// |rv| is replaced by the result of building the wrapper.
409 rv
= BuildTcpSocketLibevent(tcp_socket
, address
);
// End event that carries the endpoint held in |address|.
412 net_log_
.EndEvent(NetLog::TYPE_TCP_ACCEPT
,
413 CreateNetLogIPEndPointCallback(address
));
// End event that carries the net error code instead.
415 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT
, rv
);
// Wraps the freshly accepted accept_socket_ in a new TCPSocketLibevent stored
// in |*tcp_socket| and writes the peer endpoint into |*address|.
// Returns ERR_ADDRESS_INVALID, after dropping accept_socket_, when the peer
// address cannot be obtained or converted.
421 int TCPSocketLibevent::BuildTcpSocketLibevent(
422 scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
423 IPEndPoint
* address
) {
// Must only be called while an accepted socket is pending.
424 DCHECK(accept_socket_
);
426 SockaddrStorage storage
;
// Either failure (no peer address, or an unconvertible sockaddr) discards the
// accepted socket.
427 if (accept_socket_
->GetPeerAddress(&storage
) != OK
||
428 !address
->FromSockAddr(storage
.addr
, storage
.addr_len
)) {
429 accept_socket_
.reset();
430 return ERR_ADDRESS_INVALID
;
// The new wrapper logs to the same NetLog as this socket.
433 tcp_socket
->reset(new TCPSocketLibevent(net_log_
.net_log(),
// Ownership of the accepted socket moves into the new wrapper; release()
// leaves accept_socket_ empty so that another Accept() can be issued.
435 (*tcp_socket
)->socket_
.reset(accept_socket_
.release());
// Completion handler bound in Connect() for the asynchronous case. Runs the
// result through HandleConnectCompleted() and passes the outcome to
// |callback|.
439 void TCPSocketLibevent::ConnectCompleted(const CompletionCallback
& callback
,
// A completion must carry a final result, never ERR_IO_PENDING.
441 DCHECK_NE(ERR_IO_PENDING
, rv
);
442 callback
.Run(HandleConnectCompleted(rv
));
// Shared post-processing for synchronous and asynchronous connects: closes
// the TYPE_TCP_CONNECT_ATTEMPT log event opened in Connect() and refines the
// error code when the device is offline.
// NOTE(review): the condition selecting between the two EndEvent calls is not
// visible in this view — presumably the "os_error" variant is used on
// failure; confirm.
445 int TCPSocketLibevent::HandleConnectCompleted(int rv
) const {
446 // Log the end of this attempt (and any OS error it threw).
// NOTE(review): errno is read here rather than being passed in; on the
// asynchronous path it may no longer reflect the connect failure — confirm.
448 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
,
449 NetLog::IntegerCallback("os_error", errno
));
451 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
);
454 // Give a more specific error when the user is offline.
455 if (rv
== ERR_ADDRESS_UNREACHABLE
&& NetworkChangeNotifier::IsOffline())
456 rv
= ERR_INTERNET_DISCONNECTED
;
// Outside a multi-attempt logging session this attempt is the whole connect.
// NOTE(review): the statement controlled by this check is not visible here —
// presumably the LogConnectEnd() call mirroring Connect(); confirm.
458 if (!logging_multiple_connect_attempts_
)
// Records the start of a connect to |addresses|: bumps the "tcp.connect"
// stats counter and opens the TYPE_TCP_CONNECT log event.
464 void TCPSocketLibevent::LogConnectBegin(const AddressList
& addresses
) const {
465 base::StatsCounter
connects("tcp.connect");
466 connects
.Increment();
// The matching end event is emitted by LogConnectEnd().
468 net_log_
.BeginEvent(NetLog::TYPE_TCP_CONNECT
,
469 addresses
.CreateNetLogCallback());
// Closes the TYPE_TCP_CONNECT log event opened by LogConnectBegin().
// A non-OK |net_error| ends the event with that error code. The remaining
// statements update the connection-type histograms and end the event with
// either the local address or the error from looking it up.
// NOTE(review): the early returns and the check on |rv| between the
// statements below are not visible in this view — confirm the branch
// structure.
472 void TCPSocketLibevent::LogConnectEnd(int net_error
) const {
473 if (net_error
!= OK
) {
474 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT
, net_error
);
478 UpdateConnectionTypeHistograms(CONNECTION_ANY
);
// Look up the local address so it can be attached to the end event.
480 SockaddrStorage storage
;
481 int rv
= socket_
->GetLocalAddress(&storage
);
// Lookup-failure reporting: PLOG appends the errno text after "error: ".
483 PLOG(ERROR
) << "GetLocalAddress() [rv: " << rv
<< "] error: ";
485 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT
, rv
);
// End event that carries the local (source) address.
489 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT
,
490 CreateNetLogSourceAddressCallback(storage
.addr
,
// Completion handler bound in Read() for the asynchronous case. |buf| is the
// reference taken in Read(). Records the FastOpen status, runs the result
// through HandleReadCompleted() and passes the outcome to |callback|.
494 void TCPSocketLibevent::ReadCompleted(const scoped_refptr
<IOBuffer
>& buf
,
495 const CompletionCallback
& callback
,
// A completion must carry a final result, never ERR_IO_PENDING.
497 DCHECK_NE(ERR_IO_PENDING
, rv
);
498 // Records fast open status regardless of error in asynchronous case.
499 // TODO(rdsmith,jri): Change histogram name to indicate it could be called on
501 RecordFastOpenStatus();
502 callback
.Run(HandleReadCompleted(buf
, rv
));
// Shared post-processing for synchronous and asynchronous reads: logs either
// a TYPE_SOCKET_READ_ERROR event or a TYPE_SOCKET_BYTES_RECEIVED transfer
// event for |rv|, and uses the "tcp.read_bytes" stats counter.
// NOTE(review): the condition separating the error path from the success path
// is not visible in this view — presumably rv < 0; confirm.
505 int TCPSocketLibevent::HandleReadCompleted(IOBuffer
* buf
, int rv
) {
// NOTE(review): errno is read here rather than being passed in; on the
// asynchronous path it may no longer reflect the read failure — confirm.
507 net_log_
.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR
,
508 CreateNetLogSocketErrorCallback(rv
, errno
));
512 base::StatsCounter
read_bytes("tcp.read_bytes");
514 net_log_
.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED
, rv
,
// Completion handler bound in Write() for the asynchronous case. |buf| is the
// reference taken in Write(). Runs the result through HandleWriteCompleted()
// and passes the outcome to |callback|.
519 void TCPSocketLibevent::WriteCompleted(const scoped_refptr
<IOBuffer
>& buf
,
520 const CompletionCallback
& callback
,
// A completion must carry a final result, never ERR_IO_PENDING.
522 DCHECK_NE(ERR_IO_PENDING
, rv
);
523 callback
.Run(HandleWriteCompleted(buf
, rv
));
// Shared post-processing for synchronous and asynchronous writes: logs either
// a TYPE_SOCKET_WRITE_ERROR event or a TYPE_SOCKET_BYTES_SENT transfer event
// for |rv|, and uses the "tcp.write_bytes" stats counter.
// NOTE(review): the condition separating the error path from the success path
// is not visible in this view — presumably rv < 0; confirm.
526 int TCPSocketLibevent::HandleWriteCompleted(IOBuffer
* buf
, int rv
) const {
// NOTE(review): errno is read here rather than being passed in; on the
// asynchronous path it may no longer reflect the write failure — confirm.
528 net_log_
.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR
,
529 CreateNetLogSocketErrorCallback(rv
, errno
));
533 base::StatsCounter
write_bytes("tcp.write_bytes");
535 net_log_
.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT
, rv
,
// Performs the first write of a TCP FastOpen connection with sendto(), using
// the peer address stored on the underlying socket, and updates
// tcp_fastopen_connected_ / fast_open_status_ according to the outcome.
// When the send would block, the write is handed to the underlying socket's
// WaitForWrite() with |callback|.
// NOTE(review): several conditions and the remaining sendto() arguments are
// not visible in this view; the notes below describe only the visible
// statements.
540 int TCPSocketLibevent::TcpFastOpenWrite(
543 const CompletionCallback
& callback
) {
// The destination for sendto() is the peer address recorded earlier.
544 SockaddrStorage storage
;
545 int rv
= socket_
->GetPeerAddress(&storage
);
// NOTE(review): 0x20000000 is presumably the value of MSG_FASTOPEN,
// hard-coded for headers that do not define it — confirm.
549 int flags
= 0x20000000; // Magic flag to enable TCP_FASTOPEN.
550 #if defined(OS_LINUX)
551 // sendto() will fail with EPIPE when the system doesn't support TCP Fast
552 // Open. Theoretically that shouldn't happen since the caller should check
553 // for system support on startup, but users may dynamically disable TCP Fast
555 flags
|= MSG_NOSIGNAL
;
556 #endif // defined(OS_LINUX)
// HANDLE_EINTR retries the call when it is interrupted by a signal.
557 rv
= HANDLE_EINTR(sendto(socket_
->socket_fd(),
// From here on Write() uses the regular socket_->Write() path.
563 tcp_fastopen_connected_
= true;
// Status recorded when sendto() returned without needing to wait.
566 fast_open_status_
= FAST_OPEN_FAST_CONNECT_RETURN
;
570 DCHECK_NE(EPIPE
, errno
);
572 // If errno == EINPROGRESS, that means the kernel didn't have a cookie
573 // and would block. The kernel is internally doing a connect() though.
574 // Remap EINPROGRESS to EAGAIN so we treat this the same as our other
575 // asynchronous cases. Note that the user buffer has not been copied to
577 if (errno
== EINPROGRESS
) {
580 rv
= MapSystemError(errno
);
// Any mapped result other than ERR_IO_PENDING is a hard failure.
583 if (rv
!= ERR_IO_PENDING
) {
584 fast_open_status_
= FAST_OPEN_ERROR
;
// Would-block case: record the slow path and wait for writability; the data
// is written by the underlying socket once it becomes writable.
588 fast_open_status_
= FAST_OPEN_SLOW_CONNECT_RETURN
;
589 return socket_
->WaitForWrite(buf
, buf_len
, callback
);
// Refines fast_open_status_ from the provisional FAST_OPEN_FAST_CONNECT_RETURN
// or FAST_OPEN_SLOW_CONNECT_RETURN value into a final status, based on
// whether TCP_INFO reports TCPI_OPT_SYN_DATA for the socket. It does nothing
// unless FastOpen is in use and the status is one of those two provisional
// values, so once a final status is stored later calls leave it unchanged.
592 void TCPSocketLibevent::RecordFastOpenStatus() {
593 if (use_tcp_fastopen_
&&
594 (fast_open_status_
== FAST_OPEN_FAST_CONNECT_RETURN
||
595 fast_open_status_
== FAST_OPEN_SLOW_CONNECT_RETURN
)) {
596 DCHECK_NE(FAST_OPEN_STATUS_UNKNOWN
, fast_open_status_
);
// Both flags stay false when TCP_INFO is not available at compile time.
597 bool getsockopt_success(false);
598 bool server_acked_data(false);
599 #if defined(TCP_INFO)
600 // Probe to see if the socket used TCP Fast Open.
602 socklen_t info_len
= sizeof(tcp_info
);
// Success requires both a zero return and a full-sized tcp_info result.
604 getsockopt(socket_
->socket_fd(), IPPROTO_TCP
, TCP_INFO
,
605 &info
, &info_len
) == 0 &&
606 info_len
== sizeof(tcp_info
);
// TCPI_OPT_SYN_DATA falls back to the value 32 at the top of this file when
// the system headers do not define it.
607 server_acked_data
= getsockopt_success
&&
608 (info
.tcpi_options
& TCPI_OPT_SYN_DATA
);
// Probe succeeded: pick the ACK/NACK variant matching how the connect
// returned.
// NOTE(review): the else lines separating the assignments below are not
// visible in this view — presumably fast-connect vs. slow-connect, then the
// probe-failed fallback; confirm.
610 if (getsockopt_success
) {
611 if (fast_open_status_
== FAST_OPEN_FAST_CONNECT_RETURN
) {
612 fast_open_status_
= (server_acked_data
? FAST_OPEN_SYN_DATA_ACK
:
613 FAST_OPEN_SYN_DATA_NACK
);
615 fast_open_status_
= (server_acked_data
? FAST_OPEN_NO_SYN_DATA_ACK
:
616 FAST_OPEN_NO_SYN_DATA_NACK
);
// Assignment of the *_FAILED variants, again keyed on how the connect
// returned.
619 fast_open_status_
= (fast_open_status_
== FAST_OPEN_FAST_CONNECT_RETURN
?
620 FAST_OPEN_SYN_DATA_FAILED
:
621 FAST_OPEN_NO_SYN_DATA_FAILED
);