1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/socket/tcp_socket.h"
8 #include <netinet/tcp.h>
9 #include <sys/socket.h>
11 #include "base/bind.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/posix/eintr_wrapper.h"
16 #include "net/base/address_list.h"
17 #include "net/base/connection_type_histograms.h"
18 #include "net/base/io_buffer.h"
19 #include "net/base/ip_endpoint.h"
20 #include "net/base/net_errors.h"
21 #include "net/base/net_util.h"
22 #include "net/base/network_change_notifier.h"
23 #include "net/socket/socket_libevent.h"
24 #include "net/socket/socket_net_log_params.h"
26 // If we don't have a definition for TCPI_OPT_SYN_DATA, create one.
27 #ifndef TCPI_OPT_SYN_DATA
28 #define TCPI_OPT_SYN_DATA 32
35 // SetTCPNoDelay turns on/off buffering in the kernel. By default, TCP sockets
36 // will wait up to 200ms for more data to complete a packet before transmitting.
37 // After calling this function, the kernel will not wait. See TCP_NODELAY in
38 // `man 7 tcp`.
39 bool SetTCPNoDelay(int fd
, bool no_delay
) {
40 int on
= no_delay
? 1 : 0;
41 int error
= setsockopt(fd
, IPPROTO_TCP
, TCP_NODELAY
, &on
, sizeof(on
));
45 // SetTCPKeepAlive sets SO_KEEPALIVE.
46 bool SetTCPKeepAlive(int fd
, bool enable
, int delay
) {
47 int on
= enable
? 1 : 0;
48 if (setsockopt(fd
, SOL_SOCKET
, SO_KEEPALIVE
, &on
, sizeof(on
))) {
49 PLOG(ERROR
) << "Failed to set SO_KEEPALIVE on fd: " << fd
;
53 // If we disabled TCP keep alive, our work is done here.
57 #if defined(OS_LINUX) || defined(OS_ANDROID)
58 // Set seconds until first TCP keep alive.
59 if (setsockopt(fd
, SOL_TCP
, TCP_KEEPIDLE
, &delay
, sizeof(delay
))) {
60 PLOG(ERROR
) << "Failed to set TCP_KEEPIDLE on fd: " << fd
;
63 // Set seconds between TCP keep alives.
64 if (setsockopt(fd
, SOL_TCP
, TCP_KEEPINTVL
, &delay
, sizeof(delay
))) {
65 PLOG(ERROR
) << "Failed to set TCP_KEEPINTVL on fd: " << fd
;
74 //-----------------------------------------------------------------------------
76 TCPSocketLibevent::TCPSocketLibevent(NetLog
* net_log
,
77 const NetLog::Source
& source
)
78 : use_tcp_fastopen_(IsTCPFastOpenEnabled()),
79 tcp_fastopen_connected_(false),
80 fast_open_status_(FAST_OPEN_STATUS_UNKNOWN
),
81 logging_multiple_connect_attempts_(false),
82 net_log_(BoundNetLog::Make(net_log
, NetLog::SOURCE_SOCKET
)) {
83 net_log_
.BeginEvent(NetLog::TYPE_SOCKET_ALIVE
,
84 source
.ToEventParametersCallback());
87 TCPSocketLibevent::~TCPSocketLibevent() {
88 net_log_
.EndEvent(NetLog::TYPE_SOCKET_ALIVE
);
89 if (tcp_fastopen_connected_
) {
90 UMA_HISTOGRAM_ENUMERATION("Net.TcpFastOpenSocketConnection",
91 fast_open_status_
, FAST_OPEN_MAX_VALUE
);
95 int TCPSocketLibevent::Open(AddressFamily family
) {
97 socket_
.reset(new SocketLibevent
);
98 int rv
= socket_
->Open(ConvertAddressFamily(family
));
104 int TCPSocketLibevent::AdoptConnectedSocket(int socket_fd
,
105 const IPEndPoint
& peer_address
) {
108 SockaddrStorage storage
;
109 if (!peer_address
.ToSockAddr(storage
.addr
, &storage
.addr_len
) &&
110 // For backward compatibility, allows the empty address.
111 !(peer_address
== IPEndPoint())) {
112 return ERR_ADDRESS_INVALID
;
115 socket_
.reset(new SocketLibevent
);
116 int rv
= socket_
->AdoptConnectedSocket(socket_fd
, storage
);
122 int TCPSocketLibevent::Bind(const IPEndPoint
& address
) {
125 SockaddrStorage storage
;
126 if (!address
.ToSockAddr(storage
.addr
, &storage
.addr_len
))
127 return ERR_ADDRESS_INVALID
;
129 return socket_
->Bind(storage
);
132 int TCPSocketLibevent::Listen(int backlog
) {
134 return socket_
->Listen(backlog
);
137 int TCPSocketLibevent::Accept(scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
139 const CompletionCallback
& callback
) {
141 DCHECK(!callback
.is_null());
143 DCHECK(!accept_socket_
);
145 net_log_
.BeginEvent(NetLog::TYPE_TCP_ACCEPT
);
147 int rv
= socket_
->Accept(
149 base::Bind(&TCPSocketLibevent::AcceptCompleted
,
150 base::Unretained(this), tcp_socket
, address
, callback
));
151 if (rv
!= ERR_IO_PENDING
)
152 rv
= HandleAcceptCompleted(tcp_socket
, address
, rv
);
156 int TCPSocketLibevent::Connect(const IPEndPoint
& address
,
157 const CompletionCallback
& callback
) {
160 if (!logging_multiple_connect_attempts_
)
161 LogConnectBegin(AddressList(address
));
163 net_log_
.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
,
164 CreateNetLogIPEndPointCallback(&address
));
166 SockaddrStorage storage
;
167 if (!address
.ToSockAddr(storage
.addr
, &storage
.addr_len
))
168 return ERR_ADDRESS_INVALID
;
170 if (use_tcp_fastopen_
) {
171 // With TCP FastOpen, we pretend that the socket is connected.
172 DCHECK(!tcp_fastopen_connected_
);
173 socket_
->SetPeerAddress(storage
);
177 int rv
= socket_
->Connect(storage
,
178 base::Bind(&TCPSocketLibevent::ConnectCompleted
,
179 base::Unretained(this), callback
));
180 if (rv
!= ERR_IO_PENDING
)
181 rv
= HandleConnectCompleted(rv
);
185 bool TCPSocketLibevent::IsConnected() const {
189 if (use_tcp_fastopen_
&& !tcp_fastopen_connected_
&&
190 socket_
->HasPeerAddress()) {
191 // With TCP FastOpen, we pretend that the socket is connected.
192 // This allows GetPeerAddress() to return peer_address_.
196 return socket_
->IsConnected();
199 bool TCPSocketLibevent::IsConnectedAndIdle() const {
200 // TODO(wtc): should we also handle the TCP FastOpen case here,
201 // as we do in IsConnected()?
202 return socket_
&& socket_
->IsConnectedAndIdle();
205 int TCPSocketLibevent::Read(IOBuffer
* buf
,
207 const CompletionCallback
& callback
) {
209 DCHECK(!callback
.is_null());
211 int rv
= socket_
->Read(
213 base::Bind(&TCPSocketLibevent::ReadCompleted
,
214 base::Unretained(this), base::Unretained(buf
), callback
));
216 RecordFastOpenStatus();
217 if (rv
!= ERR_IO_PENDING
)
218 rv
= HandleReadCompleted(buf
, rv
);
222 int TCPSocketLibevent::Write(IOBuffer
* buf
,
224 const CompletionCallback
& callback
) {
226 DCHECK(!callback
.is_null());
228 CompletionCallback write_callback
=
229 base::Bind(&TCPSocketLibevent::WriteCompleted
,
230 base::Unretained(this), base::Unretained(buf
), callback
);
232 if (use_tcp_fastopen_
&& !tcp_fastopen_connected_
) {
233 rv
= TcpFastOpenWrite(buf
, buf_len
, write_callback
);
235 rv
= socket_
->Write(buf
, buf_len
, write_callback
);
238 if (rv
!= ERR_IO_PENDING
)
239 rv
= HandleWriteCompleted(buf
, rv
);
243 int TCPSocketLibevent::GetLocalAddress(IPEndPoint
* address
) const {
247 return ERR_SOCKET_NOT_CONNECTED
;
249 SockaddrStorage storage
;
250 int rv
= socket_
->GetLocalAddress(&storage
);
254 if (!address
->FromSockAddr(storage
.addr
, storage
.addr_len
))
255 return ERR_ADDRESS_INVALID
;
260 int TCPSocketLibevent::GetPeerAddress(IPEndPoint
* address
) const {
264 return ERR_SOCKET_NOT_CONNECTED
;
266 SockaddrStorage storage
;
267 int rv
= socket_
->GetPeerAddress(&storage
);
271 if (!address
->FromSockAddr(storage
.addr
, storage
.addr_len
))
272 return ERR_ADDRESS_INVALID
;
277 int TCPSocketLibevent::SetDefaultOptionsForServer() {
279 return SetAddressReuse(true);
282 void TCPSocketLibevent::SetDefaultOptionsForClient() {
285 // This mirrors the behaviour on Windows. See the comment in
286 // tcp_socket_win.cc after searching for "NODELAY".
287 // If SetTCPNoDelay fails, we don't care.
288 SetTCPNoDelay(socket_
->socket_fd(), true);
290 // TCP keep alive wakes up the radio, which is expensive on mobile. Do not
291 // enable it there. It's useful to prevent TCP middleboxes from timing out
292 // connection mappings. Packets for timed out connection mappings at
293 // middleboxes will either lead to:
294 // a) Middleboxes sending TCP RSTs. It's up to higher layers to check for this
295 // and retry. The HTTP network transaction code does this.
296 // b) Middleboxes just drop the unrecognized TCP packet. This leads to the TCP
297 // stack retransmitting packets per TCP stack retransmission timeouts, which
298 // are very high (on the order of seconds). Given the number of
299 // retransmissions required before killing the connection, this can lead to
300 // tens of seconds or even minutes of delay, depending on OS.
301 #if !defined(OS_ANDROID) && !defined(OS_IOS)
302 const int kTCPKeepAliveSeconds
= 45;
304 SetTCPKeepAlive(socket_
->socket_fd(), true, kTCPKeepAliveSeconds
);
308 int TCPSocketLibevent::SetAddressReuse(bool allow
) {
311 // SO_REUSEADDR is useful for server sockets to bind to a recently unbound
312 // port. When a socket is closed, the end point changes its state to TIME_WAIT
313 // and waits for 2 MSL (maximum segment lifetime) to ensure the remote peer
314 // acknowledges its closure. For server sockets, it is usually safe to
315 // bind to a TIME_WAIT end point immediately, which is a widely adopted
316 // behavior.
318 // Note that on *nix, SO_REUSEADDR does not enable the TCP socket to bind to
319 // an end point that is already bound by another socket. To do that one must
320 // set SO_REUSEPORT instead. This option is not provided on Linux prior
321 // to 3.9.
323 // SO_REUSEPORT is provided in MacOS X and iOS.
324 int boolean_value
= allow
? 1 : 0;
325 int rv
= setsockopt(socket_
->socket_fd(), SOL_SOCKET
, SO_REUSEADDR
,
326 &boolean_value
, sizeof(boolean_value
));
328 return MapSystemError(errno
);
332 int TCPSocketLibevent::SetReceiveBufferSize(int32 size
) {
334 int rv
= setsockopt(socket_
->socket_fd(), SOL_SOCKET
, SO_RCVBUF
,
335 reinterpret_cast<const char*>(&size
), sizeof(size
));
336 return (rv
== 0) ? OK
: MapSystemError(errno
);
339 int TCPSocketLibevent::SetSendBufferSize(int32 size
) {
341 int rv
= setsockopt(socket_
->socket_fd(), SOL_SOCKET
, SO_SNDBUF
,
342 reinterpret_cast<const char*>(&size
), sizeof(size
));
343 return (rv
== 0) ? OK
: MapSystemError(errno
);
346 bool TCPSocketLibevent::SetKeepAlive(bool enable
, int delay
) {
348 return SetTCPKeepAlive(socket_
->socket_fd(), enable
, delay
);
351 bool TCPSocketLibevent::SetNoDelay(bool no_delay
) {
353 return SetTCPNoDelay(socket_
->socket_fd(), no_delay
);
356 void TCPSocketLibevent::Close() {
358 tcp_fastopen_connected_
= false;
359 fast_open_status_
= FAST_OPEN_STATUS_UNKNOWN
;
362 bool TCPSocketLibevent::UsingTCPFastOpen() const {
363 return use_tcp_fastopen_
;
366 bool TCPSocketLibevent::IsValid() const {
367 return socket_
!= NULL
&& socket_
->socket_fd() != kInvalidSocket
;
370 void TCPSocketLibevent::StartLoggingMultipleConnectAttempts(
371 const AddressList
& addresses
) {
372 if (!logging_multiple_connect_attempts_
) {
373 logging_multiple_connect_attempts_
= true;
374 LogConnectBegin(addresses
);
380 void TCPSocketLibevent::EndLoggingMultipleConnectAttempts(int net_error
) {
381 if (logging_multiple_connect_attempts_
) {
382 LogConnectEnd(net_error
);
383 logging_multiple_connect_attempts_
= false;
389 void TCPSocketLibevent::AcceptCompleted(
390 scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
392 const CompletionCallback
& callback
,
394 DCHECK_NE(ERR_IO_PENDING
, rv
);
395 callback
.Run(HandleAcceptCompleted(tcp_socket
, address
, rv
));
398 int TCPSocketLibevent::HandleAcceptCompleted(
399 scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
403 rv
= BuildTcpSocketLibevent(tcp_socket
, address
);
406 net_log_
.EndEvent(NetLog::TYPE_TCP_ACCEPT
,
407 CreateNetLogIPEndPointCallback(address
));
409 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT
, rv
);
415 int TCPSocketLibevent::BuildTcpSocketLibevent(
416 scoped_ptr
<TCPSocketLibevent
>* tcp_socket
,
417 IPEndPoint
* address
) {
418 DCHECK(accept_socket_
);
420 SockaddrStorage storage
;
421 if (accept_socket_
->GetPeerAddress(&storage
) != OK
||
422 !address
->FromSockAddr(storage
.addr
, storage
.addr_len
)) {
423 accept_socket_
.reset();
424 return ERR_ADDRESS_INVALID
;
427 tcp_socket
->reset(new TCPSocketLibevent(net_log_
.net_log(),
429 (*tcp_socket
)->socket_
.reset(accept_socket_
.release());
433 void TCPSocketLibevent::ConnectCompleted(const CompletionCallback
& callback
,
435 DCHECK_NE(ERR_IO_PENDING
, rv
);
436 callback
.Run(HandleConnectCompleted(rv
));
439 int TCPSocketLibevent::HandleConnectCompleted(int rv
) const {
440 // Log the end of this attempt (and any OS error it threw).
442 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
,
443 NetLog::IntegerCallback("os_error", errno
));
445 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
);
448 // Give a more specific error when the user is offline.
449 if (rv
== ERR_ADDRESS_UNREACHABLE
&& NetworkChangeNotifier::IsOffline())
450 rv
= ERR_INTERNET_DISCONNECTED
;
452 if (!logging_multiple_connect_attempts_
)
458 void TCPSocketLibevent::LogConnectBegin(const AddressList
& addresses
) const {
459 base::StatsCounter
connects("tcp.connect");
460 connects
.Increment();
462 net_log_
.BeginEvent(NetLog::TYPE_TCP_CONNECT
,
463 addresses
.CreateNetLogCallback());
466 void TCPSocketLibevent::LogConnectEnd(int net_error
) const {
467 if (net_error
!= OK
) {
468 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT
, net_error
);
472 UpdateConnectionTypeHistograms(CONNECTION_ANY
);
474 SockaddrStorage storage
;
475 int rv
= socket_
->GetLocalAddress(&storage
);
477 PLOG(ERROR
) << "GetLocalAddress() [rv: " << rv
<< "] error: ";
479 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT
, rv
);
483 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT
,
484 CreateNetLogSourceAddressCallback(storage
.addr
,
488 void TCPSocketLibevent::ReadCompleted(IOBuffer
* buf
,
489 const CompletionCallback
& callback
,
491 DCHECK_NE(ERR_IO_PENDING
, rv
);
492 // Records fast open status regardless of error in asynchronous case.
493 // TODO(rdsmith,jri): Change histogram name to indicate it could be called on
494 // error.
495 RecordFastOpenStatus();
496 callback
.Run(HandleReadCompleted(buf
, rv
));
499 int TCPSocketLibevent::HandleReadCompleted(IOBuffer
* buf
, int rv
) {
501 net_log_
.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR
,
502 CreateNetLogSocketErrorCallback(rv
, errno
));
506 base::StatsCounter
read_bytes("tcp.read_bytes");
508 net_log_
.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED
, rv
,
513 void TCPSocketLibevent::WriteCompleted(IOBuffer
* buf
,
514 const CompletionCallback
& callback
,
516 DCHECK_NE(ERR_IO_PENDING
, rv
);
517 callback
.Run(HandleWriteCompleted(buf
, rv
));
520 int TCPSocketLibevent::HandleWriteCompleted(IOBuffer
* buf
, int rv
) const {
522 net_log_
.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR
,
523 CreateNetLogSocketErrorCallback(rv
, errno
));
527 base::StatsCounter
write_bytes("tcp.write_bytes");
529 net_log_
.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT
, rv
,
534 int TCPSocketLibevent::TcpFastOpenWrite(
537 const CompletionCallback
& callback
) {
538 SockaddrStorage storage
;
539 int rv
= socket_
->GetPeerAddress(&storage
);
543 int flags
= 0x20000000; // Magic flag to enable TCP_FASTOPEN.
544 #if defined(OS_LINUX)
545 // sendto() will fail with EPIPE when the system doesn't support TCP Fast
546 // Open. Theoretically that shouldn't happen since the caller should check
547 // for system support on startup, but users may dynamically disable TCP Fast
548 // Open via sysctl.
549 flags
|= MSG_NOSIGNAL
;
550 #endif // defined(OS_LINUX)
551 rv
= HANDLE_EINTR(sendto(socket_
->socket_fd(),
557 tcp_fastopen_connected_
= true;
560 fast_open_status_
= FAST_OPEN_FAST_CONNECT_RETURN
;
564 DCHECK_NE(EPIPE
, errno
);
566 // If errno == EINPROGRESS, that means the kernel didn't have a cookie
567 // and would block. The kernel is internally doing a connect() though.
568 // Remap EINPROGRESS to EAGAIN so we treat this the same as our other
569 // asynchronous cases. Note that the user buffer has not been copied to
570 // kernel space.
571 if (errno
== EINPROGRESS
) {
574 rv
= MapSystemError(errno
);
577 if (rv
!= ERR_IO_PENDING
) {
578 fast_open_status_
= FAST_OPEN_ERROR
;
582 fast_open_status_
= FAST_OPEN_SLOW_CONNECT_RETURN
;
583 return socket_
->WaitForWrite(buf
, buf_len
, callback
);
586 void TCPSocketLibevent::RecordFastOpenStatus() {
587 if (use_tcp_fastopen_
&&
588 (fast_open_status_
== FAST_OPEN_FAST_CONNECT_RETURN
||
589 fast_open_status_
== FAST_OPEN_SLOW_CONNECT_RETURN
)) {
590 DCHECK_NE(FAST_OPEN_STATUS_UNKNOWN
, fast_open_status_
);
591 bool getsockopt_success(false);
592 bool server_acked_data(false);
593 #if defined(TCP_INFO)
594 // Probe to see if the socket used TCP Fast Open.
596 socklen_t info_len
= sizeof(tcp_info
);
598 getsockopt(socket_
->socket_fd(), IPPROTO_TCP
, TCP_INFO
,
599 &info
, &info_len
) == 0 &&
600 info_len
== sizeof(tcp_info
);
601 server_acked_data
= getsockopt_success
&&
602 (info
.tcpi_options
& TCPI_OPT_SYN_DATA
);
604 if (getsockopt_success
) {
605 if (fast_open_status_
== FAST_OPEN_FAST_CONNECT_RETURN
) {
606 fast_open_status_
= (server_acked_data
? FAST_OPEN_SYN_DATA_ACK
:
607 FAST_OPEN_SYN_DATA_NACK
);
609 fast_open_status_
= (server_acked_data
? FAST_OPEN_NO_SYN_DATA_ACK
:
610 FAST_OPEN_NO_SYN_DATA_NACK
);
613 fast_open_status_
= (fast_open_status_
== FAST_OPEN_FAST_CONNECT_RETURN
?
614 FAST_OPEN_SYN_DATA_FAILED
:
615 FAST_OPEN_NO_SYN_DATA_FAILED
);