1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/socket/tcp_socket_win.h"
9 #include "base/callback_helpers.h"
10 #include "base/logging.h"
11 #include "base/metrics/stats_counters.h"
12 #include "base/win/windows_version.h"
13 #include "net/base/address_list.h"
14 #include "net/base/connection_type_histograms.h"
15 #include "net/base/io_buffer.h"
16 #include "net/base/ip_endpoint.h"
17 #include "net/base/net_errors.h"
18 #include "net/base/net_util.h"
19 #include "net/base/network_change_notifier.h"
20 #include "net/base/winsock_init.h"
21 #include "net/base/winsock_util.h"
22 #include "net/socket/socket_descriptor.h"
23 #include "net/socket/socket_net_log_params.h"
29 const int kTCPKeepAliveSeconds
= 45;
// Sets the SO_RCVBUF option (at the SOL_SOCKET level) of |socket| to |size|
// by calling setsockopt().
31 bool SetSocketReceiveBufferSize(SOCKET socket
, int32 size
) {
32 int rv
= setsockopt(socket
, SOL_SOCKET
, SO_RCVBUF
,
33 reinterpret_cast<const char*>(&size
), sizeof(size
));
// Debug-only check that setsockopt() returned 0; the message appends the
// value of GetLastError().
34 DCHECK(!rv
) << "Could not set socket receive buffer size: " << GetLastError();
// Sets the SO_SNDBUF option (at the SOL_SOCKET level) of |socket| to |size|
// by calling setsockopt().
38 bool SetSocketSendBufferSize(SOCKET socket
, int32 size
) {
39 int rv
= setsockopt(socket
, SOL_SOCKET
, SO_SNDBUF
,
40 reinterpret_cast<const char*>(&size
), sizeof(size
));
// Debug-only check that setsockopt() returned 0; the message appends the
// value of GetLastError().
41 DCHECK(!rv
) << "Could not set socket send buffer size: " << GetLastError();
46 // The Nagle implementation on windows is governed by RFC 896. The idea
47 // behind Nagle is to reduce small packets on the network. When Nagle is
48 // enabled, if a partial packet has been sent, the TCP stack will disallow
49 // further *partial* packets until an ACK has been received from the other
50 // side. Good applications should always strive to send as much data as
51 // possible and avoid partial-packet sends. However, in most real world
52 // applications, there are edge cases where this does not happen, and two
53 // partial packets may be sent back to back. For a browser, it is NEVER
54 // a benefit to delay for an RTT before the second packet is sent.
56 // As a practical example in Chromium today, consider the case of a small
57 // POST. I have verified this:
58 // Client writes 649 bytes of header (partial packet #1)
59 // Client writes 50 bytes of POST data (partial packet #2)
60 // In the above example, with Nagle, an RTT delay is inserted between these
61 // two sends due to Nagle. RTTs can easily be 100ms or more. The best
62 // fix is to make sure that for POSTing data, we write as much data as
63 // possible and minimize partial packets. We will fix that. But disabling
64 // Nagle also ensures we don't run into this delay in other edge cases.
66 // http://technet.microsoft.com/en-us/library/bb726981.aspx
// Sets the TCP_NODELAY option (at the IPPROTO_TCP level) of |socket|. The
// option value is TRUE when |disable| is true and FALSE otherwise.
67 bool DisableNagle(SOCKET socket
, bool disable
) {
// setsockopt() takes the option value as a BOOL, so convert the C++ bool.
68 BOOL val
= disable
? TRUE
: FALSE
;
69 int rv
= setsockopt(socket
, IPPROTO_TCP
, TCP_NODELAY
,
70 reinterpret_cast<const char*>(&val
),
// Debug-only check that setsockopt() returned 0.
72 DCHECK(!rv
) << "Could not disable nagle";
76 // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
77 // connections. See http://crbug.com/27400 for details.
// |enable| selects whether keep-alive is turned on. |delay_secs| is multiplied
// by 1000 and the result is used for both delay fields of the tcp_keepalive
// structure handed to WSAIoctl(SIO_KEEPALIVE_VALS).
78 bool SetTCPKeepAlive(SOCKET socket
, BOOL enable
, int delay_secs
) {
// Scale the delay from seconds to milliseconds.
79 int delay
= delay_secs
* 1000;
80 struct tcp_keepalive keepalive_vals
= {
81 enable
? 1 : 0, // TCP keep-alive on.
82 delay
, // Delay (in milliseconds) before sending first TCP keep-alive packet.
83 delay
, // Delay (in milliseconds) between sending TCP keep-alive packets.
// Initialized to a recognizable sentinel before being passed to WSAIoctl()
// as the bytes-returned output argument.
85 DWORD bytes_returned
= 0xABAB;
86 int rv
= WSAIoctl(socket
, SIO_KEEPALIVE_VALS
, &keepalive_vals
,
87 sizeof(keepalive_vals
), NULL
, 0,
88 &bytes_returned
, NULL
, NULL
);
// Debug-only check that WSAIoctl() returned 0; the message includes the
// socket and WSAGetLastError().
89 DCHECK(!rv
) << "Could not enable TCP Keep-Alive for socket: " << socket
90 << " [error: " << WSAGetLastError() << "].";
92 // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
96 int MapConnectError(int os_error
) {
98 // connect fails with WSAEACCES when Windows Firewall blocks the
101 return ERR_NETWORK_ACCESS_DENIED
;
103 return ERR_CONNECTION_TIMED_OUT
;
105 int net_error
= MapSystemError(os_error
);
106 if (net_error
== ERR_FAILED
)
107 return ERR_CONNECTION_FAILED
; // More specific than ERR_FAILED.
109 // Give a more specific error when the user is offline.
110 if (net_error
== ERR_ADDRESS_UNREACHABLE
&&
111 NetworkChangeNotifier::IsOffline()) {
112 return ERR_INTERNET_DISCONNECTED
;
122 //-----------------------------------------------------------------------------
124 // This class encapsulates all the state that has to be preserved as long as
125 // there is a network IO operation in progress. If the owner TCPSocketWin is
126 // destroyed while an operation is in progress, the Core is detached and it
127 // lives until the operation completes and the OS doesn't reference any resource
128 // declared on this class anymore.
129 class TCPSocketWin::Core
: public base::RefCounted
<Core
> {
131 explicit Core(TCPSocketWin
* socket
);
133 // Start watching for the end of a read or write operation.
135 void WatchForWrite();
137 // The TCPSocketWin is going away.
138 void Detach() { socket_
= NULL
; }
140 // The separate OVERLAPPED variables for asynchronous operation.
141 // |read_overlapped_| is used for both Connect() and Read().
142 // |write_overlapped_| is only used for Write();
143 OVERLAPPED read_overlapped_
;
144 OVERLAPPED write_overlapped_
;
146 // The buffers used in Read() and Write().
147 scoped_refptr
<IOBuffer
> read_iobuffer_
;
148 scoped_refptr
<IOBuffer
> write_iobuffer_
;
149 int read_buffer_length_
;
150 int write_buffer_length_
;
152 bool non_blocking_reads_initialized_
;
155 friend class base::RefCounted
<Core
>;
157 class ReadDelegate
: public base::win::ObjectWatcher::Delegate
{
159 explicit ReadDelegate(Core
* core
) : core_(core
) {}
160 virtual ~ReadDelegate() {}
162 // base::ObjectWatcher::Delegate methods:
163 virtual void OnObjectSignaled(HANDLE object
);
169 class WriteDelegate
: public base::win::ObjectWatcher::Delegate
{
171 explicit WriteDelegate(Core
* core
) : core_(core
) {}
172 virtual ~WriteDelegate() {}
174 // base::ObjectWatcher::Delegate methods:
175 virtual void OnObjectSignaled(HANDLE object
);
183 // The socket that created this object.
184 TCPSocketWin
* socket_
;
186 // |reader_| handles the signals from |read_watcher_|.
187 ReadDelegate reader_
;
188 // |writer_| handles the signals from |write_watcher_|.
189 WriteDelegate writer_
;
191 // |read_watcher_| watches for events from Connect() and Read().
192 base::win::ObjectWatcher read_watcher_
;
193 // |write_watcher_| watches for events from Write();
194 base::win::ObjectWatcher write_watcher_
;
196 DISALLOW_COPY_AND_ASSIGN(Core
);
199 TCPSocketWin::Core::Core(TCPSocketWin
* socket
)
200 : read_buffer_length_(0),
201 write_buffer_length_(0),
202 non_blocking_reads_initialized_(false),
206 memset(&read_overlapped_
, 0, sizeof(read_overlapped_
));
207 memset(&write_overlapped_
, 0, sizeof(write_overlapped_
));
209 read_overlapped_
.hEvent
= WSACreateEvent();
210 write_overlapped_
.hEvent
= WSACreateEvent();
213 TCPSocketWin::Core::~Core() {
214 // Make sure the message loop is not watching this object anymore.
215 read_watcher_
.StopWatching();
216 write_watcher_
.StopWatching();
218 WSACloseEvent(read_overlapped_
.hEvent
);
219 memset(&read_overlapped_
, 0xaf, sizeof(read_overlapped_
));
220 WSACloseEvent(write_overlapped_
.hEvent
);
221 memset(&write_overlapped_
, 0xaf, sizeof(write_overlapped_
));
224 void TCPSocketWin::Core::WatchForRead() {
225 // We grab an extra reference because there is an IO operation in progress.
226 // Balanced in ReadDelegate::OnObjectSignaled().
228 read_watcher_
.StartWatching(read_overlapped_
.hEvent
, &reader_
);
231 void TCPSocketWin::Core::WatchForWrite() {
232 // We grab an extra reference because there is an IO operation in progress.
233 // Balanced in WriteDelegate::OnObjectSignaled().
235 write_watcher_
.StartWatching(write_overlapped_
.hEvent
, &writer_
);
238 void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object
) {
239 DCHECK_EQ(object
, core_
->read_overlapped_
.hEvent
);
240 if (core_
->socket_
) {
241 if (core_
->socket_
->waiting_connect_
)
242 core_
->socket_
->DidCompleteConnect();
244 core_
->socket_
->DidSignalRead();
250 void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled(
252 DCHECK_EQ(object
, core_
->write_overlapped_
.hEvent
);
254 core_
->socket_
->DidCompleteWrite();
259 //-----------------------------------------------------------------------------
261 TCPSocketWin::TCPSocketWin(net::NetLog
* net_log
,
262 const net::NetLog::Source
& source
)
263 : socket_(INVALID_SOCKET
),
264 accept_event_(WSA_INVALID_EVENT
),
265 accept_socket_(NULL
),
266 accept_address_(NULL
),
267 waiting_connect_(false),
268 waiting_read_(false),
269 waiting_write_(false),
270 connect_os_error_(0),
271 logging_multiple_connect_attempts_(false),
272 net_log_(BoundNetLog::Make(net_log
, NetLog::SOURCE_SOCKET
)) {
273 net_log_
.BeginEvent(NetLog::TYPE_SOCKET_ALIVE
,
274 source
.ToEventParametersCallback());
278 TCPSocketWin::~TCPSocketWin() {
280 net_log_
.EndEvent(NetLog::TYPE_SOCKET_ALIVE
);
283 int TCPSocketWin::Open(AddressFamily family
) {
284 DCHECK(CalledOnValidThread());
285 DCHECK_EQ(socket_
, INVALID_SOCKET
);
287 socket_
= CreatePlatformSocket(ConvertAddressFamily(family
), SOCK_STREAM
,
289 if (socket_
== INVALID_SOCKET
) {
290 PLOG(ERROR
) << "CreatePlatformSocket() returned an error";
291 return MapSystemError(WSAGetLastError());
294 if (SetNonBlocking(socket_
)) {
295 int result
= MapSystemError(WSAGetLastError());
303 int TCPSocketWin::AdoptConnectedSocket(SOCKET socket
,
304 const IPEndPoint
& peer_address
) {
305 DCHECK(CalledOnValidThread());
306 DCHECK_EQ(socket_
, INVALID_SOCKET
);
311 if (SetNonBlocking(socket_
)) {
312 int result
= MapSystemError(WSAGetLastError());
317 core_
= new Core(this);
318 peer_address_
.reset(new IPEndPoint(peer_address
));
323 int TCPSocketWin::Bind(const IPEndPoint
& address
) {
324 DCHECK(CalledOnValidThread());
325 DCHECK_NE(socket_
, INVALID_SOCKET
);
327 SockaddrStorage storage
;
328 if (!address
.ToSockAddr(storage
.addr
, &storage
.addr_len
))
329 return ERR_ADDRESS_INVALID
;
331 int result
= bind(socket_
, storage
.addr
, storage
.addr_len
);
333 PLOG(ERROR
) << "bind() returned an error";
334 return MapSystemError(WSAGetLastError());
340 int TCPSocketWin::Listen(int backlog
) {
341 DCHECK(CalledOnValidThread());
342 DCHECK_GT(backlog
, 0);
343 DCHECK_NE(socket_
, INVALID_SOCKET
);
344 DCHECK_EQ(accept_event_
, WSA_INVALID_EVENT
);
346 accept_event_
= WSACreateEvent();
347 if (accept_event_
== WSA_INVALID_EVENT
) {
348 PLOG(ERROR
) << "WSACreateEvent()";
349 return MapSystemError(WSAGetLastError());
352 int result
= listen(socket_
, backlog
);
354 PLOG(ERROR
) << "listen() returned an error";
355 return MapSystemError(WSAGetLastError());
361 int TCPSocketWin::Accept(scoped_ptr
<TCPSocketWin
>* socket
,
363 const CompletionCallback
& callback
) {
364 DCHECK(CalledOnValidThread());
367 DCHECK(!callback
.is_null());
368 DCHECK(accept_callback_
.is_null());
370 net_log_
.BeginEvent(NetLog::TYPE_TCP_ACCEPT
);
372 int result
= AcceptInternal(socket
, address
);
374 if (result
== ERR_IO_PENDING
) {
376 WSAEventSelect(socket_
, accept_event_
, FD_ACCEPT
);
377 accept_watcher_
.StartWatching(accept_event_
, this);
379 accept_socket_
= socket
;
380 accept_address_
= address
;
381 accept_callback_
= callback
;
387 int TCPSocketWin::Connect(const IPEndPoint
& address
,
388 const CompletionCallback
& callback
) {
389 DCHECK(CalledOnValidThread());
390 DCHECK_NE(socket_
, INVALID_SOCKET
);
391 DCHECK(!waiting_connect_
);
393 // |peer_address_| and |core_| will be non-NULL if Connect() has been called.
394 // Unless Close() is called to reset the internal state, a second call to
395 // Connect() is not allowed.
396 // Please note that we enforce this even if the previous Connect() has
397 // completed and failed. Although it is allowed to connect the same |socket_|
398 // again after a connection attempt failed on Windows, it results in
399 // unspecified behavior according to POSIX. Therefore, we make it behave in
400 // the same way as TCPSocketLibevent.
401 DCHECK(!peer_address_
&& !core_
);
403 if (!logging_multiple_connect_attempts_
)
404 LogConnectBegin(AddressList(address
));
406 peer_address_
.reset(new IPEndPoint(address
));
408 int rv
= DoConnect();
409 if (rv
== ERR_IO_PENDING
) {
410 // Synchronous operation not supported.
411 DCHECK(!callback
.is_null());
412 read_callback_
= callback
;
413 waiting_connect_
= true;
415 DoConnectComplete(rv
);
421 bool TCPSocketWin::IsConnected() const {
422 DCHECK(CalledOnValidThread());
424 if (socket_
== INVALID_SOCKET
|| waiting_connect_
)
430 // Check if connection is alive.
432 int rv
= recv(socket_
, &c
, 1, MSG_PEEK
);
435 if (rv
== SOCKET_ERROR
&& WSAGetLastError() != WSAEWOULDBLOCK
)
441 bool TCPSocketWin::IsConnectedAndIdle() const {
442 DCHECK(CalledOnValidThread());
444 if (socket_
== INVALID_SOCKET
|| waiting_connect_
)
450 // Check if connection is alive and we haven't received any data
453 int rv
= recv(socket_
, &c
, 1, MSG_PEEK
);
456 if (WSAGetLastError() != WSAEWOULDBLOCK
)
462 int TCPSocketWin::Read(IOBuffer
* buf
,
464 const CompletionCallback
& callback
) {
465 DCHECK(CalledOnValidThread());
466 DCHECK_NE(socket_
, INVALID_SOCKET
);
467 DCHECK(!waiting_read_
);
468 DCHECK(read_callback_
.is_null());
469 DCHECK(!core_
->read_iobuffer_
);
471 return DoRead(buf
, buf_len
, callback
);
474 int TCPSocketWin::Write(IOBuffer
* buf
,
476 const CompletionCallback
& callback
) {
477 DCHECK(CalledOnValidThread());
478 DCHECK_NE(socket_
, INVALID_SOCKET
);
479 DCHECK(!waiting_write_
);
480 DCHECK(write_callback_
.is_null());
481 DCHECK_GT(buf_len
, 0);
482 DCHECK(!core_
->write_iobuffer_
);
484 base::StatsCounter
writes("tcp.writes");
488 write_buffer
.len
= buf_len
;
489 write_buffer
.buf
= buf
->data();
491 // TODO(wtc): Remove the assertion after enough testing.
492 AssertEventNotSignaled(core_
->write_overlapped_
.hEvent
);
494 int rv
= WSASend(socket_
, &write_buffer
, 1, &num
, 0,
495 &core_
->write_overlapped_
, NULL
);
497 if (ResetEventIfSignaled(core_
->write_overlapped_
.hEvent
)) {
498 rv
= static_cast<int>(num
);
499 if (rv
> buf_len
|| rv
< 0) {
500 // It seems that some winsock interceptors report that more was written
501 // than was available. Treat this as an error. http://crbug.com/27870
502 LOG(ERROR
) << "Detected broken LSP: Asked to write " << buf_len
503 << " bytes, but " << rv
<< " bytes reported.";
504 return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES
;
506 base::StatsCounter
write_bytes("tcp.write_bytes");
508 net_log_
.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT
, rv
,
513 int os_error
= WSAGetLastError();
514 if (os_error
!= WSA_IO_PENDING
) {
515 int net_error
= MapSystemError(os_error
);
516 net_log_
.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR
,
517 CreateNetLogSocketErrorCallback(net_error
, os_error
));
521 waiting_write_
= true;
522 write_callback_
= callback
;
523 core_
->write_iobuffer_
= buf
;
524 core_
->write_buffer_length_
= buf_len
;
525 core_
->WatchForWrite();
526 return ERR_IO_PENDING
;
529 int TCPSocketWin::GetLocalAddress(IPEndPoint
* address
) const {
530 DCHECK(CalledOnValidThread());
533 SockaddrStorage storage
;
534 if (getsockname(socket_
, storage
.addr
, &storage
.addr_len
))
535 return MapSystemError(WSAGetLastError());
536 if (!address
->FromSockAddr(storage
.addr
, storage
.addr_len
))
537 return ERR_ADDRESS_INVALID
;
542 int TCPSocketWin::GetPeerAddress(IPEndPoint
* address
) const {
543 DCHECK(CalledOnValidThread());
546 return ERR_SOCKET_NOT_CONNECTED
;
547 *address
= *peer_address_
;
551 int TCPSocketWin::SetDefaultOptionsForServer() {
552 return SetExclusiveAddrUse();
555 void TCPSocketWin::SetDefaultOptionsForClient() {
556 // Increase the socket buffer sizes from the default sizes for WinXP. In
557 // performance testing, there is substantial benefit by increasing from 8KB
560 // http://support.microsoft.com/kb/823764/EN-US
561 // On Vista, if we manually set these sizes, Vista turns off its receive
562 // window auto-tuning feature.
563 // http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
564 // Since Vista's auto-tune is better than any static value we could set,
565 // only change these on pre-vista machines.
566 if (base::win::GetVersion() < base::win::VERSION_VISTA
) {
567 const int32 kSocketBufferSize
= 64 * 1024;
568 SetSocketReceiveBufferSize(socket_
, kSocketBufferSize
);
569 SetSocketSendBufferSize(socket_
, kSocketBufferSize
);
572 DisableNagle(socket_
, true);
573 SetTCPKeepAlive(socket_
, true, kTCPKeepAliveSeconds
);
576 int TCPSocketWin::SetExclusiveAddrUse() {
577 // On Windows, a bound end point can be hijacked by another process by
578 // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE
579 // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the
580 // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another
581 // socket to forcibly bind to the end point until the end point is unbound.
582 // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE.
583 // MSDN: http://goo.gl/M6fjQ.
585 // Unlike on *nix, on Windows a TCP server socket can always bind to an end
586 // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not
589 // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end
590 // point in TIME_WAIT status. It does not have this effect for a TCP server
594 int rv
= setsockopt(socket_
, SOL_SOCKET
, SO_EXCLUSIVEADDRUSE
,
595 reinterpret_cast<const char*>(&true_value
),
598 return MapSystemError(errno
);
602 bool TCPSocketWin::SetReceiveBufferSize(int32 size
) {
603 DCHECK(CalledOnValidThread());
604 return SetSocketReceiveBufferSize(socket_
, size
);
607 bool TCPSocketWin::SetSendBufferSize(int32 size
) {
608 DCHECK(CalledOnValidThread());
609 return SetSocketSendBufferSize(socket_
, size
);
612 bool TCPSocketWin::SetKeepAlive(bool enable
, int delay
) {
613 return SetTCPKeepAlive(socket_
, enable
, delay
);
616 bool TCPSocketWin::SetNoDelay(bool no_delay
) {
617 return DisableNagle(socket_
, no_delay
);
620 void TCPSocketWin::Close() {
621 DCHECK(CalledOnValidThread());
623 if (socket_
!= INVALID_SOCKET
) {
624 // Note: don't use CancelIo to cancel pending IO because it doesn't work
625 // when there is a Winsock layered service provider.
627 // In most socket implementations, closing a socket results in a graceful
628 // connection shutdown, but in Winsock we have to call shutdown explicitly.
629 // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
630 // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
631 shutdown(socket_
, SD_SEND
);
633 // This cancels any pending IO.
634 if (closesocket(socket_
) < 0)
635 PLOG(ERROR
) << "closesocket";
636 socket_
= INVALID_SOCKET
;
639 if (!accept_callback_
.is_null()) {
640 accept_watcher_
.StopWatching();
641 accept_socket_
= NULL
;
642 accept_address_
= NULL
;
643 accept_callback_
.Reset();
647 WSACloseEvent(accept_event_
);
648 accept_event_
= WSA_INVALID_EVENT
;
652 if (waiting_connect_
) {
653 // We closed the socket, so this notification will never come.
654 // From MSDN's WSAEventSelect documentation:
655 // "Closing a socket with closesocket also cancels the association and
656 // selection of network events specified in WSAEventSelect for the
664 waiting_connect_
= false;
665 waiting_read_
= false;
666 waiting_write_
= false;
668 read_callback_
.Reset();
669 write_callback_
.Reset();
670 peer_address_
.reset();
671 connect_os_error_
= 0;
674 bool TCPSocketWin::UsingTCPFastOpen() const {
675 // Not supported on windows.
679 void TCPSocketWin::StartLoggingMultipleConnectAttempts(
680 const AddressList
& addresses
) {
681 if (!logging_multiple_connect_attempts_
) {
682 logging_multiple_connect_attempts_
= true;
683 LogConnectBegin(addresses
);
689 void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error
) {
690 if (logging_multiple_connect_attempts_
) {
691 LogConnectEnd(net_error
);
692 logging_multiple_connect_attempts_
= false;
698 int TCPSocketWin::AcceptInternal(scoped_ptr
<TCPSocketWin
>* socket
,
699 IPEndPoint
* address
) {
700 SockaddrStorage storage
;
701 int new_socket
= accept(socket_
, storage
.addr
, &storage
.addr_len
);
702 if (new_socket
< 0) {
703 int net_error
= MapSystemError(WSAGetLastError());
704 if (net_error
!= ERR_IO_PENDING
)
705 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT
, net_error
);
709 IPEndPoint ip_end_point
;
710 if (!ip_end_point
.FromSockAddr(storage
.addr
, storage
.addr_len
)) {
712 if (closesocket(new_socket
) < 0)
713 PLOG(ERROR
) << "closesocket";
714 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT
, ERR_FAILED
);
717 scoped_ptr
<TCPSocketWin
> tcp_socket(new TCPSocketWin(
718 net_log_
.net_log(), net_log_
.source()));
719 int adopt_result
= tcp_socket
->AdoptConnectedSocket(new_socket
, ip_end_point
);
720 if (adopt_result
!= OK
) {
721 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT
, adopt_result
);
724 *socket
= tcp_socket
.Pass();
725 *address
= ip_end_point
;
726 net_log_
.EndEvent(NetLog::TYPE_TCP_ACCEPT
,
727 CreateNetLogIPEndPointCallback(&ip_end_point
));
731 void TCPSocketWin::OnObjectSignaled(HANDLE object
) {
733 if (WSAEnumNetworkEvents(socket_
, accept_event_
, &ev
) == SOCKET_ERROR
) {
734 PLOG(ERROR
) << "WSAEnumNetworkEvents()";
738 if (ev
.lNetworkEvents
& FD_ACCEPT
) {
739 int result
= AcceptInternal(accept_socket_
, accept_address_
);
740 if (result
!= ERR_IO_PENDING
) {
741 accept_socket_
= NULL
;
742 accept_address_
= NULL
;
743 base::ResetAndReturn(&accept_callback_
).Run(result
);
746 // This happens when a client opens a connection and closes it before we
747 // have a chance to accept it.
748 DCHECK(ev
.lNetworkEvents
== 0);
750 // Start watching the next FD_ACCEPT event.
751 WSAEventSelect(socket_
, accept_event_
, FD_ACCEPT
);
752 accept_watcher_
.StartWatching(accept_event_
, this);
756 int TCPSocketWin::DoConnect() {
757 DCHECK_EQ(connect_os_error_
, 0);
760 net_log_
.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
,
761 CreateNetLogIPEndPointCallback(peer_address_
.get()));
763 core_
= new Core(this);
764 // WSAEventSelect sets the socket to non-blocking mode as a side effect.
765 // Our connect() and recv() calls require that the socket be non-blocking.
766 WSAEventSelect(socket_
, core_
->read_overlapped_
.hEvent
, FD_CONNECT
);
768 SockaddrStorage storage
;
769 if (!peer_address_
->ToSockAddr(storage
.addr
, &storage
.addr_len
))
770 return ERR_INVALID_ARGUMENT
;
771 if (!connect(socket_
, storage
.addr
, storage
.addr_len
)) {
772 // Connected without waiting!
774 // The MSDN page for connect says:
775 // With a nonblocking socket, the connection attempt cannot be completed
776 // immediately. In this case, connect will return SOCKET_ERROR, and
777 // WSAGetLastError will return WSAEWOULDBLOCK.
778 // which implies that for a nonblocking socket, connect never returns 0.
779 // It's not documented whether the event object will be signaled or not
780 // if connect does return 0. So the code below is essentially dead code
781 // and we don't know if it's correct.
784 if (ResetEventIfSignaled(core_
->read_overlapped_
.hEvent
))
787 int os_error
= WSAGetLastError();
788 if (os_error
!= WSAEWOULDBLOCK
) {
789 LOG(ERROR
) << "connect failed: " << os_error
;
790 connect_os_error_
= os_error
;
791 int rv
= MapConnectError(os_error
);
792 CHECK_NE(ERR_IO_PENDING
, rv
);
797 core_
->WatchForRead();
798 return ERR_IO_PENDING
;
801 void TCPSocketWin::DoConnectComplete(int result
) {
802 // Log the end of this attempt (and any OS error it threw).
803 int os_error
= connect_os_error_
;
804 connect_os_error_
= 0;
806 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
,
807 NetLog::IntegerCallback("os_error", os_error
));
809 net_log_
.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT
);
812 if (!logging_multiple_connect_attempts_
)
813 LogConnectEnd(result
);
816 void TCPSocketWin::LogConnectBegin(const AddressList
& addresses
) {
817 base::StatsCounter
connects("tcp.connect");
818 connects
.Increment();
820 net_log_
.BeginEvent(NetLog::TYPE_TCP_CONNECT
,
821 addresses
.CreateNetLogCallback());
824 void TCPSocketWin::LogConnectEnd(int net_error
) {
826 UpdateConnectionTypeHistograms(CONNECTION_ANY
);
828 if (net_error
!= OK
) {
829 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT
, net_error
);
833 struct sockaddr_storage source_address
;
834 socklen_t addrlen
= sizeof(source_address
);
835 int rv
= getsockname(
836 socket_
, reinterpret_cast<struct sockaddr
*>(&source_address
), &addrlen
);
838 LOG(ERROR
) << "getsockname() [rv: " << rv
839 << "] error: " << WSAGetLastError();
841 net_log_
.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT
, rv
);
846 NetLog::TYPE_TCP_CONNECT
,
847 CreateNetLogSourceAddressCallback(
848 reinterpret_cast<const struct sockaddr
*>(&source_address
),
849 sizeof(source_address
)));
852 int TCPSocketWin::DoRead(IOBuffer
* buf
, int buf_len
,
853 const CompletionCallback
& callback
) {
854 if (!core_
->non_blocking_reads_initialized_
) {
855 WSAEventSelect(socket_
, core_
->read_overlapped_
.hEvent
,
857 core_
->non_blocking_reads_initialized_
= true;
859 int rv
= recv(socket_
, buf
->data(), buf_len
, 0);
860 if (rv
== SOCKET_ERROR
) {
861 int os_error
= WSAGetLastError();
862 if (os_error
!= WSAEWOULDBLOCK
) {
863 int net_error
= MapSystemError(os_error
);
865 NetLog::TYPE_SOCKET_READ_ERROR
,
866 CreateNetLogSocketErrorCallback(net_error
, os_error
));
870 base::StatsCounter
read_bytes("tcp.read_bytes");
873 net_log_
.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED
, rv
,
878 waiting_read_
= true;
879 read_callback_
= callback
;
880 core_
->read_iobuffer_
= buf
;
881 core_
->read_buffer_length_
= buf_len
;
882 core_
->WatchForRead();
883 return ERR_IO_PENDING
;
886 void TCPSocketWin::DidCompleteConnect() {
887 DCHECK(waiting_connect_
);
888 DCHECK(!read_callback_
.is_null());
891 WSANETWORKEVENTS events
;
892 int rv
= WSAEnumNetworkEvents(socket_
, core_
->read_overlapped_
.hEvent
,
895 if (rv
== SOCKET_ERROR
) {
897 os_error
= WSAGetLastError();
898 result
= MapSystemError(os_error
);
899 } else if (events
.lNetworkEvents
& FD_CONNECT
) {
900 os_error
= events
.iErrorCode
[FD_CONNECT_BIT
];
901 result
= MapConnectError(os_error
);
904 result
= ERR_UNEXPECTED
;
907 connect_os_error_
= os_error
;
908 DoConnectComplete(result
);
909 waiting_connect_
= false;
911 DCHECK_NE(result
, ERR_IO_PENDING
);
912 base::ResetAndReturn(&read_callback_
).Run(result
);
915 void TCPSocketWin::DidCompleteWrite() {
916 DCHECK(waiting_write_
);
917 DCHECK(!write_callback_
.is_null());
919 DWORD num_bytes
, flags
;
920 BOOL ok
= WSAGetOverlappedResult(socket_
, &core_
->write_overlapped_
,
921 &num_bytes
, FALSE
, &flags
);
922 WSAResetEvent(core_
->write_overlapped_
.hEvent
);
923 waiting_write_
= false;
926 int os_error
= WSAGetLastError();
927 rv
= MapSystemError(os_error
);
928 net_log_
.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR
,
929 CreateNetLogSocketErrorCallback(rv
, os_error
));
931 rv
= static_cast<int>(num_bytes
);
932 if (rv
> core_
->write_buffer_length_
|| rv
< 0) {
933 // It seems that some winsock interceptors report that more was written
934 // than was available. Treat this as an error. http://crbug.com/27870
935 LOG(ERROR
) << "Detected broken LSP: Asked to write "
936 << core_
->write_buffer_length_
<< " bytes, but " << rv
937 << " bytes reported.";
938 rv
= ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES
;
940 base::StatsCounter
write_bytes("tcp.write_bytes");
941 write_bytes
.Add(num_bytes
);
942 net_log_
.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT
, num_bytes
,
943 core_
->write_iobuffer_
->data());
947 core_
->write_iobuffer_
= NULL
;
949 DCHECK_NE(rv
, ERR_IO_PENDING
);
950 base::ResetAndReturn(&write_callback_
).Run(rv
);
953 void TCPSocketWin::DidSignalRead() {
954 DCHECK(waiting_read_
);
955 DCHECK(!read_callback_
.is_null());
958 WSANETWORKEVENTS network_events
;
959 int rv
= WSAEnumNetworkEvents(socket_
, core_
->read_overlapped_
.hEvent
,
961 if (rv
== SOCKET_ERROR
) {
962 os_error
= WSAGetLastError();
963 rv
= MapSystemError(os_error
);
964 } else if (network_events
.lNetworkEvents
) {
965 DCHECK_EQ(network_events
.lNetworkEvents
& ~(FD_READ
| FD_CLOSE
), 0);
966 // If network_events.lNetworkEvents is FD_CLOSE and
967 // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
968 // connection closure. It is tempting to directly set rv to 0 in
969 // this case, but the MSDN pages for WSAEventSelect and
970 // WSAAsyncSelect recommend we still call DoRead():
971 // FD_CLOSE should only be posted after all data is read from a
972 // socket, but an application should check for remaining data upon
973 // receipt of FD_CLOSE to avoid any possibility of losing data.
975 // If network_events.iErrorCode[FD_READ_BIT] or
976 // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
977 // DoRead() because recv() reports a more accurate error code
978 // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
980 rv
= DoRead(core_
->read_iobuffer_
, core_
->read_buffer_length_
,
982 if (rv
== ERR_IO_PENDING
)
985 // This may happen because Read() may succeed synchronously and
986 // consume all the received data without resetting the event object.
987 core_
->WatchForRead();
991 waiting_read_
= false;
992 core_
->read_iobuffer_
= NULL
;
993 core_
->read_buffer_length_
= 0;
995 DCHECK_NE(rv
, ERR_IO_PENDING
);
996 base::ResetAndReturn(&read_callback_
).Run(rv
);