Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / net / socket / tcp_socket_win.cc
blob88db36fd41c3da42f24b7eb17da0bab4d6a7ac52
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/socket/tcp_socket_win.h"
7 #include <mstcpip.h>
9 #include "base/callback_helpers.h"
10 #include "base/logging.h"
11 #include "base/metrics/stats_counters.h"
12 #include "base/win/windows_version.h"
13 #include "net/base/address_list.h"
14 #include "net/base/connection_type_histograms.h"
15 #include "net/base/io_buffer.h"
16 #include "net/base/ip_endpoint.h"
17 #include "net/base/net_errors.h"
18 #include "net/base/net_util.h"
19 #include "net/base/network_change_notifier.h"
20 #include "net/base/winsock_init.h"
21 #include "net/base/winsock_util.h"
22 #include "net/socket/socket_descriptor.h"
23 #include "net/socket/socket_net_log_params.h"
25 namespace net {
27 namespace {
// Keep-alive delay, in seconds, that SetDefaultOptionsForClient() passes to
// SetTCPKeepAlive().
const int kTCPKeepAliveSeconds = 45;
31 int SetSocketReceiveBufferSize(SOCKET socket, int32 size) {
32 int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
33 reinterpret_cast<const char*>(&size), sizeof(size));
34 int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
35 DCHECK(!rv) << "Could not set socket receive buffer size: " << net_error;
36 return net_error;
39 int SetSocketSendBufferSize(SOCKET socket, int32 size) {
40 int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
41 reinterpret_cast<const char*>(&size), sizeof(size));
42 int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
43 DCHECK(!rv) << "Could not set socket send buffer size: " << net_error;
44 return net_error;
47 // Disable Nagle.
48 // The Nagle implementation on windows is governed by RFC 896. The idea
49 // behind Nagle is to reduce small packets on the network. When Nagle is
50 // enabled, if a partial packet has been sent, the TCP stack will disallow
51 // further *partial* packets until an ACK has been received from the other
52 // side. Good applications should always strive to send as much data as
53 // possible and avoid partial-packet sends. However, in most real world
54 // applications, there are edge cases where this does not happen, and two
55 // partial packets may be sent back to back. For a browser, it is NEVER
56 // a benefit to delay for an RTT before the second packet is sent.
58 // As a practical example in Chromium today, consider the case of a small
59 // POST. I have verified this:
60 // Client writes 649 bytes of header (partial packet #1)
61 // Client writes 50 bytes of POST data (partial packet #2)
62 // In the above example, with Nagle, a RTT delay is inserted between these
63 // two sends due to nagle. RTTs can easily be 100ms or more. The best
64 // fix is to make sure that for POSTing data, we write as much data as
65 // possible and minimize partial packets. We will fix that. But disabling
66 // Nagle also ensure we don't run into this delay in other edge cases.
67 // See also:
68 // http://technet.microsoft.com/en-us/library/bb726981.aspx
69 bool DisableNagle(SOCKET socket, bool disable) {
70 BOOL val = disable ? TRUE : FALSE;
71 int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
72 reinterpret_cast<const char*>(&val),
73 sizeof(val));
74 DCHECK(!rv) << "Could not disable nagle";
75 return rv == 0;
78 // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
79 // connections. See http://crbug.com/27400 for details.
80 bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) {
81 int delay = delay_secs * 1000;
82 struct tcp_keepalive keepalive_vals = {
83 enable ? 1 : 0, // TCP keep-alive on.
84 delay, // Delay seconds before sending first TCP keep-alive packet.
85 delay, // Delay seconds between sending TCP keep-alive packets.
87 DWORD bytes_returned = 0xABAB;
88 int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals,
89 sizeof(keepalive_vals), NULL, 0,
90 &bytes_returned, NULL, NULL);
91 DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket
92 << " [error: " << WSAGetLastError() << "].";
94 // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
95 return rv == 0;
98 int MapConnectError(int os_error) {
99 switch (os_error) {
100 // connect fails with WSAEACCES when Windows Firewall blocks the
101 // connection.
102 case WSAEACCES:
103 return ERR_NETWORK_ACCESS_DENIED;
104 case WSAETIMEDOUT:
105 return ERR_CONNECTION_TIMED_OUT;
106 default: {
107 int net_error = MapSystemError(os_error);
108 if (net_error == ERR_FAILED)
109 return ERR_CONNECTION_FAILED; // More specific than ERR_FAILED.
111 // Give a more specific error when the user is offline.
112 if (net_error == ERR_ADDRESS_UNREACHABLE &&
113 NetworkChangeNotifier::IsOffline()) {
114 return ERR_INTERNET_DISCONNECTED;
117 return net_error;
122 } // namespace
124 //-----------------------------------------------------------------------------
126 // This class encapsulates all the state that has to be preserved as long as
127 // there is a network IO operation in progress. If the owner TCPSocketWin is
128 // destroyed while an operation is in progress, the Core is detached and it
129 // lives until the operation completes and the OS doesn't reference any resource
130 // declared on this class anymore.
131 class TCPSocketWin::Core : public base::RefCounted<Core> {
132 public:
133 explicit Core(TCPSocketWin* socket);
135 // Start watching for the end of a read or write operation.
136 void WatchForRead();
137 void WatchForWrite();
139 // The TCPSocketWin is going away.
140 void Detach() { socket_ = NULL; }
142 // The separate OVERLAPPED variables for asynchronous operation.
143 // |read_overlapped_| is used for both Connect() and Read().
144 // |write_overlapped_| is only used for Write();
145 OVERLAPPED read_overlapped_;
146 OVERLAPPED write_overlapped_;
148 // The buffers used in Read() and Write().
149 scoped_refptr<IOBuffer> read_iobuffer_;
150 scoped_refptr<IOBuffer> write_iobuffer_;
151 int read_buffer_length_;
152 int write_buffer_length_;
154 bool non_blocking_reads_initialized_;
156 private:
157 friend class base::RefCounted<Core>;
159 class ReadDelegate : public base::win::ObjectWatcher::Delegate {
160 public:
161 explicit ReadDelegate(Core* core) : core_(core) {}
162 virtual ~ReadDelegate() {}
164 // base::ObjectWatcher::Delegate methods:
165 virtual void OnObjectSignaled(HANDLE object);
167 private:
168 Core* const core_;
171 class WriteDelegate : public base::win::ObjectWatcher::Delegate {
172 public:
173 explicit WriteDelegate(Core* core) : core_(core) {}
174 virtual ~WriteDelegate() {}
176 // base::ObjectWatcher::Delegate methods:
177 virtual void OnObjectSignaled(HANDLE object);
179 private:
180 Core* const core_;
183 ~Core();
185 // The socket that created this object.
186 TCPSocketWin* socket_;
188 // |reader_| handles the signals from |read_watcher_|.
189 ReadDelegate reader_;
190 // |writer_| handles the signals from |write_watcher_|.
191 WriteDelegate writer_;
193 // |read_watcher_| watches for events from Connect() and Read().
194 base::win::ObjectWatcher read_watcher_;
195 // |write_watcher_| watches for events from Write();
196 base::win::ObjectWatcher write_watcher_;
198 DISALLOW_COPY_AND_ASSIGN(Core);
201 TCPSocketWin::Core::Core(TCPSocketWin* socket)
202 : read_buffer_length_(0),
203 write_buffer_length_(0),
204 non_blocking_reads_initialized_(false),
205 socket_(socket),
206 reader_(this),
207 writer_(this) {
208 memset(&read_overlapped_, 0, sizeof(read_overlapped_));
209 memset(&write_overlapped_, 0, sizeof(write_overlapped_));
211 read_overlapped_.hEvent = WSACreateEvent();
212 write_overlapped_.hEvent = WSACreateEvent();
215 TCPSocketWin::Core::~Core() {
216 // Make sure the message loop is not watching this object anymore.
217 read_watcher_.StopWatching();
218 write_watcher_.StopWatching();
220 WSACloseEvent(read_overlapped_.hEvent);
221 memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
222 WSACloseEvent(write_overlapped_.hEvent);
223 memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
226 void TCPSocketWin::Core::WatchForRead() {
227 // We grab an extra reference because there is an IO operation in progress.
228 // Balanced in ReadDelegate::OnObjectSignaled().
229 AddRef();
230 read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
233 void TCPSocketWin::Core::WatchForWrite() {
234 // We grab an extra reference because there is an IO operation in progress.
235 // Balanced in WriteDelegate::OnObjectSignaled().
236 AddRef();
237 write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
240 void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) {
241 DCHECK_EQ(object, core_->read_overlapped_.hEvent);
242 if (core_->socket_) {
243 if (core_->socket_->waiting_connect_)
244 core_->socket_->DidCompleteConnect();
245 else
246 core_->socket_->DidSignalRead();
249 core_->Release();
252 void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled(
253 HANDLE object) {
254 DCHECK_EQ(object, core_->write_overlapped_.hEvent);
255 if (core_->socket_)
256 core_->socket_->DidCompleteWrite();
258 core_->Release();
261 //-----------------------------------------------------------------------------
263 TCPSocketWin::TCPSocketWin(net::NetLog* net_log,
264 const net::NetLog::Source& source)
265 : socket_(INVALID_SOCKET),
266 accept_event_(WSA_INVALID_EVENT),
267 accept_socket_(NULL),
268 accept_address_(NULL),
269 waiting_connect_(false),
270 waiting_read_(false),
271 waiting_write_(false),
272 connect_os_error_(0),
273 logging_multiple_connect_attempts_(false),
274 net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
275 net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
276 source.ToEventParametersCallback());
277 EnsureWinsockInit();
280 TCPSocketWin::~TCPSocketWin() {
281 Close();
282 net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
285 int TCPSocketWin::Open(AddressFamily family) {
286 DCHECK(CalledOnValidThread());
287 DCHECK_EQ(socket_, INVALID_SOCKET);
289 socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
290 IPPROTO_TCP);
291 if (socket_ == INVALID_SOCKET) {
292 PLOG(ERROR) << "CreatePlatformSocket() returned an error";
293 return MapSystemError(WSAGetLastError());
296 if (SetNonBlocking(socket_)) {
297 int result = MapSystemError(WSAGetLastError());
298 Close();
299 return result;
302 return OK;
305 int TCPSocketWin::AdoptConnectedSocket(SOCKET socket,
306 const IPEndPoint& peer_address) {
307 DCHECK(CalledOnValidThread());
308 DCHECK_EQ(socket_, INVALID_SOCKET);
309 DCHECK(!core_);
311 socket_ = socket;
313 if (SetNonBlocking(socket_)) {
314 int result = MapSystemError(WSAGetLastError());
315 Close();
316 return result;
319 core_ = new Core(this);
320 peer_address_.reset(new IPEndPoint(peer_address));
322 return OK;
325 int TCPSocketWin::AdoptListenSocket(SOCKET socket) {
326 DCHECK(CalledOnValidThread());
327 DCHECK_EQ(socket_, INVALID_SOCKET);
329 socket_ = socket;
331 if (SetNonBlocking(socket_)) {
332 int result = MapSystemError(WSAGetLastError());
333 Close();
334 return result;
337 // |core_| is not needed for sockets that are used to accept connections.
338 // The operation here is more like Open but with an existing socket.
340 return OK;
343 int TCPSocketWin::Bind(const IPEndPoint& address) {
344 DCHECK(CalledOnValidThread());
345 DCHECK_NE(socket_, INVALID_SOCKET);
347 SockaddrStorage storage;
348 if (!address.ToSockAddr(storage.addr, &storage.addr_len))
349 return ERR_ADDRESS_INVALID;
351 int result = bind(socket_, storage.addr, storage.addr_len);
352 if (result < 0) {
353 PLOG(ERROR) << "bind() returned an error";
354 return MapSystemError(WSAGetLastError());
357 return OK;
360 int TCPSocketWin::Listen(int backlog) {
361 DCHECK(CalledOnValidThread());
362 DCHECK_GT(backlog, 0);
363 DCHECK_NE(socket_, INVALID_SOCKET);
364 DCHECK_EQ(accept_event_, WSA_INVALID_EVENT);
366 accept_event_ = WSACreateEvent();
367 if (accept_event_ == WSA_INVALID_EVENT) {
368 PLOG(ERROR) << "WSACreateEvent()";
369 return MapSystemError(WSAGetLastError());
372 int result = listen(socket_, backlog);
373 if (result < 0) {
374 PLOG(ERROR) << "listen() returned an error";
375 return MapSystemError(WSAGetLastError());
378 return OK;
381 int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket,
382 IPEndPoint* address,
383 const CompletionCallback& callback) {
384 DCHECK(CalledOnValidThread());
385 DCHECK(socket);
386 DCHECK(address);
387 DCHECK(!callback.is_null());
388 DCHECK(accept_callback_.is_null());
390 net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);
392 int result = AcceptInternal(socket, address);
394 if (result == ERR_IO_PENDING) {
395 // Start watching.
396 WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
397 accept_watcher_.StartWatching(accept_event_, this);
399 accept_socket_ = socket;
400 accept_address_ = address;
401 accept_callback_ = callback;
404 return result;
407 int TCPSocketWin::Connect(const IPEndPoint& address,
408 const CompletionCallback& callback) {
409 DCHECK(CalledOnValidThread());
410 DCHECK_NE(socket_, INVALID_SOCKET);
411 DCHECK(!waiting_connect_);
413 // |peer_address_| and |core_| will be non-NULL if Connect() has been called.
414 // Unless Close() is called to reset the internal state, a second call to
415 // Connect() is not allowed.
416 // Please note that we enforce this even if the previous Connect() has
417 // completed and failed. Although it is allowed to connect the same |socket_|
418 // again after a connection attempt failed on Windows, it results in
419 // unspecified behavior according to POSIX. Therefore, we make it behave in
420 // the same way as TCPSocketLibevent.
421 DCHECK(!peer_address_ && !core_);
423 if (!logging_multiple_connect_attempts_)
424 LogConnectBegin(AddressList(address));
426 peer_address_.reset(new IPEndPoint(address));
428 int rv = DoConnect();
429 if (rv == ERR_IO_PENDING) {
430 // Synchronous operation not supported.
431 DCHECK(!callback.is_null());
432 read_callback_ = callback;
433 waiting_connect_ = true;
434 } else {
435 DoConnectComplete(rv);
438 return rv;
441 bool TCPSocketWin::IsConnected() const {
442 DCHECK(CalledOnValidThread());
444 if (socket_ == INVALID_SOCKET || waiting_connect_)
445 return false;
447 if (waiting_read_)
448 return true;
450 // Check if connection is alive.
451 char c;
452 int rv = recv(socket_, &c, 1, MSG_PEEK);
453 if (rv == 0)
454 return false;
455 if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
456 return false;
458 return true;
461 bool TCPSocketWin::IsConnectedAndIdle() const {
462 DCHECK(CalledOnValidThread());
464 if (socket_ == INVALID_SOCKET || waiting_connect_)
465 return false;
467 if (waiting_read_)
468 return true;
470 // Check if connection is alive and we haven't received any data
471 // unexpectedly.
472 char c;
473 int rv = recv(socket_, &c, 1, MSG_PEEK);
474 if (rv >= 0)
475 return false;
476 if (WSAGetLastError() != WSAEWOULDBLOCK)
477 return false;
479 return true;
482 int TCPSocketWin::Read(IOBuffer* buf,
483 int buf_len,
484 const CompletionCallback& callback) {
485 DCHECK(CalledOnValidThread());
486 DCHECK_NE(socket_, INVALID_SOCKET);
487 DCHECK(!waiting_read_);
488 DCHECK(read_callback_.is_null());
489 DCHECK(!core_->read_iobuffer_);
491 return DoRead(buf, buf_len, callback);
494 int TCPSocketWin::Write(IOBuffer* buf,
495 int buf_len,
496 const CompletionCallback& callback) {
497 DCHECK(CalledOnValidThread());
498 DCHECK_NE(socket_, INVALID_SOCKET);
499 DCHECK(!waiting_write_);
500 DCHECK(write_callback_.is_null());
501 DCHECK_GT(buf_len, 0);
502 DCHECK(!core_->write_iobuffer_);
504 base::StatsCounter writes("tcp.writes");
505 writes.Increment();
507 WSABUF write_buffer;
508 write_buffer.len = buf_len;
509 write_buffer.buf = buf->data();
511 // TODO(wtc): Remove the assertion after enough testing.
512 AssertEventNotSignaled(core_->write_overlapped_.hEvent);
513 DWORD num;
514 int rv = WSASend(socket_, &write_buffer, 1, &num, 0,
515 &core_->write_overlapped_, NULL);
516 if (rv == 0) {
517 if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
518 rv = static_cast<int>(num);
519 if (rv > buf_len || rv < 0) {
520 // It seems that some winsock interceptors report that more was written
521 // than was available. Treat this as an error. http://crbug.com/27870
522 LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
523 << " bytes, but " << rv << " bytes reported.";
524 return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
526 base::StatsCounter write_bytes("tcp.write_bytes");
527 write_bytes.Add(rv);
528 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
529 buf->data());
530 return rv;
532 } else {
533 int os_error = WSAGetLastError();
534 if (os_error != WSA_IO_PENDING) {
535 int net_error = MapSystemError(os_error);
536 net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
537 CreateNetLogSocketErrorCallback(net_error, os_error));
538 return net_error;
541 waiting_write_ = true;
542 write_callback_ = callback;
543 core_->write_iobuffer_ = buf;
544 core_->write_buffer_length_ = buf_len;
545 core_->WatchForWrite();
546 return ERR_IO_PENDING;
549 int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const {
550 DCHECK(CalledOnValidThread());
551 DCHECK(address);
553 SockaddrStorage storage;
554 if (getsockname(socket_, storage.addr, &storage.addr_len))
555 return MapSystemError(WSAGetLastError());
556 if (!address->FromSockAddr(storage.addr, storage.addr_len))
557 return ERR_ADDRESS_INVALID;
559 return OK;
562 int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const {
563 DCHECK(CalledOnValidThread());
564 DCHECK(address);
565 if (!IsConnected())
566 return ERR_SOCKET_NOT_CONNECTED;
567 *address = *peer_address_;
568 return OK;
571 int TCPSocketWin::SetDefaultOptionsForServer() {
572 return SetExclusiveAddrUse();
575 void TCPSocketWin::SetDefaultOptionsForClient() {
576 // Increase the socket buffer sizes from the default sizes for WinXP. In
577 // performance testing, there is substantial benefit by increasing from 8KB
578 // to 64KB.
579 // See also:
580 // http://support.microsoft.com/kb/823764/EN-US
581 // On Vista, if we manually set these sizes, Vista turns off its receive
582 // window auto-tuning feature.
583 // http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
584 // Since Vista's auto-tune is better than any static value we can could set,
585 // only change these on pre-vista machines.
586 if (base::win::GetVersion() < base::win::VERSION_VISTA) {
587 const int32 kSocketBufferSize = 64 * 1024;
588 SetSocketReceiveBufferSize(socket_, kSocketBufferSize);
589 SetSocketSendBufferSize(socket_, kSocketBufferSize);
592 DisableNagle(socket_, true);
593 SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
596 int TCPSocketWin::SetExclusiveAddrUse() {
597 // On Windows, a bound end point can be hijacked by another process by
598 // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE
599 // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the
600 // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another
601 // socket to forcibly bind to the end point until the end point is unbound.
602 // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE.
603 // MSDN: http://goo.gl/M6fjQ.
605 // Unlike on *nix, on Windows a TCP server socket can always bind to an end
606 // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not
607 // needed here.
609 // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end
610 // point in TIME_WAIT status. It does not have this effect for a TCP server
611 // socket.
613 BOOL true_value = 1;
614 int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
615 reinterpret_cast<const char*>(&true_value),
616 sizeof(true_value));
617 if (rv < 0)
618 return MapSystemError(errno);
619 return OK;
622 int TCPSocketWin::SetReceiveBufferSize(int32 size) {
623 DCHECK(CalledOnValidThread());
624 return SetSocketReceiveBufferSize(socket_, size);
627 int TCPSocketWin::SetSendBufferSize(int32 size) {
628 DCHECK(CalledOnValidThread());
629 return SetSocketSendBufferSize(socket_, size);
632 bool TCPSocketWin::SetKeepAlive(bool enable, int delay) {
633 return SetTCPKeepAlive(socket_, enable, delay);
636 bool TCPSocketWin::SetNoDelay(bool no_delay) {
637 return DisableNagle(socket_, no_delay);
640 void TCPSocketWin::Close() {
641 DCHECK(CalledOnValidThread());
643 if (socket_ != INVALID_SOCKET) {
644 // Only log the close event if there's actually a socket to close.
645 net_log_.AddEvent(NetLog::EventType::TYPE_SOCKET_CLOSED);
647 // Note: don't use CancelIo to cancel pending IO because it doesn't work
648 // when there is a Winsock layered service provider.
650 // In most socket implementations, closing a socket results in a graceful
651 // connection shutdown, but in Winsock we have to call shutdown explicitly.
652 // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
653 // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
654 shutdown(socket_, SD_SEND);
656 // This cancels any pending IO.
657 if (closesocket(socket_) < 0)
658 PLOG(ERROR) << "closesocket";
659 socket_ = INVALID_SOCKET;
662 if (!accept_callback_.is_null()) {
663 accept_watcher_.StopWatching();
664 accept_socket_ = NULL;
665 accept_address_ = NULL;
666 accept_callback_.Reset();
669 if (accept_event_) {
670 WSACloseEvent(accept_event_);
671 accept_event_ = WSA_INVALID_EVENT;
674 if (core_) {
675 if (waiting_connect_) {
676 // We closed the socket, so this notification will never come.
677 // From MSDN' WSAEventSelect documentation:
678 // "Closing a socket with closesocket also cancels the association and
679 // selection of network events specified in WSAEventSelect for the
680 // socket".
681 core_->Release();
683 core_->Detach();
684 core_ = NULL;
687 waiting_connect_ = false;
688 waiting_read_ = false;
689 waiting_write_ = false;
691 read_callback_.Reset();
692 write_callback_.Reset();
693 peer_address_.reset();
694 connect_os_error_ = 0;
697 bool TCPSocketWin::UsingTCPFastOpen() const {
698 // Not supported on windows.
699 return false;
702 void TCPSocketWin::StartLoggingMultipleConnectAttempts(
703 const AddressList& addresses) {
704 if (!logging_multiple_connect_attempts_) {
705 logging_multiple_connect_attempts_ = true;
706 LogConnectBegin(addresses);
707 } else {
708 NOTREACHED();
712 void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) {
713 if (logging_multiple_connect_attempts_) {
714 LogConnectEnd(net_error);
715 logging_multiple_connect_attempts_ = false;
716 } else {
717 NOTREACHED();
721 int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket,
722 IPEndPoint* address) {
723 SockaddrStorage storage;
724 int new_socket = accept(socket_, storage.addr, &storage.addr_len);
725 if (new_socket < 0) {
726 int net_error = MapSystemError(WSAGetLastError());
727 if (net_error != ERR_IO_PENDING)
728 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
729 return net_error;
732 IPEndPoint ip_end_point;
733 if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
734 NOTREACHED();
735 if (closesocket(new_socket) < 0)
736 PLOG(ERROR) << "closesocket";
737 int net_error = ERR_ADDRESS_INVALID;
738 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
739 return net_error;
741 scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin(
742 net_log_.net_log(), net_log_.source()));
743 int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
744 if (adopt_result != OK) {
745 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
746 return adopt_result;
748 *socket = tcp_socket.Pass();
749 *address = ip_end_point;
750 net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
751 CreateNetLogIPEndPointCallback(&ip_end_point));
752 return OK;
755 void TCPSocketWin::OnObjectSignaled(HANDLE object) {
756 WSANETWORKEVENTS ev;
757 if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) {
758 PLOG(ERROR) << "WSAEnumNetworkEvents()";
759 return;
762 if (ev.lNetworkEvents & FD_ACCEPT) {
763 int result = AcceptInternal(accept_socket_, accept_address_);
764 if (result != ERR_IO_PENDING) {
765 accept_socket_ = NULL;
766 accept_address_ = NULL;
767 base::ResetAndReturn(&accept_callback_).Run(result);
769 } else {
770 // This happens when a client opens a connection and closes it before we
771 // have a chance to accept it.
772 DCHECK(ev.lNetworkEvents == 0);
774 // Start watching the next FD_ACCEPT event.
775 WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
776 accept_watcher_.StartWatching(accept_event_, this);
780 int TCPSocketWin::DoConnect() {
781 DCHECK_EQ(connect_os_error_, 0);
782 DCHECK(!core_);
784 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
785 CreateNetLogIPEndPointCallback(peer_address_.get()));
787 core_ = new Core(this);
788 // WSAEventSelect sets the socket to non-blocking mode as a side effect.
789 // Our connect() and recv() calls require that the socket be non-blocking.
790 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);
792 SockaddrStorage storage;
793 if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
794 return ERR_ADDRESS_INVALID;
795 if (!connect(socket_, storage.addr, storage.addr_len)) {
796 // Connected without waiting!
798 // The MSDN page for connect says:
799 // With a nonblocking socket, the connection attempt cannot be completed
800 // immediately. In this case, connect will return SOCKET_ERROR, and
801 // WSAGetLastError will return WSAEWOULDBLOCK.
802 // which implies that for a nonblocking socket, connect never returns 0.
803 // It's not documented whether the event object will be signaled or not
804 // if connect does return 0. So the code below is essentially dead code
805 // and we don't know if it's correct.
806 NOTREACHED();
808 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
809 return OK;
810 } else {
811 int os_error = WSAGetLastError();
812 if (os_error != WSAEWOULDBLOCK) {
813 LOG(ERROR) << "connect failed: " << os_error;
814 connect_os_error_ = os_error;
815 int rv = MapConnectError(os_error);
816 CHECK_NE(ERR_IO_PENDING, rv);
817 return rv;
821 core_->WatchForRead();
822 return ERR_IO_PENDING;
825 void TCPSocketWin::DoConnectComplete(int result) {
826 // Log the end of this attempt (and any OS error it threw).
827 int os_error = connect_os_error_;
828 connect_os_error_ = 0;
829 if (result != OK) {
830 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
831 NetLog::IntegerCallback("os_error", os_error));
832 } else {
833 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
836 if (!logging_multiple_connect_attempts_)
837 LogConnectEnd(result);
840 void TCPSocketWin::LogConnectBegin(const AddressList& addresses) {
841 base::StatsCounter connects("tcp.connect");
842 connects.Increment();
844 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
845 addresses.CreateNetLogCallback());
848 void TCPSocketWin::LogConnectEnd(int net_error) {
849 if (net_error == OK)
850 UpdateConnectionTypeHistograms(CONNECTION_ANY);
852 if (net_error != OK) {
853 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
854 return;
857 struct sockaddr_storage source_address;
858 socklen_t addrlen = sizeof(source_address);
859 int rv = getsockname(
860 socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
861 if (rv != 0) {
862 LOG(ERROR) << "getsockname() [rv: " << rv
863 << "] error: " << WSAGetLastError();
864 NOTREACHED();
865 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
866 return;
869 net_log_.EndEvent(
870 NetLog::TYPE_TCP_CONNECT,
871 CreateNetLogSourceAddressCallback(
872 reinterpret_cast<const struct sockaddr*>(&source_address),
873 sizeof(source_address)));
876 int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len,
877 const CompletionCallback& callback) {
878 if (!core_->non_blocking_reads_initialized_) {
879 WSAEventSelect(socket_, core_->read_overlapped_.hEvent,
880 FD_READ | FD_CLOSE);
881 core_->non_blocking_reads_initialized_ = true;
883 int rv = recv(socket_, buf->data(), buf_len, 0);
884 if (rv == SOCKET_ERROR) {
885 int os_error = WSAGetLastError();
886 if (os_error != WSAEWOULDBLOCK) {
887 int net_error = MapSystemError(os_error);
888 net_log_.AddEvent(
889 NetLog::TYPE_SOCKET_READ_ERROR,
890 CreateNetLogSocketErrorCallback(net_error, os_error));
891 return net_error;
893 } else {
894 base::StatsCounter read_bytes("tcp.read_bytes");
895 if (rv > 0)
896 read_bytes.Add(rv);
897 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv,
898 buf->data());
899 return rv;
902 waiting_read_ = true;
903 read_callback_ = callback;
904 core_->read_iobuffer_ = buf;
905 core_->read_buffer_length_ = buf_len;
906 core_->WatchForRead();
907 return ERR_IO_PENDING;
910 void TCPSocketWin::DidCompleteConnect() {
911 DCHECK(waiting_connect_);
912 DCHECK(!read_callback_.is_null());
913 int result;
915 WSANETWORKEVENTS events;
916 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
917 &events);
918 int os_error = 0;
919 if (rv == SOCKET_ERROR) {
920 NOTREACHED();
921 os_error = WSAGetLastError();
922 result = MapSystemError(os_error);
923 } else if (events.lNetworkEvents & FD_CONNECT) {
924 os_error = events.iErrorCode[FD_CONNECT_BIT];
925 result = MapConnectError(os_error);
926 } else {
927 NOTREACHED();
928 result = ERR_UNEXPECTED;
931 connect_os_error_ = os_error;
932 DoConnectComplete(result);
933 waiting_connect_ = false;
935 DCHECK_NE(result, ERR_IO_PENDING);
936 base::ResetAndReturn(&read_callback_).Run(result);
939 void TCPSocketWin::DidCompleteWrite() {
940 DCHECK(waiting_write_);
941 DCHECK(!write_callback_.is_null());
943 DWORD num_bytes, flags;
944 BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
945 &num_bytes, FALSE, &flags);
946 WSAResetEvent(core_->write_overlapped_.hEvent);
947 waiting_write_ = false;
948 int rv;
949 if (!ok) {
950 int os_error = WSAGetLastError();
951 rv = MapSystemError(os_error);
952 net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
953 CreateNetLogSocketErrorCallback(rv, os_error));
954 } else {
955 rv = static_cast<int>(num_bytes);
956 if (rv > core_->write_buffer_length_ || rv < 0) {
957 // It seems that some winsock interceptors report that more was written
958 // than was available. Treat this as an error. http://crbug.com/27870
959 LOG(ERROR) << "Detected broken LSP: Asked to write "
960 << core_->write_buffer_length_ << " bytes, but " << rv
961 << " bytes reported.";
962 rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
963 } else {
964 base::StatsCounter write_bytes("tcp.write_bytes");
965 write_bytes.Add(num_bytes);
966 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
967 core_->write_iobuffer_->data());
971 core_->write_iobuffer_ = NULL;
973 DCHECK_NE(rv, ERR_IO_PENDING);
974 base::ResetAndReturn(&write_callback_).Run(rv);
977 void TCPSocketWin::DidSignalRead() {
978 DCHECK(waiting_read_);
979 DCHECK(!read_callback_.is_null());
981 int os_error = 0;
982 WSANETWORKEVENTS network_events;
983 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
984 &network_events);
985 if (rv == SOCKET_ERROR) {
986 os_error = WSAGetLastError();
987 rv = MapSystemError(os_error);
988 } else if (network_events.lNetworkEvents) {
989 DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0);
990 // If network_events.lNetworkEvents is FD_CLOSE and
991 // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
992 // connection closure. It is tempting to directly set rv to 0 in
993 // this case, but the MSDN pages for WSAEventSelect and
994 // WSAAsyncSelect recommend we still call DoRead():
995 // FD_CLOSE should only be posted after all data is read from a
996 // socket, but an application should check for remaining data upon
997 // receipt of FD_CLOSE to avoid any possibility of losing data.
999 // If network_events.iErrorCode[FD_READ_BIT] or
1000 // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
1001 // DoRead() because recv() reports a more accurate error code
1002 // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
1003 // reset.
1004 rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_,
1005 read_callback_);
1006 if (rv == ERR_IO_PENDING)
1007 return;
1008 } else {
1009 // This may happen because Read() may succeed synchronously and
1010 // consume all the received data without resetting the event object.
1011 core_->WatchForRead();
1012 return;
1015 waiting_read_ = false;
1016 core_->read_iobuffer_ = NULL;
1017 core_->read_buffer_length_ = 0;
1019 DCHECK_NE(rv, ERR_IO_PENDING);
1020 base::ResetAndReturn(&read_callback_).Run(rv);
1023 } // namespace net