1 /* fhandler_socket_inet.cc.
3 See fhandler.h for a description of the fhandler classes.
5 This file is part of Cygwin.
7 This software is a copyrighted work licensed under the terms of the
8 Cygwin license. Please consult the file "CYGWIN_LICENSE" for
11 #define __INSIDE_CYGWIN_NET__
12 #define USE_SYS_TYPES_FD_SET
15 /* 2014-04-24: Current Mingw headers define sockaddr_in6 using u_long (8 byte)
16 because a redefinition for LP64 systems is missing. This leads to a wrong
17 definition and size of sockaddr_in6 when building with winsock headers.
18 This definition is also required to use the right u_long type in subsequent
21 #define u_long __ms_u_long
22 #include <w32api/ws2tcpip.h>
23 #include <w32api/mswsock.h>
24 #include <w32api/mstcpip.h>
25 #include <netinet/tcp.h>
26 #include <netinet/udp.h>
28 #include <asm/byteorder.h>
29 #include <sys/socket.h>
30 #include <sys/param.h>
31 #include <sys/statvfs.h>
32 #include <cygwin/acl.h>
38 #include "shared_info.h"
42 #define ASYNC_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT)
43 #define EVENT_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT|FD_CLOSE)
47 WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED) \
50 #define UNLOCK_EVENTS \
51 ReleaseMutex (wsock_mtx); \
54 /* Maximum number of concurrently opened sockets from all Cygwin processes
55 per session. Note that shared sockets (through dup/fork/exec) are
56 counted as one socket. */
57 #define NUM_SOCKS 2048U
61 WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED) \
64 #define UNLOCK_EVENTS \
65 ReleaseMutex (wsock_mtx); \
68 static wsa_event wsa_events
[NUM_SOCKS
] __attribute__((section (".cygwin_dll_common"), shared
));
70 static LONG socket_serial_number
__attribute__((section (".cygwin_dll_common"), shared
));
72 static HANDLE wsa_slot_mtx
;
/* Format the name of the shared object that belongs to socket serial
   number NUM into BUF, using the pattern socket.%d.  Callers in this file
   pass a WCHAR[32] buffer and hand the result to RtlInitUnicodeString. */
75 sock_shared_name (PWCHAR buf
, LONG num
)
77 __small_swprintf (buf
, L
"socket.%d", num
);
/* Reserve an entry in the shared wsa_events table for the socket with
   serial number NEW_SERIAL_NUMBER and return a pointer to that entry.

   The table is guarded by wsa_slot_mtx, the named mutant that belongs to
   serial number 0.  Failing to create/open or to wait for that mutant is
   fatal (api_fatal).  Probing starts at new_serial_number % NUM_SOCKS and
   walks the table linearly with wrap-around.  An entry whose serial_number
   is 0 is free; for any other entry the mutant named after the recorded
   serial number is probed with NtOpenMutant to find out whether the socket
   it belongs to is still active.  When the probe arrives back at the
   starting slot the table is full: this is logged and the mutex released.
   The caller in init_events tests the result for a null pointer. */
82 search_wsa_event_slot (LONG new_serial_number
)
84 WCHAR name
[32], searchname
[32];
86 OBJECT_ATTRIBUTES attr
;
/* The table lock is the named mutant of serial number 0; OBJ_OPENIF opens
   it if it already exists. */
91 RtlInitUnicodeString (&uname
, sock_shared_name (name
, 0));
92 InitializeObjectAttributes (&attr
, &uname
, OBJ_INHERIT
| OBJ_OPENIF
,
93 get_session_parent_dir (),
94 everyone_sd (CYG_MUTANT_ACCESS
));
95 status
= NtCreateMutant (&wsa_slot_mtx
, CYG_MUTANT_ACCESS
, &attr
, FALSE
);
96 if (!NT_SUCCESS (status
))
97 api_fatal ("Couldn't create/open shared socket mutex %S, %y",
/* Take the table lock; it is held for the whole slot search. */
100 switch (WaitForSingleObject (wsa_slot_mtx
, INFINITE
))
106 api_fatal ("WFSO failed for shared socket mutex, %E");
/* Linear probe, starting at the slot derived from the serial number. */
109 unsigned int slot
= new_serial_number
% NUM_SOCKS
;
110 while (wsa_events
[slot
].serial_number
)
/* Probe the mutant of the socket recorded in this slot. */
113 RtlInitUnicodeString (&uname
, sock_shared_name (searchname
,
114 wsa_events
[slot
].serial_number
));
115 InitializeObjectAttributes (&attr
, &uname
, 0, get_session_parent_dir (),
117 status
= NtOpenMutant (&searchmtx
, READ_CONTROL
, &attr
);
/* NOTE(review): presumably a failed open means the recorded socket is gone
   and the slot can be reused -- confirm against the branch body. */
118 if (!NT_SUCCESS (status
))
120 /* Mutex still exists, attached socket is active, try next slot. */
122 slot
= (slot
+ 1) % NUM_SOCKS
;
123 if (slot
== (new_serial_number
% NUM_SOCKS
))
125 /* Did the whole array once. Too bad. */
126 debug_printf ("No free socket slot");
127 ReleaseMutex (wsa_slot_mtx
);
/* Claim the slot: wipe the old contents and stamp it with the new serial
   number before dropping the table lock. */
131 memset (&wsa_events
[slot
], 0, sizeof (wsa_event
));
132 wsa_events
[slot
].serial_number
= new_serial_number
;
133 ReleaseMutex (wsa_slot_mtx
);
134 return wsa_events
+ slot
;
137 /* cygwin internal: map sockaddr into internet domain address */
/* Copy the socket address IN (INLEN bytes) into OUT so it can be used as a
   peer address.  In the AF_INET branch and in the sockaddr_in6 branch the
   address is copied verbatim and an unspecified (ANY) address is replaced
   by the loopback address.  For any other address family errno is set to
   EAFNOSUPPORT.  connect compares the result against SOCKET_ERROR.
   NOTE(review): INLEN is used as the memcpy length without a visible check
   against the size of *OUT -- confirm that callers bound it. */
139 get_inet_addr_inet (const struct sockaddr
*in
, int inlen
,
140 struct sockaddr_storage
*out
, int *outlen
)
142 switch (in
->sa_family
)
145 memcpy (out
, in
, inlen
);
147 /* If the peer address given in connect or sendto is the ANY address,
148 Winsock fails with WSAEADDRNOTAVAIL, while Linux converts that into
149 a connection/send attempt to LOOPBACK. We're doing the same here. */
150 if (((struct sockaddr_in
*) out
)->sin_addr
.s_addr
== htonl (INADDR_ANY
))
151 ((struct sockaddr_in
*) out
)->sin_addr
.s_addr
= htonl (INADDR_LOOPBACK
);
/* Same treatment for an IPv6 address: copy, then map the unspecified
   address to in6addr_loopback. */
154 memcpy (out
, in
, inlen
);
156 /* See comment in AF_INET case. */
157 if (IN6_IS_ADDR_UNSPECIFIED (&((struct sockaddr_in6
*) out
)->sin6_addr
))
158 ((struct sockaddr_in6
*) out
)->sin6_addr
= in6addr_loopback
;
/* Anything else is not an internet domain address. */
161 set_errno (EAFNOSUPPORT
);
166 /* There's no DLL which exports the symbol WSARecvMsg. One has to call
167 WSAIoctl as below to fetch the function pointer. Why on earth did the
168 MS developers decide not to export a normal symbol for these extension
/* Ask Winsock for the WSARecvMsg extension function of socket SOCK.  The
   request is made with WSAIoctl (SIO_GET_EXTENSION_FUNCTION_POINTER) using
   the WSAID_WSARECVMSG GUID; FUNCPTR is passed as the output buffer with
   the size of one pointer.  The WSAIoctl result is returned unchanged;
   recv_internal compares it against SOCKET_ERROR. */
171 get_ext_funcptr (SOCKET sock
, void *funcptr
)
174 const GUID guid
= WSAID_WSARECVMSG
;
175 return WSAIoctl (sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
176 (void *) &guid
, sizeof (GUID
), funcptr
, sizeof (void *),
180 fhandler_socket_wsock::fhandler_socket_wsock () :
188 need_fork_fixup (true);
191 fhandler_socket_wsock::~fhandler_socket_wsock ()
194 cfree (prot_info_ptr
);
198 fhandler_socket_wsock::init_events ()
200 LONG new_serial_number
;
202 UNICODE_STRING uname
;
203 OBJECT_ATTRIBUTES attr
;
209 InterlockedIncrement (&socket_serial_number
);
210 if (!new_serial_number
) /* 0 is reserved for global mutex */
211 InterlockedIncrement (&socket_serial_number
);
212 set_ino (new_serial_number
);
213 RtlInitUnicodeString (&uname
, sock_shared_name (name
, new_serial_number
));
214 InitializeObjectAttributes (&attr
, &uname
, OBJ_INHERIT
| OBJ_OPENIF
,
215 get_session_parent_dir (),
216 everyone_sd (CYG_MUTANT_ACCESS
));
217 status
= NtCreateMutant (&wsock_mtx
, CYG_MUTANT_ACCESS
, &attr
, FALSE
);
218 if (!NT_SUCCESS (status
))
220 debug_printf ("NtCreateMutant(%S), %y", &uname
, status
);
224 if (status
== STATUS_OBJECT_NAME_EXISTS
)
227 while (status
== STATUS_OBJECT_NAME_EXISTS
);
228 if ((wsock_evt
= CreateEvent (&sec_all
, TRUE
, FALSE
, NULL
))
229 == WSA_INVALID_EVENT
)
231 debug_printf ("CreateEvent, %E");
236 if (WSAEventSelect (get_socket (), wsock_evt
, EVENT_MASK
) == SOCKET_ERROR
)
238 debug_printf ("WSAEventSelect, %E");
239 set_winsock_errno ();
244 if (!(wsock_events
= search_wsa_event_slot (new_serial_number
)))
251 if (get_socket_type () == SOCK_DGRAM
)
252 wsock_events
->events
= FD_WRITE
;
257 fhandler_socket_wsock::evaluate_events (const long event_mask
, long &events
,
263 WSANETWORKEVENTS evts
= { 0 };
264 if (!(WSAEnumNetworkEvents (get_socket (), wsock_evt
, &evts
)))
266 if (evts
.lNetworkEvents
)
269 wsock_events
->events
|= evts
.lNetworkEvents
;
270 events_now
= (wsock_events
->events
& event_mask
);
271 if (evts
.lNetworkEvents
& FD_CONNECT
)
273 wsock_events
->connect_errorcode
= evts
.iErrorCode
[FD_CONNECT_BIT
];
275 /* Setting the connect_state and calling the AF_LOCAL handshake
276 here allows to handle this stuff from a single point. This
277 is independent of FD_CONNECT being requested. Consider a
278 server calling connect(2) and then immediately poll(2) with
279 only polling for POLLIN (example: postfix), or select(2) just
280 asking for descriptors ready to read.
282 Something weird occurs in Winsock: If you fork off and call
283 recv/send on the duplicated, already connected socket, another
284 FD_CONNECT event is generated in the child process. This
285 would trigger a call to af_local_connect which obviously fails.
286 Avoid this by calling set_connect_state only if connect_state
287 is connect_pending. */
288 if (connect_state () == connect_pending
)
290 if (wsock_events
->connect_errorcode
)
291 connect_state (connect_failed
);
292 else if (af_local_connect ())
294 wsock_events
->connect_errorcode
= WSAGetLastError ();
295 connect_state (connect_failed
);
298 connect_state (connected
);
302 if ((evts
.lNetworkEvents
& FD_OOB
) && wsock_events
->owner
)
303 kill (wsock_events
->owner
, SIGURG
);
308 if ((events
= events_now
) != 0
309 || (events
= (wsock_events
->events
& event_mask
)) != 0)
311 if (events
& FD_CONNECT
)
313 int wsa_err
= wsock_events
->connect_errorcode
;
316 /* CV 2014-04-23: This is really weird. If you call connect
317 asynchronously on a socket and then select, an error like
318 "Connection refused" is set in the event and in the SO_ERROR
319 socket option. If you call connect, then dup, then select,
320 the error is set in the event, but not in the SO_ERROR socket
321 option, despite the dup'ed socket handle referring to the same
322 socket. We're trying to workaround this problem here by
323 taking the connect errorcode from the event and write it back
324 into the SO_ERROR socket option.
326 CV 2014-06-16: Call WSASetLastError *after* setsockopt since,
327 apparently, setsockopt sets the last WSA error code to 0 on
329 ::setsockopt (get_socket (), SOL_SOCKET
, SO_ERROR
,
330 (const char *) &wsa_err
, sizeof wsa_err
);
331 WSASetLastError (wsa_err
);
334 /* Since FD_CONNECT is only given once, we have to keep FD_CONNECT
335 for connection failed sockets to have consistent behaviour in
336 programs calling poll/select multiple times. Example test to
337 non-listening port: curl -v 127.0.0.1:47 */
338 if (connect_state () != connect_failed
)
339 wsock_events
->events
&= ~FD_CONNECT
;
340 wsock_events
->events
|= FD_WRITE
;
341 wsock_events
->connect_errorcode
= 0;
343 if (events
& FD_CLOSE
)
345 if (evts
.iErrorCode
[FD_CLOSE_BIT
])
347 WSASetLastError (evts
.iErrorCode
[FD_CLOSE_BIT
]);
350 /* This test makes accept/connect behave as on Linux when accept/
351 connect is called on a socket for which shutdown has been called.
352 The second half of this code is in the shutdown method. Note that
353 we only do this when called from accept/connect, not from select.
354 In this case erase == false, just as with read (MSG_PEEK). */
357 if ((event_mask
& FD_ACCEPT
) && saw_shutdown_read ())
359 WSASetLastError (WSAEINVAL
);
362 if (event_mask
& FD_CONNECT
)
364 WSASetLastError (WSAECONNRESET
);
370 wsock_events
->events
&= ~(events
& ~(FD_WRITE
| FD_CLOSE
));
378 fhandler_socket_wsock::wait_for_events (const long event_mask
,
386 DWORD wfmo_timeout
= 50;
389 WSAEVENT ev
[3] = { wsock_evt
, NULL
, NULL
};
390 wait_signal_arrived
here (ev
[1]);
392 if ((ev
[2] = pthread::get_cancel_event ()) != NULL
)
395 if (is_nonblocking () || (flags
& MSG_DONTWAIT
))
397 else if (event_mask
& FD_READ
)
398 timeout
= rcvtimeo ();
399 else if (event_mask
& FD_WRITE
)
400 timeout
= sndtimeo ();
404 while (!(ret
= evaluate_events (event_mask
, events
, !(flags
& MSG_PEEK
)))
409 WSASetLastError (WSAEWOULDBLOCK
);
413 if (timeout
< wfmo_timeout
)
414 wfmo_timeout
= timeout
;
415 switch (WSAWaitForMultipleEvents (ev_cnt
, ev
, FALSE
, wfmo_timeout
, FALSE
))
417 case WSA_WAIT_TIMEOUT
:
418 case WSA_WAIT_EVENT_0
:
419 if (timeout
!= INFINITE
)
420 timeout
-= wfmo_timeout
;
423 case WSA_WAIT_EVENT_0
+ 1:
424 if (_my_tls
.call_signal_handler ())
426 WSASetLastError (WSAEINTR
);
429 case WSA_WAIT_EVENT_0
+ 2:
430 pthread::static_cancel_self ();
434 /* wsock_evt can be NULL. We're generating the same errno values
435 as for sockets on which shutdown has been called. */
436 if (WSAGetLastError () != WSA_INVALID_HANDLE
)
437 WSASetLastError (WSAEFAULT
);
439 WSASetLastError ((event_mask
& FD_CONNECT
) ? WSAECONNRESET
448 fhandler_socket_wsock::release_events ()
450 if (WaitForSingleObject (wsock_mtx
, INFINITE
) != WAIT_FAILED
)
452 HANDLE evt
= wsock_evt
;
453 HANDLE mtx
= wsock_mtx
;
455 wsock_evt
= wsock_mtx
= NULL
;
463 fhandler_socket_wsock::set_close_on_exec (bool val
)
465 set_no_inheritance (wsock_mtx
, val
);
466 set_no_inheritance (wsock_evt
, val
);
467 if (need_fixup_before ())
470 debug_printf ("set close_on_exec for %s to %d", get_name (), val
);
473 fhandler_base::set_close_on_exec (val
);
476 /* Called if a freshly created socket is not inheritable. In that case we
477 have to use fixup_before_fork_exec. See comment in set_socket_handle for
478 a description of the problem. */
/* Prepare this socket for duplication via WSADuplicateSocketW/WSASocketW:
   allocate the WSAPROTOCOL_INFOW buffer that fixup_before_fork_exec fills
   and fixup_after_fork consumes, and bump the need-fixup-before counter of
   the descriptor table.  cmalloc_abort is used for the allocation, so no
   result check follows; the destructor releases the buffer with cfree. */
480 fhandler_socket_wsock::init_fixup_before ()
482 prot_info_ptr
= (LPWSAPROTOCOL_INFOW
)
483 cmalloc_abort (HEAP_BUF
, sizeof (WSAPROTOCOL_INFOW
));
484 cygheap
->fdtab
.inc_need_fixup_before ();
488 fhandler_socket_wsock::fixup_before_fork_exec (DWORD win_pid
)
490 SOCKET ret
= WSADuplicateSocketW (get_socket (), win_pid
, prot_info_ptr
);
492 set_winsock_errno ();
494 debug_printf ("WSADuplicateSocket succeeded (%x)", prot_info_ptr
->dwProviderReserved
);
499 fhandler_socket_wsock::fixup_after_fork (HANDLE parent
)
501 fork_fixup (parent
, wsock_mtx
, "wsock_mtx");
502 fork_fixup (parent
, wsock_evt
, "wsock_evt");
504 if (!need_fixup_before ())
506 fhandler_base::fixup_after_fork (parent
);
510 SOCKET new_sock
= WSASocketW (FROM_PROTOCOL_INFO
, FROM_PROTOCOL_INFO
,
511 FROM_PROTOCOL_INFO
, prot_info_ptr
, 0,
512 WSA_FLAG_OVERLAPPED
);
513 if (new_sock
== INVALID_SOCKET
)
515 set_winsock_errno ();
516 set_handle ((HANDLE
) INVALID_SOCKET
);
520 /* Even though the original socket was not inheritable, the duplicated
521 socket is potentially inheritable again. */
522 SetHandleInformation ((HANDLE
) new_sock
, HANDLE_FLAG_INHERIT
, 0);
523 set_handle ((HANDLE
) new_sock
);
524 debug_printf ("WSASocket succeeded (%p)", new_sock
);
/* After exec, rebuild the socket through fixup_after_fork, but only for
   sockets that need the duplicate-socket fixup and are not marked
   close-on-exec.  Nothing is done for all other sockets. */
529 fhandler_socket_wsock::fixup_after_exec ()
531 if (need_fixup_before () && !close_on_exec ())
532 fixup_after_fork (NULL
); /* No parent handle required. */
536 fhandler_socket_wsock::dup (fhandler_base
*child
, int flags
)
538 debug_printf ("here");
539 fhandler_socket_wsock
*fhs
= (fhandler_socket_wsock
*) child
;
541 if (!DuplicateHandle (GetCurrentProcess (), wsock_mtx
,
542 GetCurrentProcess (), &fhs
->wsock_mtx
,
543 0, TRUE
, DUPLICATE_SAME_ACCESS
))
548 if (!DuplicateHandle (GetCurrentProcess (), wsock_evt
,
549 GetCurrentProcess (), &fhs
->wsock_evt
,
550 0, TRUE
, DUPLICATE_SAME_ACCESS
))
553 NtClose (fhs
->wsock_mtx
);
556 if (!need_fixup_before ())
558 int ret
= fhandler_base::dup (child
, flags
);
561 NtClose (fhs
->wsock_evt
);
562 NtClose (fhs
->wsock_mtx
);
567 cygheap
->user
.deimpersonate ();
568 fhs
->init_fixup_before ();
569 fhs
->set_handle (get_handle ());
570 int ret
= fhs
->fixup_before_fork_exec (GetCurrentProcessId ());
571 cygheap
->user
.reimpersonate ();
574 fhs
->fixup_after_fork (GetCurrentProcess ());
575 if (fhs
->get_handle() != (HANDLE
) INVALID_SOCKET
)
578 cygheap
->fdtab
.dec_need_fixup_before ();
579 NtClose (fhs
->wsock_evt
);
580 NtClose (fhs
->wsock_mtx
);
/* Attach the Winsock socket SOCK to this fhandler and record the address
   family AF and the socket type TYPE.  FLAGS carries the SOCK_NONBLOCK and
   SOCK_CLOEXEC creation flags, which are translated into the O_NONBLOCK and
   O_CLOEXEC file flags.  A socket whose handle is not inheritable is treated
   as an LSP socket, see the long comment below.  For SOCK_DGRAM sockets the
   SIO_UDP_CONNRESET ioctl is issued on top. */
585 fhandler_socket_wsock::set_socket_handle (SOCKET sock
, int af
, int type
,
589 bool lsp_fixup
= false;
590 int file_flags
= O_RDWR
| O_BINARY
;
592 /* Usually sockets are inheritable IFS objects. Unfortunately some virus
593 scanners or other network-oriented software replace normal sockets
594 with their own kind, which is running through a filter driver called
595 "layered service provider" (LSP) which, fortunately, are deprecated.
597 LSP sockets are not kernel objects. They are typically not marked as
598 inheritable, nor are they IFS handles. They are in fact not inheritable
599 to child processes, and it does not help to mark them inheritable via
600 SetHandleInformation. Subsequent socket calls in the child process fail
601 with error 10038, WSAENOTSOCK.
603 There's a neat way to workaround these annoying LSP sockets. WSAIoctl
604 allows to fetch the underlying base socket, which is a normal, inheritable
605 IFS handle. So we fetch the base socket, duplicate it, and close the
606 original socket. Now we have a standard IFS socket which (hopefully)
609 If that doesn't work for some reason, mark the sockets for duplication
610 via WSADuplicateSocket/WSASocket. This requires to start the child
611 process in SUSPENDED state so we only do this if really necessary. */
612 if (!GetHandleInformation ((HANDLE
) sock
, &hdl_flags
)
613 || !(hdl_flags
& HANDLE_FLAG_INHERIT
))
620 debug_printf ("LSP handle: %p", sock
);
621 ret
= WSAIoctl (sock
, SIO_BASE_HANDLE
, NULL
, 0, (void *) &base_sock
,
622 sizeof (base_sock
), &bret
, NULL
, NULL
);
624 debug_printf ("WSAIoctl: %u", WSAGetLastError ());
625 else if (base_sock
!= sock
)
/* Test the handle flags just fetched for base_sock.  The FLAGS argument
   holds the SOCK_* creation flags and says nothing about inheritability,
   so testing it here kept the base socket from ever being used. */
627 if (GetHandleInformation ((HANDLE
) base_sock
, &hdl_flags
)
628 && (hdl_flags
& HANDLE_FLAG_INHERIT
))
630 if (!DuplicateHandle (GetCurrentProcess (), (HANDLE
) base_sock
,
631 GetCurrentProcess (), (PHANDLE
) &base_sock
,
632 0, TRUE
, DUPLICATE_SAME_ACCESS
))
633 debug_printf ("DuplicateHandle failed, %E");
636 ::closesocket (sock
);
643 set_handle ((HANDLE
) sock
);
644 set_addr_family (af
);
645 set_socket_type (type
);
/* Translate the socket creation flags into file flags. */
648 if (flags
& SOCK_NONBLOCK
)
649 file_flags
|= O_NONBLOCK
;
650 if (flags
& SOCK_CLOEXEC
)
652 set_close_on_exec (true);
653 file_flags
|= O_CLOEXEC
;
655 set_flags (file_flags
);
657 init_fixup_before ();
659 if (get_socket_type () == SOCK_DGRAM
)
661 /* Workaround the problem that a missing listener on a UDP socket
662 in a call to sendto will result in select/WSAEnumNetworkEvents
663 reporting that the socket has pending data and a subsequent call
664 to recvfrom will return -1 with error set to WSAECONNRESET.
666 This problem is a regression introduced in Windows 2000.
667 Instead of fixing the problem, a new socket IOCTL code has
668 been added, see http://support.microsoft.com/kb/263823 */
671 if (WSAIoctl (sock
, SIO_UDP_CONNRESET
, &cr
, sizeof cr
, NULL
, 0,
672 &blen
, NULL
, NULL
) == SOCKET_ERROR
)
673 debug_printf ("Reset SIO_UDP_CONNRESET: WinSock error %u",
681 fhandler_socket_inet::fhandler_socket_inet () :
682 fhandler_socket_wsock (),
684 tcp_quickack (false),
685 tcp_fastopen (false),
686 tcp_keepidle (7200), /* WinSock default */
687 tcp_keepcnt (10), /* WinSock default */
688 tcp_keepintvl (1) /* WinSock default */
692 fhandler_socket_inet::~fhandler_socket_inet ()
697 fhandler_socket_inet::socket (int af
, int type
, int protocol
, int flags
)
702 /* This test should be covered by ::socket, but make sure we don't
703 accidentally try anything else. */
704 if (type
!= SOCK_STREAM
&& type
!= SOCK_DGRAM
&& type
!= SOCK_RAW
)
709 sock
= ::socket (af
, type
, protocol
);
710 if (sock
== INVALID_SOCKET
)
712 set_winsock_errno ();
715 ret
= set_socket_handle (sock
, af
, type
, flags
);
717 ::closesocket (sock
);
/* socketpair is not provided for this socket class: none of the arguments
   is used in the visible code, errno is simply set to EAFNOSUPPORT. */
722 fhandler_socket_inet::socketpair (int af
, int type
, int protocol
, int flags
,
723 fhandler_socket
*fh_out
)
725 set_errno (EAFNOSUPPORT
);
/* Bind the socket to the local address NAME of NAMELEN bytes.  Unless the
   application asked for SO_REUSEADDR before, a setsockopt call is issued
   first (see the comment below); its result is only logged.  A failing
   Winsock bind is converted into errno via set_winsock_errno. */
730 fhandler_socket_inet::bind (const struct sockaddr
*name
, int namelen
)
734 if (!saw_reuseaddr ())
736 /* If the application didn't explicitly request SO_REUSEADDR,
737 enforce POSIX standard socket binding behaviour by setting the
738 SO_EXCLUSIVEADDRUSE socket option. See cygwin_setsockopt()
739 for a more detailed description. */
741 int ret
= ::setsockopt (get_socket (), SOL_SOCKET
,
743 (const char *) &on
, sizeof on
);
744 debug_printf ("%d = setsockopt(SO_EXCLUSIVEADDRUSE), %E", ret
);
746 if (::bind (get_socket (), name
, namelen
))
747 set_winsock_errno ();
755 fhandler_socket_inet::connect (const struct sockaddr
*name
, int namelen
)
757 struct sockaddr_storage sst
;
758 bool reset
= (name
->sa_family
== AF_UNSPEC
759 && get_socket_type () == SOCK_DGRAM
);
763 if (connect_state () == unconnected
)
765 /* To reset a connected DGRAM socket, call Winsock's connect
766 function with the address member of the sockaddr structure
767 filled with zeroes. */
768 memset (&sst
, 0, sizeof sst
);
769 sst
.ss_family
= get_addr_family ();
771 else if (get_inet_addr_inet (name
, namelen
, &sst
, &namelen
) == SOCKET_ERROR
)
774 /* Initialize connect state to "connect_pending". In the SOCK_STREAM
775 case, the state is ultimately set to "connected" or "connect_failed" in
776 wait_for_events when the FD_CONNECT event occurs. Note that the
777 underlying OS sockets are always non-blocking in this case and a
778 successfully initiated non-blocking Winsock connect always returns
779 WSAEWOULDBLOCK. Thus it's safe to rely on event handling. For DGRAM
780 sockets, however, connect can return immediately.
782 Check for either unconnected or connect_failed since in both cases it's
783 allowed to retry connecting the socket. It's also ok (albeit ugly) to
784 call connect to check if a previous non-blocking connect finished.
786 Set connect_state before calling connect, otherwise a race condition with
787 an already running select or poll might occur. */
788 if (connect_state () == unconnected
|| connect_state () == connect_failed
)
789 connect_state (connect_pending
);
791 int res
= ::connect (get_socket (), (struct sockaddr
*) &sst
, namelen
);
795 connect_state (unconnected
);
797 connect_state (connected
);
799 else if (!is_nonblocking ()
800 && res
== SOCKET_ERROR
801 && WSAGetLastError () == WSAEWOULDBLOCK
)
802 res
= wait_for_events (FD_CONNECT
| FD_CLOSE
, 0);
806 DWORD err
= WSAGetLastError ();
808 /* Some applications use the ugly technique to check if a non-blocking
809 connect succeeded by calling connect again, until it returns EISCONN.
810 This circumvents the event handling and connect_state is never set.
811 Thus we check for this situation here. */
812 if (err
== WSAEISCONN
)
813 connect_state (connected
);
814 /* Winsock returns WSAEWOULDBLOCK if the non-blocking socket cannot be
815 connected immediately. Convert to POSIX/Linux compliant EINPROGRESS. */
816 else if (is_nonblocking () && err
== WSAEWOULDBLOCK
)
817 WSASetLastError (WSAEINPROGRESS
);
818 /* Winsock returns WSAEINVAL if the socket is already a listener.
819 Convert to POSIX/Linux compliant EISCONN. */
820 else if (err
== WSAEINVAL
&& connect_state () == listener
)
821 WSASetLastError (WSAEISCONN
);
822 /* Any other error except WSAEALREADY means the connect failed. */
823 else if (connect_state () == connect_pending
&& err
!= WSAEALREADY
)
824 connect_state (connect_failed
);
825 set_winsock_errno ();
832 fhandler_socket_inet::listen (int backlog
)
834 int res
= ::listen (get_socket (), backlog
);
835 if (res
&& WSAGetLastError () == WSAEINVAL
)
837 /* It's perfectly valid to call listen on an unbound INET socket.
838 In this case the socket is automatically bound to an unused
839 port number, listening on all interfaces. On WinSock, listen
840 fails with WSAEINVAL when it's called on an unbound socket.
841 So we have to bind manually here to have POSIX semantics. */
842 if (get_addr_family () == AF_INET
)
844 struct sockaddr_in sin
;
845 sin
.sin_family
= AF_INET
;
847 sin
.sin_addr
.s_addr
= INADDR_ANY
;
848 if (!::bind (get_socket (), (struct sockaddr
*) &sin
, sizeof sin
))
849 res
= ::listen (get_socket (), backlog
);
851 else if (get_addr_family () == AF_INET6
)
853 struct sockaddr_in6 sin6
;
854 memset (&sin6
, 0, sizeof sin6
);
855 sin6
.sin6_family
= AF_INET6
;
856 if (!::bind (get_socket (), (struct sockaddr
*) &sin6
, sizeof sin6
))
857 res
= ::listen (get_socket (), backlog
);
861 connect_state (listener
); /* gets set to connected on accepted socket. */
863 set_winsock_errno ();
868 fhandler_socket_inet::accept4 (struct sockaddr
*peer
, int *len
, int flags
)
871 /* Allows NULL peer and len parameters. */
872 struct sockaddr_storage lpeer
;
873 int llen
= sizeof (struct sockaddr_storage
);
875 /* Windows event handling does not check for the validity of the desired
876 flags so we have to do it here. */
877 if (connect_state () != listener
)
879 WSASetLastError (WSAEINVAL
);
880 set_winsock_errno ();
884 SOCKET res
= INVALID_SOCKET
;
885 while (!(res
= wait_for_events (FD_ACCEPT
| FD_CLOSE
, 0))
886 && (res
= ::accept (get_socket (), (struct sockaddr
*) &lpeer
, &llen
))
888 && WSAGetLastError () == WSAEWOULDBLOCK
)
890 if (res
== INVALID_SOCKET
)
891 set_winsock_errno ();
898 fhandler_socket_inet
*sock
= (fhandler_socket_inet
*)
899 build_fh_dev (dev ());
900 if (sock
&& sock
->set_socket_handle (res
, get_addr_family (),
902 get_socket_flags ()) == 0)
904 sock
->async_io (false); /* set_socket_handle disables async. */
905 /* No locking necessary at this point. */
906 sock
->wsock_events
->events
= wsock_events
->events
| FD_WRITE
;
907 sock
->wsock_events
->owner
= wsock_events
->owner
;
908 sock
->connect_state (connected
);
915 memcpy (peer
, &lpeer
, MIN (*len
, llen
));
929 fhandler_socket_inet::getsockname (struct sockaddr
*name
, int *namelen
)
933 /* WinSock just returns WSAEFAULT if the buffer is too small. Use a
934 big enough local buffer and truncate later as necessary, per POSIX. */
935 struct sockaddr_storage sock
;
936 int len
= sizeof sock
;
937 res
= ::getsockname (get_socket (), (struct sockaddr
*) &sock
, &len
);
940 memcpy (name
, &sock
, MIN (*namelen
, len
));
945 if (WSAGetLastError () == WSAEINVAL
)
947 /* WinSock returns WSAEINVAL if the socket is locally
948 unbound. Per SUSv3 this is not an error condition.
949 We're faking a valid return value here by creating the
950 same content in the sockaddr structure as on Linux. */
951 memset (&sock
, 0, sizeof sock
);
952 sock
.ss_family
= get_addr_family ();
953 switch (get_addr_family ())
957 len
= (int) sizeof (struct sockaddr_in
);
961 len
= (int) sizeof (struct sockaddr_in6
);
964 WSASetLastError (WSAEOPNOTSUPP
);
969 memcpy (name
, &sock
, MIN (*namelen
, len
));
974 set_winsock_errno ();
980 fhandler_socket_inet::getpeername (struct sockaddr
*name
, int *namelen
)
982 /* Always use a local big enough buffer and truncate later as necessary
983 per POSIX. WinSock unfortunately only returns WSAEFAULT if the buffer
985 struct sockaddr_storage sock
;
986 int len
= sizeof sock
;
987 int res
= ::getpeername (get_socket (), (struct sockaddr
*) &sock
, &len
);
989 set_winsock_errno ();
992 memcpy (name
, &sock
, MIN (*namelen
, len
));
999 fhandler_socket_wsock::shutdown (int how
)
1001 int res
= ::shutdown (get_socket (), how
);
1003 /* Linux allows to call shutdown for any socket, even if it's not connected.
1004 This also disables to call accept on this socket, if shutdown has been
1005 called with the SHUT_RD or SHUT_RDWR parameter. In contrast, WinSock
1006 only allows to call shutdown on a connected socket. The accept function
1007 is in no way affected. So, what we do here is to fake success, and to
1008 change the event settings so that an FD_CLOSE event is triggered for the
1009 calling Cygwin function. The evaluate_events method handles the call
1010 from accept specially to generate a Linux-compatible behaviour. */
1011 if (res
&& WSAGetLastError () != WSAENOTCONN
)
1012 set_winsock_errno ();
1019 saw_shutdown_read (true);
1020 wsock_events
->events
|= FD_CLOSE
;
1021 SetEvent (wsock_evt
);
1024 saw_shutdown_write (true);
1027 saw_shutdown_read (true);
1028 saw_shutdown_write (true);
1029 wsock_events
->events
|= FD_CLOSE
;
1030 SetEvent (wsock_evt
);
1038 fhandler_socket_wsock::close ()
1043 while ((res
= ::closesocket (get_socket ())) != 0)
1045 if (WSAGetLastError () != WSAEWOULDBLOCK
)
1047 set_winsock_errno ();
1051 if (cygwait (10) == WAIT_SIGNALED
)
1057 WSASetLastError (0);
1063 fhandler_socket_inet::recv_internal (LPWSAMSG wsamsg
, bool use_recvmsg
)
1066 DWORD ret
= 0, wret
;
1067 int evt_mask
= (wsamsg
->dwFlags
& MSG_OOB
) ? FD_OOB
: FD_READ
;
1068 LPWSABUF
&wsabuf
= wsamsg
->lpBuffers
;
1069 ULONG
&wsacnt
= wsamsg
->dwBufferCount
;
1070 static NO_COPY LPFN_WSARECVMSG WSARecvMsg
;
1071 bool read_oob
= false;
1073 /* CV 2014-10-26: Do not check for the connect_state at this point. In
1074 certain scenarios there's no way to check the connect state reliably.
1075 Example (hexchat): Parent process creates socket, forks, child process
1076 calls connect, parent process calls read. Even if the event handling
1077 allows to check for FD_CONNECT in the parent, there is always yet another
1078 scenario we can easily break. */
1080 DWORD wait_flags
= wsamsg
->dwFlags
;
1081 bool waitall
= !!(wait_flags
& MSG_WAITALL
);
1082 wsamsg
->dwFlags
&= (MSG_OOB
| MSG_PEEK
| MSG_DONTROUTE
);
1086 && get_ext_funcptr (get_socket (), &WSARecvMsg
) == SOCKET_ERROR
)
1088 if (wsamsg
->Control
.len
> 0)
1090 set_winsock_errno ();
1091 return SOCKET_ERROR
;
1093 use_recvmsg
= false;
1095 else /* Only MSG_PEEK is supported by WSARecvMsg. */
1096 wsamsg
->dwFlags
&= MSG_PEEK
;
1100 if (get_socket_type () != SOCK_STREAM
)
1102 WSASetLastError (WSAEOPNOTSUPP
);
1103 set_winsock_errno ();
1104 return SOCKET_ERROR
;
1106 if (is_nonblocking () || (wsamsg
->dwFlags
& (MSG_OOB
| MSG_PEEK
)))
1110 /* recv() returns EINVAL if MSG_OOB flag is set in inline mode. */
1111 if (oobinline
&& (wsamsg
->dwFlags
& MSG_OOB
))
1114 return SOCKET_ERROR
;
1117 /* Check whether OOB data is ready or not */
1118 if (get_socket_type () == SOCK_STREAM
)
1119 if ((wsamsg
->dwFlags
& MSG_OOB
) || oobinline
)
1122 /* SIOCATMARK = _IOR('s',7,u_long) */
1123 int err
= ::ioctlsocket (get_socket (), _IOR('s',7,u_long
), &atmark
);
1126 set_winsock_errno ();
1127 return SOCKET_ERROR
;
1129 /* If there is no OOB data, recv() with MSG_OOB returns EINVAL.
1130 Note: The return value of SIOCATMARK in non-inline mode of
1131 winsock is FALSE if OOB data exists, TRUE otherwise. */
1132 if (atmark
&& (wsamsg
->dwFlags
& MSG_OOB
))
1136 return SOCKET_ERROR
;
1138 /* Inline mode for out-of-band (OOB) data of winsock is
1139 completely broken. That is, SIOCATMARK always returns
1140 TRUE in inline mode. Due to this problem, application
1141 cannot determine OOB data at all. Therefore the behavior
1142 of a socket with SO_OOBINLINE set is simulated using
1143 a socket with SO_OOBINLINE not set. In this fake inline
1144 mode, the order of the OOB and non-OOB data is not
1145 preserved. OOB data is read before non-OOB data sent
1146 prior to the OOB data. However, this most likely is
1147 not a problem in most cases. */
1148 /* If there is OOB data, read OOB data using MSG_OOB in
1149 fake inline mode. */
1150 if (!atmark
&& oobinline
)
1157 /* Note: Don't call WSARecvFrom(MSG_PEEK) without actually having data
1158 waiting in the buffers, otherwise the event handling gets messed up
1160 while (!(res
= wait_for_events (evt_mask
| FD_CLOSE
, wait_flags
))
1161 || saw_shutdown_read ())
1163 DWORD dwFlags
= wsamsg
->dwFlags
| (read_oob
? MSG_OOB
: 0);
1165 res
= WSARecvMsg (get_socket (), wsamsg
, &wret
, NULL
, NULL
);
1166 /* This is working around a really weird problem in WinSock.
1168 Assume you create a socket, fork the process (thus duplicating
1169 the socket), connect the socket in the child, then call recv
1170 on the original socket handle in the parent process.
1171 In this scenario, calls to WinSock's recvfrom and WSARecvFrom
1172 in the parent will fail with WSAEINVAL, regardless whether both
1173 address parameters, name and namelen, are NULL or point to valid
1174 storage. However, calls to recv and WSARecv succeed as expected.
1175 Per MSDN, WSAEINVAL in the context of recv means "The socket has not
1176 been bound". It is as if the recvfrom functions test if the socket
1177 is bound locally, but in the parent process, WinSock doesn't know
1178 about that and fails, while the same test is omitted in the recv
1181 This also covers another weird case: WinSock returns WSAEFAULT if
1182 namelen is a valid pointer while name is NULL. Both parameters are
1183 ignored for TCP sockets, so this only occurs when using UDP socket. */
1184 else if (!wsamsg
->name
|| get_socket_type () == SOCK_STREAM
)
1185 res
= WSARecv (get_socket (), wsabuf
, wsacnt
, &wret
, &dwFlags
,
1188 res
= WSARecvFrom (get_socket (), wsabuf
, wsacnt
, &wret
,
1189 &dwFlags
, wsamsg
->name
, &wsamsg
->namelen
,
1196 while (wret
&& wsacnt
)
1198 if (wsabuf
->len
> wret
)
1200 wsabuf
->len
-= wret
;
1201 wsabuf
->buf
+= wret
;
1206 wret
-= wsabuf
->len
;
1214 else if (WSAGetLastError () != WSAEWOULDBLOCK
)
1220 /* According to SUSv3, errno isn't set in that case and no error
1221 condition is returned. */
1222 if (WSAGetLastError () == WSAEMSGSIZE
)
1226 /* ESHUTDOWN isn't defined for recv in SUSv3. Simply EOF is returned
1228 if (WSAGetLastError () == WSAESHUTDOWN
)
1232 set_winsock_errno ();
1233 return SOCKET_ERROR
;
1242 fhandler_socket_wsock::recvfrom (void *in_ptr
, size_t len
, int flags
,
1243 struct sockaddr
*from
, int *fromlen
)
1245 char *ptr
= (char *) in_ptr
;
1247 /* size_t is 64 bit, but the len member in WSABUF is 32 bit.
1248 Split buffer if necessary. */
1249 DWORD bufcnt
= len
/ UINT32_MAX
+ ((!len
|| (len
% UINT32_MAX
)) ? 1 : 0);
1250 WSABUF wsabuf
[bufcnt
];
1251 WSAMSG wsamsg
= { from
, from
&& fromlen
? *fromlen
: 0,
1255 /* Don't use len as loop condition, it could be 0. */
1256 for (WSABUF
*wsaptr
= wsabuf
; bufcnt
--; ++wsaptr
)
1258 wsaptr
->len
= MIN (len
, UINT32_MAX
);
1263 ssize_t ret
= recv_internal (&wsamsg
, false);
1265 *fromlen
= wsamsg
.namelen
;
1270 fhandler_socket_wsock::recvmsg (struct msghdr
*msg
, int flags
)
1272 /* Disappointing but true: Even if WSARecvMsg is supported, it's only
1273 supported for datagram and raw sockets. */
1274 bool use_recvmsg
= true;
1275 if (get_socket_type () == SOCK_STREAM
|| get_addr_family () == AF_LOCAL
)
1277 use_recvmsg
= false;
1278 msg
->msg_controllen
= 0;
1281 WSABUF wsabuf
[msg
->msg_iovlen
];
1282 WSABUF
*wsaptr
= wsabuf
+ msg
->msg_iovlen
;
1283 const struct iovec
*iovptr
= msg
->msg_iov
+ msg
->msg_iovlen
;
1284 while (--wsaptr
>= wsabuf
)
1286 wsaptr
->len
= (--iovptr
)->iov_len
;
1287 wsaptr
->buf
= (char *) iovptr
->iov_base
;
1289 WSAMSG wsamsg
= { (struct sockaddr
*) msg
->msg_name
, msg
->msg_namelen
,
1290 wsabuf
, (DWORD
) msg
->msg_iovlen
,
1291 { (DWORD
) msg
->msg_controllen
, (char *) msg
->msg_control
},
1293 ssize_t ret
= recv_internal (&wsamsg
, use_recvmsg
);
1296 msg
->msg_namelen
= wsamsg
.namelen
;
1297 msg
->msg_controllen
= wsamsg
.Control
.len
;
1298 msg
->msg_flags
= wsamsg
.dwFlags
;
1299 /* If a UDP_GRO packet is present, convert gso_size from Windows DWORD
1300 to Linux-compatible uint16_t. We don't have to change the
1301 msg_control block layout for that, assuming applications do as they
1302 have been told and only use CMSG_FIRSTHDR/CMSG_NXTHDR/CMSG_DATA to
1303 access control messages. The cmsghdr alignment saves our ass here! */
1304 if (msg
->msg_controllen
&& get_socket_type () == SOCK_DGRAM
1305 && (get_addr_family () == AF_INET
|| get_addr_family () == AF_INET6
))
1307 struct cmsghdr
*cmsg
;
1309 for (cmsg
= CMSG_FIRSTHDR (msg
);
1311 cmsg
= CMSG_NXTHDR (msg
, cmsg
))
1313 if (cmsg
->cmsg_level
== SOL_UDP
1314 && cmsg
->cmsg_type
== UDP_GRO
)
1316 PDWORD gso_size_win
= (PDWORD
) CMSG_DATA(cmsg
);
1317 uint16_t *gso_size_cyg
= (uint16_t *) CMSG_DATA(cmsg
);
1318 uint16_t gso_size
= (uint16_t) *gso_size_win
;
1319 *gso_size_cyg
= gso_size
;
1329 fhandler_socket_wsock::read (void *in_ptr
, size_t& len
)
1331 char *ptr
= (char *) in_ptr
;
1333 /* size_t is 64 bit, but the len member in WSABUF is 32 bit.
1334 Split buffer if necessary. */
1335 DWORD bufcnt
= len
/ UINT32_MAX
+ ((!len
|| (len
% UINT32_MAX
)) ? 1 : 0);
1336 WSABUF wsabuf
[bufcnt
];
1337 WSAMSG wsamsg
= { NULL
, 0, wsabuf
, bufcnt
, { 0, NULL
}, 0 };
1338 /* Don't use len as loop condition, it could be 0. */
1339 for (WSABUF
*wsaptr
= wsabuf
; bufcnt
--; ++wsaptr
)
1341 wsaptr
->len
= MIN (len
, UINT32_MAX
);
1346 len
= recv_internal (&wsamsg
, false);
1350 fhandler_socket_wsock::readv (const struct iovec
*const iov
, const int iovcnt
,
1353 WSABUF wsabuf
[iovcnt
];
1354 WSABUF
*wsaptr
= wsabuf
+ iovcnt
;
1355 const struct iovec
*iovptr
= iov
+ iovcnt
;
1356 while (--wsaptr
>= wsabuf
)
1358 wsaptr
->len
= (--iovptr
)->iov_len
;
1359 wsaptr
->buf
= (char *) iovptr
->iov_base
;
1361 WSAMSG wsamsg
= { NULL
, 0, wsabuf
, (DWORD
) iovcnt
, { 0, NULL
}, 0 };
1362 return recv_internal (&wsamsg
, false);
1366 fhandler_socket_wsock::send_internal (struct _WSAMSG
*wsamsg
, int flags
)
1369 DWORD ret
= 0, sum
= 0;
1370 WSABUF out_buf
[wsamsg
->dwBufferCount
];
1371 bool use_sendmsg
= false;
1372 DWORD wait_flags
= flags
& MSG_DONTWAIT
;
1373 bool nosignal
= !!(flags
& MSG_NOSIGNAL
);
1375 /* MSG_EOR not supported by any protocol */
1376 if (flags
& MSG_EOR
)
1378 set_errno (EOPNOTSUPP
);
1379 return SOCKET_ERROR
;
1382 flags
&= (MSG_OOB
| MSG_DONTROUTE
);
1383 if (wsamsg
->Control
.len
> 0)
1385 /* Workaround for MSDN KB 823764: Split a message into chunks <= SO_SNDBUF.
1386 in_idx is the index of the current lpBuffers from the input wsamsg buffer.
1387 in_off is used to keep track of the next byte to write from a wsamsg
1388 buffer which only gets partially written. */
1389 for (DWORD in_idx
= 0, in_off
= 0;
1390 in_idx
< wsamsg
->dwBufferCount
;
1391 in_off
>= wsamsg
->lpBuffers
[in_idx
].len
&& (++in_idx
, (in_off
= 0)))
1393 /* Split a message into the least number of pieces to minimize the
1394 number of WSASendTo calls. Don't split datagram messages (bad idea).
1395 out_idx is the index of the next buffer in the out_buf WSABUF,
1396 also the number of buffers given to WSASendTo.
1397 out_len is the number of bytes in the buffers given to WSASendTo.
1398 Don't split datagram messages (very bad idea). */
1401 if (get_socket_type () == SOCK_STREAM
)
1405 out_buf
[out_idx
].buf
= wsamsg
->lpBuffers
[in_idx
].buf
+ in_off
;
1406 out_buf
[out_idx
].len
= wsamsg
->lpBuffers
[in_idx
].len
- in_off
;
1407 out_len
+= out_buf
[out_idx
].len
;
1410 while (out_len
< (unsigned) wmem ()
1411 && (in_off
= 0, ++in_idx
< wsamsg
->dwBufferCount
));
1412 /* Tweak len of the last out_buf buffer so the entire number of bytes
1413 is (less than or) equal to wmem (). Fix out_len as well since it's
1414 used in a subsequent test expression. */
1415 if (out_len
> (unsigned) wmem ())
1417 out_buf
[out_idx
- 1].len
-= out_len
- (unsigned) wmem ();
1418 out_len
= (unsigned) wmem ();
1420 /* Add the bytes written from the current last buffer to in_off,
1421 so in_off points to the next byte to be written from that buffer,
1422 or beyond which lets the outer loop skip to the next buffer. */
1423 in_off
+= out_buf
[out_idx
- 1].len
;
1429 res
= WSASendMsg (get_socket (), wsamsg
, flags
, &ret
, NULL
, NULL
);
1430 else if (get_socket_type () == SOCK_STREAM
)
1431 res
= WSASendTo (get_socket (), out_buf
, out_idx
, &ret
, flags
,
1432 wsamsg
->name
, wsamsg
->namelen
, NULL
, NULL
);
1434 res
= WSASendTo (get_socket (), wsamsg
->lpBuffers
,
1435 wsamsg
->dwBufferCount
, &ret
, flags
,
1436 wsamsg
->name
, wsamsg
->namelen
, NULL
, NULL
);
1437 if (res
&& (WSAGetLastError () == WSAEWOULDBLOCK
))
1440 wsock_events
->events
&= ~FD_WRITE
;
1444 while (res
&& (WSAGetLastError () == WSAEWOULDBLOCK
)
1445 && !(res
= wait_for_events (FD_WRITE
| FD_CLOSE
, wait_flags
)));
1450 /* For streams, return to application if the number of bytes written
1451 is less than the number of bytes we intended to write in a single
1452 call to WSASendTo. Otherwise we would have to add code to
1453 backtrack in the input buffers, which is questionable. There was
1454 probably a good reason we couldn't write more. */
1455 if (get_socket_type () != SOCK_STREAM
|| ret
< out_len
)
1458 else if (is_nonblocking () || WSAGetLastError() != WSAEWOULDBLOCK
)
1464 else if (res
== SOCKET_ERROR
)
1466 set_winsock_errno ();
1468 /* Special handling for EPIPE and SIGPIPE.
1470 EPIPE is generated if the local end has been shut down on a connection
1471 oriented socket. In this case the process will also receive a SIGPIPE
1472 unless MSG_NOSIGNAL is set. */
1473 if ((get_errno () == ECONNABORTED
|| get_errno () == ESHUTDOWN
)
1474 && get_socket_type () == SOCK_STREAM
)
1486 fhandler_socket_inet::sendto (const void *in_ptr
, size_t len
, int flags
,
1487 const struct sockaddr
*to
, int tolen
)
1489 char *ptr
= (char *) in_ptr
;
1490 struct sockaddr_storage sst
;
1492 if (to
&& get_inet_addr_inet (to
, tolen
, &sst
, &tolen
) == SOCKET_ERROR
)
1493 return SOCKET_ERROR
;
1495 /* size_t is 64 bit, but the len member in WSABUF is 32 bit.
1496 Split buffer if necessary. */
1497 DWORD bufcnt
= len
/ UINT32_MAX
+ ((!len
|| (len
% UINT32_MAX
)) ? 1 : 0);
1498 WSABUF wsabuf
[bufcnt
];
1499 WSAMSG wsamsg
= { to
? (struct sockaddr
*) &sst
: NULL
, tolen
,
1503 /* Don't use len as loop condition, it could be 0. */
1504 for (WSABUF
*wsaptr
= wsabuf
; bufcnt
--; ++wsaptr
)
1506 wsaptr
->len
= MIN (len
, UINT32_MAX
);
1511 return send_internal (&wsamsg
, flags
);
1515 fhandler_socket_inet::sendmsg (const struct msghdr
*in_msg
, int flags
)
1517 struct sockaddr_storage sst
;
1519 DWORD old_gso_size
= MAXDWORD
;
1522 /* Copy incoming msghdr into a local copy. We only access this from
1523 here on. Thus, make sure not to manipulate user space data. */
1524 struct msghdr local_msg
= *in_msg
;
1525 struct msghdr
*msg
= &local_msg
;
1528 && get_inet_addr_inet ((struct sockaddr
*) msg
->msg_name
,
1529 msg
->msg_namelen
, &sst
, &len
) == SOCKET_ERROR
)
1530 return SOCKET_ERROR
;
1532 /* Check for our optmem_max value */
1533 if (msg
->msg_controllen
> NT_MAX_PATH
)
1535 set_errno (ENOBUFS
);
1536 return SOCKET_ERROR
;
1539 /* WSASendMsg is supported only for datagram and raw sockets. */
1540 if (get_socket_type () != SOCK_DGRAM
&& get_socket_type () != SOCK_RAW
)
1541 msg
->msg_controllen
= 0;
1543 /* If we actually have control data, copy it to local storage. Control
1544 messages only handled by us have to be dropped from the msg_control
1545 block, and we don't want to change user space data. */
1547 if (msg
->msg_controllen
)
1549 void *local_cmsg
= tp
.c_get ();
1550 memcpy (local_cmsg
, msg
->msg_control
, msg
->msg_controllen
);
1551 msg
->msg_control
= local_cmsg
;
1554 /* Check for control message we handle inside Cygwin. Right now this
1555 only affects UDP sockets, so check here early. */
1556 if (msg
->msg_controllen
&& get_socket_type () == SOCK_DGRAM
)
1558 struct cmsghdr
*cmsg
;
1559 bool dropped
= false;
1561 for (cmsg
= CMSG_FIRSTHDR (msg
);
1563 cmsg
= dropped
? cmsg
: CMSG_NXTHDR (msg
, cmsg
))
1566 /* cmsg within bounds? */
1567 if (cmsg
->cmsg_len
< sizeof (struct cmsghdr
)
1568 || cmsg
->cmsg_len
> (size_t) msg
->msg_controllen
1570 - (uintptr_t) msg
->msg_control
))
1573 return SOCKET_ERROR
;
1575 /* UDP_SEGMENT? Override gso_size for this single sendmsg. */
1576 if (cmsg
->cmsg_level
== SOL_UDP
&& cmsg
->cmsg_type
== UDP_SEGMENT
)
1578 /* 16 bit unsigned, as on Linux */
1579 DWORD gso_size
= *(uint16_t *) CMSG_DATA(cmsg
);
1580 int size
= sizeof old_gso_size
;
1581 /* Save the old gso_size and set the requested one. */
1582 if (::getsockopt (get_socket (), IPPROTO_UDP
, UDP_SEGMENT
,
1583 (char *) &old_gso_size
, &size
) == SOCKET_ERROR
1584 || ::setsockopt (get_socket (), IPPROTO_UDP
, UDP_SEGMENT
,
1585 (char *) &gso_size
, sizeof gso_size
)
1588 set_winsock_errno ();
1589 return SOCKET_ERROR
;
1591 /* Drop message from msgbuf, Windows doesn't know it. */
1592 size_t cmsg_size
= CMSG_ALIGN (cmsg
->cmsg_len
);
1593 struct cmsghdr
*cmsg_next
= CMSG_NXTHDR (msg
, cmsg
);
1595 memmove (cmsg
, cmsg_next
, (char *) msg
->msg_control
1596 + msg
->msg_controllen
1597 - (char *) cmsg_next
);
1598 msg
->msg_controllen
-= cmsg_size
;
1600 /* Avoid infinite loop */
1601 if (msg
->msg_controllen
<= 0)
1604 msg
->msg_controllen
= 0;
1610 /* Copy over msg_iov into an equivalent WSABUF array. */
1611 WSABUF wsabuf
[msg
->msg_iovlen
];
1612 WSABUF
*wsaptr
= wsabuf
;
1613 const struct iovec
*iovptr
= msg
->msg_iov
;
1614 for (int i
= 0; i
< msg
->msg_iovlen
; ++i
)
1616 wsaptr
->len
= iovptr
->iov_len
;
1617 (wsaptr
++)->buf
= (char *) (iovptr
++)->iov_base
;
1620 /* Eventually copy over to a WSAMSG and call send_internal with that. */
1621 WSAMSG wsamsg
= { msg
->msg_name
? (struct sockaddr
*) &sst
: NULL
, len
,
1622 wsabuf
, (DWORD
) msg
->msg_iovlen
,
1623 { (DWORD
) msg
->msg_controllen
,
1624 msg
->msg_controllen
? (char *) msg
->msg_control
: NULL
},
1626 ret
= send_internal (&wsamsg
, flags
);
1627 if (old_gso_size
!= MAXDWORD
)
1628 ::setsockopt (get_socket (), IPPROTO_UDP
, UDP_SEGMENT
,
1629 (char *) &old_gso_size
, sizeof old_gso_size
);
1634 fhandler_socket_wsock::write (const void *in_ptr
, size_t len
)
1636 char *ptr
= (char *) in_ptr
;
1638 /* size_t is 64 bit, but the len member in WSABUF is 32 bit.
1639 Split buffer if necessary. */
1640 DWORD bufcnt
= len
/ UINT32_MAX
+ ((!len
|| (len
% UINT32_MAX
)) ? 1 : 0);
1641 WSABUF wsabuf
[bufcnt
];
1642 WSAMSG wsamsg
= { NULL
, 0, wsabuf
, bufcnt
, { 0, NULL
}, 0 };
1643 /* Don't use len as loop condition, it could be 0. */
1644 for (WSABUF
*wsaptr
= wsabuf
; bufcnt
--; ++wsaptr
)
1646 wsaptr
->len
= MIN (len
, UINT32_MAX
);
1651 return send_internal (&wsamsg
, 0);
1655 fhandler_socket_wsock::writev (const struct iovec
*const iov
, const int iovcnt
,
1658 WSABUF wsabuf
[iovcnt
];
1659 WSABUF
*wsaptr
= wsabuf
;
1660 const struct iovec
*iovptr
= iov
;
1661 for (int i
= 0; i
< iovcnt
; ++i
)
1663 wsaptr
->len
= iovptr
->iov_len
;
1664 (wsaptr
++)->buf
= (char *) (iovptr
++)->iov_base
;
1666 WSAMSG wsamsg
= { NULL
, 0, wsabuf
, (DWORD
) iovcnt
, { 0, NULL
}, 0 };
1667 return send_internal (&wsamsg
, 0);
1670 #define TCP_MAXRT 5 /* Older systems don't support TCP_MAXRTMS
1671 TCP_MAXRT takes secs, not msecs. */
1673 #ifndef SIO_TCP_SET_ACK_FREQUENCY
1674 #define SIO_TCP_SET_ACK_FREQUENCY _WSAIOW(IOC_VENDOR,23)
1677 #define MAX_TCP_KEEPIDLE 32767
1678 #define MAX_TCP_KEEPCNT 255
1679 #define MAX_TCP_KEEPINTVL 32767
1681 #define FIXED_WSOCK_TCP_KEEPCNT 10
1684 fhandler_socket_inet::set_keepalive (int keepidle
, int keepcnt
, int keepintvl
)
1686 struct tcp_keepalive tka
;
1687 int so_keepalive
= 0;
1688 int len
= sizeof so_keepalive
;
1693 https://docs.microsoft.com/en-us/windows/win32/winsock/sio-keepalive-vals
1694 the subsequent keep-alive settings in struct tcp_keepalive are only used
1695 if the onoff member is != 0. Request the current state of SO_KEEPALIVE,
1696 then set the keep-alive options with onoff set to 1. On success, if
1697 SO_KEEPALIVE was 0, restore to the original SO_KEEPALIVE setting. Per
1698 the above MSDN doc, the SIO_KEEPALIVE_VALS settings are persistent
1699 across switching SO_KEEPALIVE. */
1700 ret
= ::getsockopt (get_socket (), SOL_SOCKET
, SO_KEEPALIVE
,
1701 (char *) &so_keepalive
, &len
);
1702 if (ret
== SOCKET_ERROR
)
1703 debug_printf ("getsockopt (SO_KEEPALIVE) failed, %u\n", WSAGetLastError ());
1705 tka
.keepalivetime
= keepidle
* MSPERSEC
;
1706 /* WinSock TCP_KEEPCNT is fixed. But we still want the keep-alive to
1707 time out after TCP_KEEPIDLE + TCP_KEEPCNT * TCP_KEEPINTVL secs.
1708 To that end, we set keepaliveinterval so that
1710 keepaliveinterval * FIXED_WSOCK_TCP_KEEPCNT == TCP_KEEPINTVL * TCP_KEEPCNT
1712 FIXME? Does that make sense?
1714 Sidenote: Given the max values, the entire operation fits into an int. */
1715 tka
.keepaliveinterval
= MSPERSEC
/ FIXED_WSOCK_TCP_KEEPCNT
* keepcnt
1717 if (WSAIoctl (get_socket (), SIO_KEEPALIVE_VALS
, (LPVOID
) &tka
, sizeof tka
,
1718 NULL
, 0, &dummy
, NULL
, NULL
) == SOCKET_ERROR
)
1720 set_winsock_errno ();
1725 ret
= ::setsockopt (get_socket (), SOL_SOCKET
, SO_KEEPALIVE
,
1726 (const char *) &so_keepalive
, sizeof so_keepalive
);
1727 if (ret
== SOCKET_ERROR
)
1728 debug_printf ("setsockopt (SO_KEEPALIVE) failed, %u\n",
1729 WSAGetLastError ());
1735 fhandler_socket_inet::setsockopt (int level
, int optname
, const void *optval
,
1738 bool ignore
= false;
1740 unsigned int winsock_val
;
1742 /* Preprocessing setsockopt. Set ignore to true if setsockopt call should
1743 get skipped entirely. */
1750 set_errno (ENOPROTOOPT
);
1754 /* Per POSIX we must not be able to reuse a complete duplicate of a
1755 local TCP address (same IP, same port), even if SO_REUSEADDR has
1756 been set. This behaviour is maintained in WinSock for backward
1757 compatibility, while the WinSock standard behaviour of stream
1758 socket binding is equivalent to the POSIX behaviour as if
1759 SO_REUSEADDR has been set. The SO_EXCLUSIVEADDRUSE option has
1760 been added to allow an application to request POSIX standard
1761 behaviour in the non-SO_REUSEADDR case.
1763 To emulate POSIX socket binding behaviour, note that SO_REUSEADDR
1764 has been set but don't call setsockopt. Instead
1765 fhandler_socket::bind sets SO_EXCLUSIVEADDRUSE if the application
1766 did not set SO_REUSEADDR. */
1767 if (optlen
< (socklen_t
) sizeof (int))
1772 if (get_socket_type () == SOCK_STREAM
)
1778 if (optlen
< (socklen_t
) sizeof (struct timeval
))
1783 if (timeval_to_ms ((struct timeval
*) optval
,
1784 (optname
== SO_RCVTIMEO
) ? rcvtimeo ()
1792 /* Inline mode for out-of-band (OOB) data of winsock is
1793 completely broken. That is, SIOCATMARK always returns
1794 TRUE in inline mode. Due to this problem, application
1795 cannot determine OOB data at all. Therefore the behavior
1796 of a socket with SO_OOBINLINE set is simulated using
1797 a socket with SO_OOBINLINE not set. In this fake inline
1798 mode, the order of the OOB and non-OOB data is not
1799 preserved. OOB data is read before non-OOB data sent
1800 prior to the OOB data. However, this most likely is
1801 not a problem in most cases. */
1802 /* Here, instead of actually setting inline mode, simply
1803 set the variable oobinline. */
1804 oobinline
= *(int *) optval
? true : false;
1817 /* Winsock doesn't support setting the IP_TOS field with setsockopt
1818 and TOS was never implemented for TCP anyway. setsockopt returns
1819 WinSock error 10022, WSAEINVAL when trying to set the IP_TOS
1820 field. We just return 0 instead. */
1843 /* Check for stream socket early on, so we don't have to do this for
1844 every option. Also, WinSock returns EINVAL. */
1845 if (type
!= SOCK_STREAM
)
1847 set_errno (EOPNOTSUPP
);
1854 /* Winsock doesn't support setting TCP_MAXSEG, only requesting it
1855 via getsockopt. Make this a no-op. */
1860 /* Various sources on the net claim that TCP_QUICKACK is supported
1861 by Windows, even using the same optname value of 12. However,
1862 the ws2ipdef.h header calls this option TCP_CONGESTION_ALGORITHM
1863 and there's no official statement, nor official documentation
1864 confirming or denying this option is equivalent to Linux'
1865 TCP_QUICKACK. Also, weirdly, this option takes values from 0..7.
1867 There is another undocumented option to WSAIoctl called
1868 SIO_TCP_SET_ACK_FREQUENCY which is already used by some
1869 projects, so we're going to use it here, too, for now.
1871 There's an open issue in the dotnet github,
1872 https://github.com/dotnet/runtime/issues/798
1873 Hopefully this clarifies the situation in the not too distant
1877 /* https://stackoverflow.com/questions/55034112/c-disable-delayed-ack-on-windows
1878 claims that valid values for SIO_TCP_SET_ACK_FREQUENCY are
1879 1..255. In contrast to that, my own testing shows that
1880 valid values are 0 and 1 exclusively. */
1881 int freq
= !!*(int *) optval
;
1882 if (WSAIoctl (get_socket (), SIO_TCP_SET_ACK_FREQUENCY
, &freq
,
1883 sizeof freq
, NULL
, 0, &dummy
, NULL
, NULL
)
1886 set_winsock_errno ();
1890 tcp_quickack
= freq
? true : false;
1895 /* Don't let this option slip through from user space. */
1896 set_errno (EOPNOTSUPP
);
1899 case TCP_USER_TIMEOUT
:
1900 if (!wincap
.has_tcp_maxrtms ())
1902 /* convert msecs to secs. Values < 1000 ms are converted to
1903 0 secs, just as in WinSock. */
1904 winsock_val
= *(unsigned int *) optval
/ MSPERSEC
;
1905 optname
= TCP_MAXRT
;
1906 optval
= (const void *) &winsock_val
;
1911 /* Fake FastOpen on older systems. */
1912 if (!wincap
.has_tcp_fastopen ())
1915 tcp_fastopen
= *(int *) optval
? true : false;
1920 /* Handle TCP_KEEPIDLE on older systems. */
1921 if (!wincap
.has_linux_tcp_keepalive_sockopts ())
1923 if (*(int *) optval
< 1 || *(int *) optval
> MAX_TCP_KEEPIDLE
)
1928 if (set_keepalive (*(int *) optval
, tcp_keepcnt
, tcp_keepintvl
))
1931 tcp_keepidle
= *(int *) optval
;
1936 /* Fake TCP_KEEPCNT on older systems. */
1937 if (!wincap
.has_linux_tcp_keepalive_sockopts ())
1939 if (*(int *) optval
< 1 || *(int *) optval
> MAX_TCP_KEEPCNT
)
1944 if (set_keepalive (tcp_keepidle
, *(int *) optval
, tcp_keepintvl
))
1947 tcp_keepcnt
= *(int *) optval
;
1952 /* Handle TCP_KEEPINTVL on older systems. */
1953 if (!wincap
.has_linux_tcp_keepalive_sockopts ())
1955 if (*(int *) optval
< 1 || *(int *) optval
> MAX_TCP_KEEPINTVL
)
1960 if (set_keepalive (tcp_keepidle
, tcp_keepcnt
, *(int *) optval
))
1963 tcp_keepintvl
= *(int *) optval
;
1973 /* Check for dgram socket early on, so we don't have to do this for
1974 every option. Also, WinSock returns EINVAL. */
1975 if (type
!= SOCK_DGRAM
)
1977 set_errno (EOPNOTSUPP
);
1980 if (optlen
< (socklen_t
) sizeof (int))
1988 if (*(int *) optval
< 0 || *(int *) optval
> USHRT_MAX
)
1996 /* In contrast to Windows' UDP_RECV_MAX_COALESCED_SIZE option,
1997 Linux' UDP_GRO option is just a bool. The max. packet size
1998 is dynamically evaluated from the MRU. There's no easy,
1999 reliable way to get the MRU. We assume that this is what Windows
2000 will do internally anyway and, given UDP_RECV_MAX_COALESCED_SIZE
2001 defines a *maximum* size for aggregated packets, we just choose
2002 the maximum sensible value. FIXME? IP_MTU_DISCOVER / IP_MTU */
2003 winsock_val
= *(int *) optval
? USHRT_MAX
: 0;
2004 optval
= &winsock_val
;
2016 /* Call Winsock setsockopt (or not) */
2021 ret
= ::setsockopt (get_socket (), level
, optname
, (const char *) optval
,
2023 if (ret
== SOCKET_ERROR
)
2025 set_winsock_errno ();
2030 if (optlen
== (socklen_t
) sizeof (int))
2031 debug_printf ("setsockopt optval=%x", *(int *) optval
);
2033 /* Postprocessing setsockopt, setting fhandler_socket members, etc. */
2040 saw_reuseaddr (*(int *) optval
);
2044 rmem (*(int *) optval
);
2048 wmem (*(int *) optval
);
2064 fhandler_socket_inet::getsockopt (int level
, int optname
, const void *optval
,
2067 bool onebyte
= false;
2070 /* Preprocessing getsockopt. */
2077 set_errno (ENOPROTOOPT
);
2082 unsigned int *reuseaddr
= (unsigned int *) optval
;
2084 if (*optlen
< (socklen_t
) sizeof *reuseaddr
)
2089 *reuseaddr
= saw_reuseaddr();
2090 *optlen
= (socklen_t
) sizeof *reuseaddr
;
2097 struct timeval
*time_out
= (struct timeval
*) optval
;
2099 if (*optlen
< (socklen_t
) sizeof *time_out
)
2104 DWORD ms
= (optname
== SO_RCVTIMEO
) ? rcvtimeo () : sndtimeo ();
2105 if (ms
== 0 || ms
== INFINITE
)
2107 time_out
->tv_sec
= 0;
2108 time_out
->tv_usec
= 0;
2112 time_out
->tv_sec
= ms
/ MSPERSEC
;
2113 time_out
->tv_usec
= ((ms
% MSPERSEC
) * USPERSEC
) / MSPERSEC
;
2115 *optlen
= (socklen_t
) sizeof *time_out
;
2121 unsigned int *type
= (unsigned int *) optval
;
2122 *type
= get_socket_type ();
2123 *optlen
= (socklen_t
) sizeof *type
;
2128 *(int *) optval
= oobinline
? 1 : 0;
2140 /* Check for stream socket early on, so we don't have to do this for
2141 every option. Also, WinSock returns EINVAL. */
2142 if (type
!= SOCK_STREAM
)
2144 set_errno (EOPNOTSUPP
);
2151 *(int *) optval
= tcp_quickack
? 1 : 0;
2152 *optlen
= sizeof (int);
2156 /* Don't let this option slip through from user space. */
2157 set_errno (EOPNOTSUPP
);
2160 case TCP_USER_TIMEOUT
:
2161 /* Older systems don't support TCP_MAXRTMS, just call TCP_MAXRT. */
2162 if (!wincap
.has_tcp_maxrtms ())
2163 optname
= TCP_MAXRT
;
2167 /* Fake FastOpen on older systems */
2168 if (!wincap
.has_tcp_fastopen ())
2170 *(int *) optval
= tcp_fastopen
? 1 : 0;
2171 *optlen
= sizeof (int);
2177 /* Use stored value on older systems */
2178 if (!wincap
.has_linux_tcp_keepalive_sockopts ())
2180 *(int *) optval
= tcp_keepidle
;
2181 *optlen
= sizeof (int);
2187 /* Use stored value on older systems */
2188 if (!wincap
.has_linux_tcp_keepalive_sockopts ())
2190 *(int *) optval
= tcp_keepcnt
;
2191 *optlen
= sizeof (int);
2197 /* Use stored value on older systems */
2198 if (!wincap
.has_linux_tcp_keepalive_sockopts ())
2200 *(int *) optval
= tcp_keepintvl
;
2201 *optlen
= sizeof (int);
2212 /* Check for dgram socket early on, so we don't have to do this for
2213 every option. Also, WinSock returns EINVAL. */
2214 if (type
!= SOCK_DGRAM
)
2216 set_errno (EOPNOTSUPP
);
2225 /* Call Winsock getsockopt */
2226 ret
= ::getsockopt (get_socket (), level
, optname
, (char *) optval
,
2228 if (ret
== SOCKET_ERROR
)
2230 set_winsock_errno ();
2234 /* Postprocessing getsockopt, setting fhandler_socket members, etc. Set
2235 onebyte true for options returning BOOLEAN instead of a boolean DWORD. */
2243 int *e
= (int *) optval
;
2244 debug_printf ("WinSock SO_ERROR = %d", *e
);
2245 *e
= find_winsock_errno (*e
);
2266 case TCP_MAXRT
: /* After above conversion from TCP_USER_TIMEOUT */
2267 /* convert secs to msecs */
2268 *(unsigned int *) optval
*= MSPERSEC
;
2284 /* Convert to bool option */
2285 *(unsigned int *) optval
= *(unsigned int *) optval
? 1 : 0;
2299 /* Regression in 6.0 kernel and later: instead of a 4 byte BOOL value, a
2300 1 byte BOOLEAN value is returned, in contrast to older systems and
2301 the documentation. Since an int type is expected by the calling
2302 application, we convert the result here. */
2303 BOOLEAN
*in
= (BOOLEAN
*) optval
;
2304 int *out
= (int *) optval
;
2313 fhandler_socket_wsock::ioctl (unsigned int cmd
, void *p
)
2319 /* Here we handle only ioctl commands which are understood by Winsock.
2320 However, we have a problem, which is, the different size of u_long
2321 in Windows and 64 bit Cygwin. This affects the definitions of
2322 FIOASYNC, etc, because they are defined in terms of sizeof(u_long).
2323 So we have to use case labels which are independent of the sizeof
2324 u_long. Since we're redefining u_long at the start of this file to
2325 match Winsock's idea of u_long, we can use the real definitions in
2326 calls to Windows. In theory we also have to make sure to convert the
2327 different ideas of u_long between the application and Winsock, but
2328 fortunately, the parameters defined as u_long pointers are on Linux
2329 and BSD systems defined as int pointer, so the applications will
2330 use a type of the expected size. Hopefully. */
2332 case _IOW('f', 125, u_long
):
2333 res
= WSAAsyncSelect (get_socket (), winmsg
, WM_ASYNCIO
,
2334 *(int *) p
? ASYNC_MASK
: 0);
2335 syscall_printf ("Async I/O on socket %s",
2336 *(int *) p
? "started" : "cancelled");
2337 async_io (*(int *) p
!= 0);
2338 /* If async_io is switched off, revert the event handling. */
2339 if (*(int *) p
== 0)
2340 WSAEventSelect (get_socket (), wsock_evt
, EVENT_MASK
);
2343 case _IOR('f', 127, u_long
):
2344 /* Make sure to use the Winsock definition of FIONREAD. */
2345 res
= ::ioctlsocket (get_socket (), _IOR('f', 127, u_long
), (u_long
*) p
);
2346 if (res
== SOCKET_ERROR
)
2347 set_winsock_errno ();
2351 /* Sockets are always non-blocking internally. So we just note the
2353 /* Convert the different idea of u_long in the definition of cmd. */
2354 if (((cmd
>> 16) & IOCPARM_MASK
) == sizeof (unsigned long))
2355 cmd
= (cmd
& ~(IOCPARM_MASK
<< 16)) | (sizeof (u_long
) << 16);
2358 syscall_printf ("socket is now %sblocking",
2359 *(int *) p
? "non" : "");
2360 set_nonblocking (*(int *) p
);
2364 res
= ::ioctlsocket (get_socket (), cmd
, (u_long
*) p
);
2365 /* In winsock, the return value of SIOCATMARK is FALSE if
2366 OOB data exists, TRUE otherwise. This is almost opposite
2368 /* SIOCATMARK = _IOR('s',7,u_long) */
2369 if (cmd
== _IOR('s',7,u_long
) && !res
)
2370 *(u_long
*)p
= !*(u_long
*)p
;
2373 res
= fhandler_socket::ioctl (cmd
, p
);
2376 syscall_printf ("%d = ioctl_socket(%x, %p)", res
, cmd
, p
);
2381 fhandler_socket_wsock::fcntl (int cmd
, intptr_t arg
)
2389 pid_t pid
= (pid_t
) arg
;
2391 wsock_events
->owner
= pid
;
2393 debug_printf ("owner set to %d", pid
);
2397 res
= wsock_events
->owner
;
2400 res
= fhandler_socket::fcntl (cmd
, arg
);