Cygwin: strptime: add release note
[newlib-cygwin.git] / winsup / cygwin / fhandler / socket_inet.cc
blob63cc498f17b6871f98fffac1bcbdd0197e5f2cfd
1 /* fhandler_socket_inet.cc.
3 See fhandler.h for a description of the fhandler classes.
5 This file is part of Cygwin.
7 This software is a copyrighted work licensed under the terms of the
8 Cygwin license. Please consult the file "CYGWIN_LICENSE" for
9 details. */
11 #define __INSIDE_CYGWIN_NET__
12 #define USE_SYS_TYPES_FD_SET
14 #include "winsup.h"
15 /* 2014-04-24: Current Mingw headers define sockaddr_in6 using u_long (8 byte)
16 because a redefinition for LP64 systems is missing. This leads to a wrong
17 definition and size of sockaddr_in6 when building with winsock headers.
18 This definition is also required to use the right u_long type in subsequent
19 function calls. */
20 #undef u_long
21 #define u_long __ms_u_long
22 #include <w32api/ws2tcpip.h>
23 #include <w32api/mswsock.h>
24 #include <w32api/mstcpip.h>
25 #include <netinet/tcp.h>
26 #include <netinet/udp.h>
27 #include <unistd.h>
28 #include <asm/byteorder.h>
29 #include <sys/socket.h>
30 #include <sys/param.h>
31 #include <sys/statvfs.h>
32 #include <cygwin/acl.h>
33 #include "cygerrno.h"
34 #include "path.h"
35 #include "fhandler.h"
36 #include "dtable.h"
37 #include "cygheap.h"
38 #include "shared_info.h"
39 #include "wininfo.h"
40 #include "tls_pbuf.h"
42 #define ASYNC_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT)
43 #define EVENT_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT|FD_CLOSE)
45 #define LOCK_EVENTS \
46 if (wsock_mtx && \
47 WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED) \
50 #define UNLOCK_EVENTS \
51 ReleaseMutex (wsock_mtx); \
54 /* Maximum number of concurrently opened sockets from all Cygwin processes
55 per session. Note that shared sockets (through dup/fork/exec) are
56 counted as one socket. */
57 #define NUM_SOCKS 2048U
59 #define LOCK_EVENTS \
60 if (wsock_mtx && \
61 WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED) \
64 #define UNLOCK_EVENTS \
65 ReleaseMutex (wsock_mtx); \
68 static wsa_event wsa_events[NUM_SOCKS] __attribute__((section (".cygwin_dll_common"), shared));
70 static LONG socket_serial_number __attribute__((section (".cygwin_dll_common"), shared));
72 static HANDLE wsa_slot_mtx;
74 static PWCHAR
75 sock_shared_name (PWCHAR buf, LONG num)
77 __small_swprintf (buf, L"socket.%d", num);
78 return buf;
81 static wsa_event *
82 search_wsa_event_slot (LONG new_serial_number)
84 WCHAR name[32], searchname[32];
85 UNICODE_STRING uname;
86 OBJECT_ATTRIBUTES attr;
87 NTSTATUS status;
89 if (!wsa_slot_mtx)
91 RtlInitUnicodeString (&uname, sock_shared_name (name, 0));
92 InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT | OBJ_OPENIF,
93 get_session_parent_dir (),
94 everyone_sd (CYG_MUTANT_ACCESS));
95 status = NtCreateMutant (&wsa_slot_mtx, CYG_MUTANT_ACCESS, &attr, FALSE);
96 if (!NT_SUCCESS (status))
97 api_fatal ("Couldn't create/open shared socket mutex %S, %y",
98 &uname, status);
100 switch (WaitForSingleObject (wsa_slot_mtx, INFINITE))
102 case WAIT_OBJECT_0:
103 case WAIT_ABANDONED:
104 break;
105 default:
106 api_fatal ("WFSO failed for shared socket mutex, %E");
107 break;
109 unsigned int slot = new_serial_number % NUM_SOCKS;
110 while (wsa_events[slot].serial_number)
112 HANDLE searchmtx;
113 RtlInitUnicodeString (&uname, sock_shared_name (searchname,
114 wsa_events[slot].serial_number));
115 InitializeObjectAttributes (&attr, &uname, 0, get_session_parent_dir (),
116 NULL);
117 status = NtOpenMutant (&searchmtx, READ_CONTROL, &attr);
118 if (!NT_SUCCESS (status))
119 break;
120 /* Mutex still exists, attached socket is active, try next slot. */
121 NtClose (searchmtx);
122 slot = (slot + 1) % NUM_SOCKS;
123 if (slot == (new_serial_number % NUM_SOCKS))
125 /* Did the whole array once. Too bad. */
126 debug_printf ("No free socket slot");
127 ReleaseMutex (wsa_slot_mtx);
128 return NULL;
131 memset (&wsa_events[slot], 0, sizeof (wsa_event));
132 wsa_events[slot].serial_number = new_serial_number;
133 ReleaseMutex (wsa_slot_mtx);
134 return wsa_events + slot;
137 /* cygwin internal: map sockaddr into internet domain address */
138 static int
139 get_inet_addr_inet (const struct sockaddr *in, int inlen,
140 struct sockaddr_storage *out, int *outlen)
142 switch (in->sa_family)
144 case AF_INET:
145 memcpy (out, in, inlen);
146 *outlen = inlen;
147 /* If the peer address given in connect or sendto is the ANY address,
148 Winsock fails with WSAEADDRNOTAVAIL, while Linux converts that into
149 a connection/send attempt to LOOPBACK. We're doing the same here. */
150 if (((struct sockaddr_in *) out)->sin_addr.s_addr == htonl (INADDR_ANY))
151 ((struct sockaddr_in *) out)->sin_addr.s_addr = htonl (INADDR_LOOPBACK);
152 return 0;
153 case AF_INET6:
154 memcpy (out, in, inlen);
155 *outlen = inlen;
156 /* See comment in AF_INET case. */
157 if (IN6_IS_ADDR_UNSPECIFIED (&((struct sockaddr_in6 *) out)->sin6_addr))
158 ((struct sockaddr_in6 *) out)->sin6_addr = in6addr_loopback;
159 return 0;
160 default:
161 set_errno (EAFNOSUPPORT);
162 return SOCKET_ERROR;
166 /* There's no DLL which exports the symbol WSARecvMsg. One has to call
167 WSAIoctl as below to fetch the function pointer. Why on earth did the
168 MS developers decide not to export a normal symbol for these extension
169 functions? */
170 inline int
171 get_ext_funcptr (SOCKET sock, void *funcptr)
173 DWORD bret;
174 const GUID guid = WSAID_WSARECVMSG;
175 return WSAIoctl (sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
176 (void *) &guid, sizeof (GUID), funcptr, sizeof (void *),
177 &bret, NULL, NULL);
180 fhandler_socket_wsock::fhandler_socket_wsock () :
181 fhandler_socket (),
182 wsock_events (NULL),
183 wsock_mtx (NULL),
184 wsock_evt (NULL),
185 status (),
186 prot_info_ptr (NULL)
188 need_fork_fixup (true);
191 fhandler_socket_wsock::~fhandler_socket_wsock ()
193 if (prot_info_ptr)
194 cfree (prot_info_ptr);
197 bool
198 fhandler_socket_wsock::init_events ()
200 LONG new_serial_number;
201 WCHAR name[32];
202 UNICODE_STRING uname;
203 OBJECT_ATTRIBUTES attr;
204 NTSTATUS status;
208 new_serial_number =
209 InterlockedIncrement (&socket_serial_number);
210 if (!new_serial_number) /* 0 is reserved for global mutex */
211 InterlockedIncrement (&socket_serial_number);
212 set_ino (new_serial_number);
213 RtlInitUnicodeString (&uname, sock_shared_name (name, new_serial_number));
214 InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT | OBJ_OPENIF,
215 get_session_parent_dir (),
216 everyone_sd (CYG_MUTANT_ACCESS));
217 status = NtCreateMutant (&wsock_mtx, CYG_MUTANT_ACCESS, &attr, FALSE);
218 if (!NT_SUCCESS (status))
220 debug_printf ("NtCreateMutant(%S), %y", &uname, status);
221 set_errno (ENOBUFS);
222 return false;
224 if (status == STATUS_OBJECT_NAME_EXISTS)
225 NtClose (wsock_mtx);
227 while (status == STATUS_OBJECT_NAME_EXISTS);
228 if ((wsock_evt = CreateEvent (&sec_all, TRUE, FALSE, NULL))
229 == WSA_INVALID_EVENT)
231 debug_printf ("CreateEvent, %E");
232 set_errno (ENOBUFS);
233 NtClose (wsock_mtx);
234 return false;
236 if (WSAEventSelect (get_socket (), wsock_evt, EVENT_MASK) == SOCKET_ERROR)
238 debug_printf ("WSAEventSelect, %E");
239 set_winsock_errno ();
240 NtClose (wsock_evt);
241 NtClose (wsock_mtx);
242 return false;
244 if (!(wsock_events = search_wsa_event_slot (new_serial_number)))
246 set_errno (ENOBUFS);
247 NtClose (wsock_evt);
248 NtClose (wsock_mtx);
249 return false;
251 if (get_socket_type () == SOCK_DGRAM)
252 wsock_events->events = FD_WRITE;
253 return true;
257 fhandler_socket_wsock::evaluate_events (const long event_mask, long &events,
258 const bool erase)
260 int ret = 0;
261 long events_now = 0;
263 WSANETWORKEVENTS evts = { 0 };
264 if (!(WSAEnumNetworkEvents (get_socket (), wsock_evt, &evts)))
266 if (evts.lNetworkEvents)
268 LOCK_EVENTS;
269 wsock_events->events |= evts.lNetworkEvents;
270 events_now = (wsock_events->events & event_mask);
271 if (evts.lNetworkEvents & FD_CONNECT)
273 wsock_events->connect_errorcode = evts.iErrorCode[FD_CONNECT_BIT];
275 /* Setting the connect_state and calling the AF_LOCAL handshake
276 here allows to handle this stuff from a single point. This
277 is independent of FD_CONNECT being requested. Consider a
278 server calling connect(2) and then immediately poll(2) with
279 only polling for POLLIN (example: postfix), or select(2) just
280 asking for descriptors ready to read.
282 Something weird occurs in Winsock: If you fork off and call
283 recv/send on the duplicated, already connected socket, another
284 FD_CONNECT event is generated in the child process. This
285 would trigger a call to af_local_connect which obviously fail.
286 Avoid this by calling set_connect_state only if connect_state
287 is connect_pending. */
288 if (connect_state () == connect_pending)
290 if (wsock_events->connect_errorcode)
291 connect_state (connect_failed);
292 else if (af_local_connect ())
294 wsock_events->connect_errorcode = WSAGetLastError ();
295 connect_state (connect_failed);
297 else
298 connect_state (connected);
301 UNLOCK_EVENTS;
302 if ((evts.lNetworkEvents & FD_OOB) && wsock_events->owner)
303 kill (wsock_events->owner, SIGURG);
307 LOCK_EVENTS;
308 if ((events = events_now) != 0
309 || (events = (wsock_events->events & event_mask)) != 0)
311 if (events & FD_CONNECT)
313 int wsa_err = wsock_events->connect_errorcode;
314 if (wsa_err)
316 /* CV 2014-04-23: This is really weird. If you call connect
317 asynchronously on a socket and then select, an error like
318 "Connection refused" is set in the event and in the SO_ERROR
319 socket option. If you call connect, then dup, then select,
320 the error is set in the event, but not in the SO_ERROR socket
321 option, despite the dup'ed socket handle referring to the same
322 socket. We're trying to workaround this problem here by
323 taking the connect errorcode from the event and write it back
324 into the SO_ERROR socket option.
326 CV 2014-06-16: Call WSASetLastError *after* setsockopt since,
327 apparently, setsockopt sets the last WSA error code to 0 on
328 success. */
329 ::setsockopt (get_socket (), SOL_SOCKET, SO_ERROR,
330 (const char *) &wsa_err, sizeof wsa_err);
331 WSASetLastError (wsa_err);
332 ret = SOCKET_ERROR;
334 /* Since FD_CONNECT is only given once, we have to keep FD_CONNECT
335 for connection failed sockets to have consistent behaviour in
336 programs calling poll/select multiple times. Example test to
337 non-listening port: curl -v 127.0.0.1:47 */
338 if (connect_state () != connect_failed)
339 wsock_events->events &= ~FD_CONNECT;
340 wsock_events->events |= FD_WRITE;
341 wsock_events->connect_errorcode = 0;
343 if (events & FD_CLOSE)
345 if (evts.iErrorCode[FD_CLOSE_BIT])
347 WSASetLastError (evts.iErrorCode[FD_CLOSE_BIT]);
348 ret = SOCKET_ERROR;
350 /* This test makes accept/connect behave as on Linux when accept/
351 connect is called on a socket for which shutdown has been called.
352 The second half of this code is in the shutdown method. Note that
353 we only do this when called from accept/connect, not from select.
354 In this case erase == false, just as with read (MSG_PEEK). */
355 if (erase)
357 if ((event_mask & FD_ACCEPT) && saw_shutdown_read ())
359 WSASetLastError (WSAEINVAL);
360 ret = SOCKET_ERROR;
362 if (event_mask & FD_CONNECT)
364 WSASetLastError (WSAECONNRESET);
365 ret = SOCKET_ERROR;
369 if (erase)
370 wsock_events->events &= ~(events & ~(FD_WRITE | FD_CLOSE));
372 UNLOCK_EVENTS;
374 return ret;
378 fhandler_socket_wsock::wait_for_events (const long event_mask,
379 const DWORD flags)
381 if (async_io ())
382 return 0;
384 int ret;
385 long events = 0;
386 DWORD wfmo_timeout = 50;
387 DWORD timeout;
389 WSAEVENT ev[3] = { wsock_evt, NULL, NULL };
390 wait_signal_arrived here (ev[1]);
391 DWORD ev_cnt = 2;
392 if ((ev[2] = pthread::get_cancel_event ()) != NULL)
393 ++ev_cnt;
395 if (is_nonblocking () || (flags & MSG_DONTWAIT))
396 timeout = 0;
397 else if (event_mask & FD_READ)
398 timeout = rcvtimeo ();
399 else if (event_mask & FD_WRITE)
400 timeout = sndtimeo ();
401 else
402 timeout = INFINITE;
404 while (!(ret = evaluate_events (event_mask, events, !(flags & MSG_PEEK)))
405 && !events)
407 if (timeout == 0)
409 WSASetLastError (WSAEWOULDBLOCK);
410 return SOCKET_ERROR;
413 if (timeout < wfmo_timeout)
414 wfmo_timeout = timeout;
415 switch (WSAWaitForMultipleEvents (ev_cnt, ev, FALSE, wfmo_timeout, FALSE))
417 case WSA_WAIT_TIMEOUT:
418 case WSA_WAIT_EVENT_0:
419 if (timeout != INFINITE)
420 timeout -= wfmo_timeout;
421 break;
423 case WSA_WAIT_EVENT_0 + 1:
424 if (_my_tls.call_signal_handler ())
425 break;
426 WSASetLastError (WSAEINTR);
427 return SOCKET_ERROR;
429 case WSA_WAIT_EVENT_0 + 2:
430 pthread::static_cancel_self ();
431 break;
433 default:
434 /* wsock_evt can be NULL. We're generating the same errno values
435 as for sockets on which shutdown has been called. */
436 if (WSAGetLastError () != WSA_INVALID_HANDLE)
437 WSASetLastError (WSAEFAULT);
438 else
439 WSASetLastError ((event_mask & FD_CONNECT) ? WSAECONNRESET
440 : WSAEINVAL);
441 return SOCKET_ERROR;
444 return ret;
447 void
448 fhandler_socket_wsock::release_events ()
450 if (WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED)
452 HANDLE evt = wsock_evt;
453 HANDLE mtx = wsock_mtx;
455 wsock_evt = wsock_mtx = NULL;
456 ReleaseMutex (mtx);
457 NtClose (evt);
458 NtClose (mtx);
462 void
463 fhandler_socket_wsock::set_close_on_exec (bool val)
465 set_no_inheritance (wsock_mtx, val);
466 set_no_inheritance (wsock_evt, val);
467 if (need_fixup_before ())
469 close_on_exec (val);
470 debug_printf ("set close_on_exec for %s to %d", get_name (), val);
472 else
473 fhandler_base::set_close_on_exec (val);
476 /* Called if a freshly created socket is not inheritable. In that case we
477 have to use fixup_before_fork_exec. See comment in set_socket_handle for
478 a description of the problem. */
479 void
480 fhandler_socket_wsock::init_fixup_before ()
482 prot_info_ptr = (LPWSAPROTOCOL_INFOW)
483 cmalloc_abort (HEAP_BUF, sizeof (WSAPROTOCOL_INFOW));
484 cygheap->fdtab.inc_need_fixup_before ();
488 fhandler_socket_wsock::fixup_before_fork_exec (DWORD win_pid)
490 SOCKET ret = WSADuplicateSocketW (get_socket (), win_pid, prot_info_ptr);
491 if (ret)
492 set_winsock_errno ();
493 else
494 debug_printf ("WSADuplicateSocket succeeded (%x)", prot_info_ptr->dwProviderReserved);
495 return (int) ret;
498 void
499 fhandler_socket_wsock::fixup_after_fork (HANDLE parent)
501 fork_fixup (parent, wsock_mtx, "wsock_mtx");
502 fork_fixup (parent, wsock_evt, "wsock_evt");
504 if (!need_fixup_before ())
506 fhandler_base::fixup_after_fork (parent);
507 return;
510 SOCKET new_sock = WSASocketW (FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO,
511 FROM_PROTOCOL_INFO, prot_info_ptr, 0,
512 WSA_FLAG_OVERLAPPED);
513 if (new_sock == INVALID_SOCKET)
515 set_winsock_errno ();
516 set_handle ((HANDLE) INVALID_SOCKET);
518 else
520 /* Even though the original socket was not inheritable, the duplicated
521 socket is potentially inheritable again. */
522 SetHandleInformation ((HANDLE) new_sock, HANDLE_FLAG_INHERIT, 0);
523 set_handle ((HANDLE) new_sock);
524 debug_printf ("WSASocket succeeded (%p)", new_sock);
528 void
529 fhandler_socket_wsock::fixup_after_exec ()
531 if (need_fixup_before () && !close_on_exec ())
532 fixup_after_fork (NULL); /* No parent handle required. */
536 fhandler_socket_wsock::dup (fhandler_base *child, int flags)
538 debug_printf ("here");
539 fhandler_socket_wsock *fhs = (fhandler_socket_wsock *) child;
541 if (!DuplicateHandle (GetCurrentProcess (), wsock_mtx,
542 GetCurrentProcess (), &fhs->wsock_mtx,
543 0, TRUE, DUPLICATE_SAME_ACCESS))
545 __seterrno ();
546 return -1;
548 if (!DuplicateHandle (GetCurrentProcess (), wsock_evt,
549 GetCurrentProcess (), &fhs->wsock_evt,
550 0, TRUE, DUPLICATE_SAME_ACCESS))
552 __seterrno ();
553 NtClose (fhs->wsock_mtx);
554 return -1;
556 if (!need_fixup_before ())
558 int ret = fhandler_base::dup (child, flags);
559 if (ret)
561 NtClose (fhs->wsock_evt);
562 NtClose (fhs->wsock_mtx);
564 return ret;
567 cygheap->user.deimpersonate ();
568 fhs->init_fixup_before ();
569 fhs->set_handle (get_handle ());
570 int ret = fhs->fixup_before_fork_exec (GetCurrentProcessId ());
571 cygheap->user.reimpersonate ();
572 if (!ret)
574 fhs->fixup_after_fork (GetCurrentProcess ());
575 if (fhs->get_handle() != (HANDLE) INVALID_SOCKET)
576 return 0;
578 cygheap->fdtab.dec_need_fixup_before ();
579 NtClose (fhs->wsock_evt);
580 NtClose (fhs->wsock_mtx);
581 return -1;
585 fhandler_socket_wsock::set_socket_handle (SOCKET sock, int af, int type,
586 int flags)
588 DWORD hdl_flags;
589 bool lsp_fixup = false;
590 int file_flags = O_RDWR | O_BINARY;
592 /* Usually sockets are inheritable IFS objects. Unfortunately some virus
593 scanners or other network-oriented software replace normal sockets
594 with their own kind, which is running through a filter driver called
595 "layered service provider" (LSP) which, fortunately, are deprecated.
597 LSP sockets are not kernel objects. They are typically not marked as
598 inheritable, nor are they IFS handles. They are in fact not inheritable
599 to child processes, and it does not help to mark them inheritable via
600 SetHandleInformation. Subsequent socket calls in the child process fail
601 with error 10038, WSAENOTSOCK.
603 There's a neat way to workaround these annoying LSP sockets. WSAIoctl
604 allows to fetch the underlying base socket, which is a normal, inheritable
605 IFS handle. So we fetch the base socket, duplicate it, and close the
606 original socket. Now we have a standard IFS socket which (hopefully)
607 works as expected.
609 If that doesn't work for some reason, mark the sockets for duplication
610 via WSADuplicateSocket/WSASocket. This requires to start the child
611 process in SUSPENDED state so we only do this if really necessary. */
612 if (!GetHandleInformation ((HANDLE) sock, &hdl_flags)
613 || !(hdl_flags & HANDLE_FLAG_INHERIT))
615 int ret;
616 SOCKET base_sock;
617 DWORD bret;
619 lsp_fixup = true;
620 debug_printf ("LSP handle: %p", sock);
621 ret = WSAIoctl (sock, SIO_BASE_HANDLE, NULL, 0, (void *) &base_sock,
622 sizeof (base_sock), &bret, NULL, NULL);
623 if (ret)
624 debug_printf ("WSAIoctl: %u", WSAGetLastError ());
625 else if (base_sock != sock)
627 if (GetHandleInformation ((HANDLE) base_sock, &hdl_flags)
628 && (flags & HANDLE_FLAG_INHERIT))
630 if (!DuplicateHandle (GetCurrentProcess (), (HANDLE) base_sock,
631 GetCurrentProcess (), (PHANDLE) &base_sock,
632 0, TRUE, DUPLICATE_SAME_ACCESS))
633 debug_printf ("DuplicateHandle failed, %E");
634 else
636 ::closesocket (sock);
637 sock = base_sock;
638 lsp_fixup = false;
643 set_handle ((HANDLE) sock);
644 set_addr_family (af);
645 set_socket_type (type);
646 if (!init_events ())
647 return -1;
648 if (flags & SOCK_NONBLOCK)
649 file_flags |= O_NONBLOCK;
650 if (flags & SOCK_CLOEXEC)
652 set_close_on_exec (true);
653 file_flags |= O_CLOEXEC;
655 set_flags (file_flags);
656 if (lsp_fixup)
657 init_fixup_before ();
658 set_unique_id ();
659 if (get_socket_type () == SOCK_DGRAM)
661 /* Workaround the problem that a missing listener on a UDP socket
662 in a call to sendto will result in select/WSAEnumNetworkEvents
663 reporting that the socket has pending data and a subsequent call
664 to recvfrom will return -1 with error set to WSAECONNRESET.
666 This problem is a regression introduced in Windows 2000.
667 Instead of fixing the problem, a new socket IOCTL code has
668 been added, see http://support.microsoft.com/kb/263823 */
669 BOOL cr = FALSE;
670 DWORD blen;
671 if (WSAIoctl (sock, SIO_UDP_CONNRESET, &cr, sizeof cr, NULL, 0,
672 &blen, NULL, NULL) == SOCKET_ERROR)
673 debug_printf ("Reset SIO_UDP_CONNRESET: WinSock error %u",
674 WSAGetLastError ());
676 rmem () = 212992;
677 wmem () = 212992;
678 return 0;
681 fhandler_socket_inet::fhandler_socket_inet () :
682 fhandler_socket_wsock (),
683 oobinline (false),
684 tcp_quickack (false),
685 tcp_fastopen (false),
686 tcp_keepidle (7200), /* WinSock default */
687 tcp_keepcnt (10), /* WinSock default */
688 tcp_keepintvl (1) /* WinSock default */
692 fhandler_socket_inet::~fhandler_socket_inet ()
697 fhandler_socket_inet::socket (int af, int type, int protocol, int flags)
699 SOCKET sock;
700 int ret;
702 /* This test should be covered by ::socket, but make sure we don't
703 accidentally try anything else. */
704 if (type != SOCK_STREAM && type != SOCK_DGRAM && type != SOCK_RAW)
706 set_errno (EINVAL);
707 return -1;
709 sock = ::socket (af, type, protocol);
710 if (sock == INVALID_SOCKET)
712 set_winsock_errno ();
713 return -1;
715 ret = set_socket_handle (sock, af, type, flags);
716 if (ret < 0)
717 ::closesocket (sock);
718 return ret;
722 fhandler_socket_inet::socketpair (int af, int type, int protocol, int flags,
723 fhandler_socket *fh_out)
725 set_errno (EAFNOSUPPORT);
726 return -1;
730 fhandler_socket_inet::bind (const struct sockaddr *name, int namelen)
732 int res = -1;
734 if (!saw_reuseaddr ())
736 /* If the application didn't explicitely request SO_REUSEADDR,
737 enforce POSIX standard socket binding behaviour by setting the
738 SO_EXCLUSIVEADDRUSE socket option. See cygwin_setsockopt()
739 for a more detailed description. */
740 int on = 1;
741 int ret = ::setsockopt (get_socket (), SOL_SOCKET,
742 SO_EXCLUSIVEADDRUSE,
743 (const char *) &on, sizeof on);
744 debug_printf ("%d = setsockopt(SO_EXCLUSIVEADDRUSE), %E", ret);
746 if (::bind (get_socket (), name, namelen))
747 set_winsock_errno ();
748 else
749 res = 0;
751 return res;
755 fhandler_socket_inet::connect (const struct sockaddr *name, int namelen)
757 struct sockaddr_storage sst;
758 bool reset = (name->sa_family == AF_UNSPEC
759 && get_socket_type () == SOCK_DGRAM);
761 if (reset)
763 if (connect_state () == unconnected)
764 return 0;
765 /* To reset a connected DGRAM socket, call Winsock's connect
766 function with the address member of the sockaddr structure
767 filled with zeroes. */
768 memset (&sst, 0, sizeof sst);
769 sst.ss_family = get_addr_family ();
771 else if (get_inet_addr_inet (name, namelen, &sst, &namelen) == SOCKET_ERROR)
772 return SOCKET_ERROR;
774 /* Initialize connect state to "connect_pending". In the SOCK_STREAM
775 case, the state is ultimately set to "connected" or "connect_failed" in
776 wait_for_events when the FD_CONNECT event occurs. Note that the
777 underlying OS sockets are always non-blocking in this case and a
778 successfully initiated non-blocking Winsock connect always returns
779 WSAEWOULDBLOCK. Thus it's safe to rely on event handling. For DGRAM
780 sockets, however, connect can return immediately.
782 Check for either unconnected or connect_failed since in both cases it's
783 allowed to retry connecting the socket. It's also ok (albeit ugly) to
784 call connect to check if a previous non-blocking connect finished.
786 Set connect_state before calling connect, otherwise a race condition with
787 an already running select or poll might occur. */
788 if (connect_state () == unconnected || connect_state () == connect_failed)
789 connect_state (connect_pending);
791 int res = ::connect (get_socket (), (struct sockaddr *) &sst, namelen);
792 if (!res)
794 if (reset)
795 connect_state (unconnected);
796 else
797 connect_state (connected);
799 else if (!is_nonblocking ()
800 && res == SOCKET_ERROR
801 && WSAGetLastError () == WSAEWOULDBLOCK)
802 res = wait_for_events (FD_CONNECT | FD_CLOSE, 0);
804 if (res)
806 DWORD err = WSAGetLastError ();
808 /* Some applications use the ugly technique to check if a non-blocking
809 connect succeeded by calling connect again, until it returns EISCONN.
810 This circumvents the event handling and connect_state is never set.
811 Thus we check for this situation here. */
812 if (err == WSAEISCONN)
813 connect_state (connected);
814 /* Winsock returns WSAEWOULDBLOCK if the non-blocking socket cannot be
815 conected immediately. Convert to POSIX/Linux compliant EINPROGRESS. */
816 else if (is_nonblocking () && err == WSAEWOULDBLOCK)
817 WSASetLastError (WSAEINPROGRESS);
818 /* Winsock returns WSAEINVAL if the socket is already a listener.
819 Convert to POSIX/Linux compliant EISCONN. */
820 else if (err == WSAEINVAL && connect_state () == listener)
821 WSASetLastError (WSAEISCONN);
822 /* Any other error except WSAEALREADY means the connect failed. */
823 else if (connect_state () == connect_pending && err != WSAEALREADY)
824 connect_state (connect_failed);
825 set_winsock_errno ();
828 return res;
832 fhandler_socket_inet::listen (int backlog)
834 int res = ::listen (get_socket (), backlog);
835 if (res && WSAGetLastError () == WSAEINVAL)
837 /* It's perfectly valid to call listen on an unbound INET socket.
838 In this case the socket is automatically bound to an unused
839 port number, listening on all interfaces. On WinSock, listen
840 fails with WSAEINVAL when it's called on an unbound socket.
841 So we have to bind manually here to have POSIX semantics. */
842 if (get_addr_family () == AF_INET)
844 struct sockaddr_in sin;
845 sin.sin_family = AF_INET;
846 sin.sin_port = 0;
847 sin.sin_addr.s_addr = INADDR_ANY;
848 if (!::bind (get_socket (), (struct sockaddr *) &sin, sizeof sin))
849 res = ::listen (get_socket (), backlog);
851 else if (get_addr_family () == AF_INET6)
853 struct sockaddr_in6 sin6;
854 memset (&sin6, 0, sizeof sin6);
855 sin6.sin6_family = AF_INET6;
856 if (!::bind (get_socket (), (struct sockaddr *) &sin6, sizeof sin6))
857 res = ::listen (get_socket (), backlog);
860 if (!res)
861 connect_state (listener); /* gets set to connected on accepted socket. */
862 else
863 set_winsock_errno ();
864 return res;
868 fhandler_socket_inet::accept4 (struct sockaddr *peer, int *len, int flags)
870 int ret = -1;
871 /* Allows NULL peer and len parameters. */
872 struct sockaddr_storage lpeer;
873 int llen = sizeof (struct sockaddr_storage);
875 /* Windows event handling does not check for the validity of the desired
876 flags so we have to do it here. */
877 if (connect_state () != listener)
879 WSASetLastError (WSAEINVAL);
880 set_winsock_errno ();
881 return -1;
884 SOCKET res = INVALID_SOCKET;
885 while (!(res = wait_for_events (FD_ACCEPT | FD_CLOSE, 0))
886 && (res = ::accept (get_socket (), (struct sockaddr *) &lpeer, &llen))
887 == INVALID_SOCKET
888 && WSAGetLastError () == WSAEWOULDBLOCK)
890 if (res == INVALID_SOCKET)
891 set_winsock_errno ();
892 else
894 cygheap_fdnew fd;
896 if (fd >= 0)
898 fhandler_socket_inet *sock = (fhandler_socket_inet *)
899 build_fh_dev (dev ());
900 if (sock && sock->set_socket_handle (res, get_addr_family (),
901 get_socket_type (),
902 get_socket_flags ()) == 0)
904 sock->async_io (false); /* set_socket_handle disables async. */
905 /* No locking necessary at this point. */
906 sock->wsock_events->events = wsock_events->events | FD_WRITE;
907 sock->wsock_events->owner = wsock_events->owner;
908 sock->connect_state (connected);
909 fd = sock;
910 if (fd <= 2)
911 set_std_handle (fd);
912 ret = fd;
913 if (peer)
915 memcpy (peer, &lpeer, MIN (*len, llen));
916 *len = llen;
919 else
920 delete sock;
922 if (ret == -1)
923 ::closesocket (res);
925 return ret;
929 fhandler_socket_inet::getsockname (struct sockaddr *name, int *namelen)
931 int res = -1;
933 /* WinSock just returns WSAEFAULT if the buffer is too small. Use a
934 big enough local buffer and truncate later as necessary, per POSIX. */
935 struct sockaddr_storage sock;
936 int len = sizeof sock;
937 res = ::getsockname (get_socket (), (struct sockaddr *) &sock, &len);
938 if (!res)
940 memcpy (name, &sock, MIN (*namelen, len));
941 *namelen = len;
943 else
945 if (WSAGetLastError () == WSAEINVAL)
947 /* WinSock returns WSAEINVAL if the socket is locally
948 unbound. Per SUSv3 this is not an error condition.
949 We're faking a valid return value here by creating the
950 same content in the sockaddr structure as on Linux. */
951 memset (&sock, 0, sizeof sock);
952 sock.ss_family = get_addr_family ();
953 switch (get_addr_family ())
955 case AF_INET:
956 res = 0;
957 len = (int) sizeof (struct sockaddr_in);
958 break;
959 case AF_INET6:
960 res = 0;
961 len = (int) sizeof (struct sockaddr_in6);
962 break;
963 default:
964 WSASetLastError (WSAEOPNOTSUPP);
965 break;
967 if (!res)
969 memcpy (name, &sock, MIN (*namelen, len));
970 *namelen = len;
973 if (res)
974 set_winsock_errno ();
976 return res;
980 fhandler_socket_inet::getpeername (struct sockaddr *name, int *namelen)
982 /* Always use a local big enough buffer and truncate later as necessary
983 per POSIX. WinSock unfortunately only returns WSAEFAULT if the buffer
984 is too small. */
985 struct sockaddr_storage sock;
986 int len = sizeof sock;
987 int res = ::getpeername (get_socket (), (struct sockaddr *) &sock, &len);
988 if (res)
989 set_winsock_errno ();
990 else
992 memcpy (name, &sock, MIN (*namelen, len));
993 *namelen = len;
995 return res;
999 fhandler_socket_wsock::shutdown (int how)
1001 int res = ::shutdown (get_socket (), how);
1003 /* Linux allows to call shutdown for any socket, even if it's not connected.
1004 This also disables to call accept on this socket, if shutdown has been
1005 called with the SHUT_RD or SHUT_RDWR parameter. In contrast, WinSock
1006 only allows to call shutdown on a connected socket. The accept function
1007 is in no way affected. So, what we do here is to fake success, and to
1008 change the event settings so that an FD_CLOSE event is triggered for the
1009 calling Cygwin function. The evaluate_events method handles the call
1010 from accept specially to generate a Linux-compatible behaviour. */
1011 if (res && WSAGetLastError () != WSAENOTCONN)
1012 set_winsock_errno ();
1013 else
1015 res = 0;
1016 switch (how)
1018 case SHUT_RD:
1019 saw_shutdown_read (true);
1020 wsock_events->events |= FD_CLOSE;
1021 SetEvent (wsock_evt);
1022 break;
1023 case SHUT_WR:
1024 saw_shutdown_write (true);
1025 break;
1026 case SHUT_RDWR:
1027 saw_shutdown_read (true);
1028 saw_shutdown_write (true);
1029 wsock_events->events |= FD_CLOSE;
1030 SetEvent (wsock_evt);
1031 break;
1034 return res;
1038 fhandler_socket_wsock::close ()
1040 int res = 0;
1042 release_events ();
1043 while ((res = ::closesocket (get_socket ())) != 0)
1045 if (WSAGetLastError () != WSAEWOULDBLOCK)
1047 set_winsock_errno ();
1048 res = -1;
1049 break;
1051 if (cygwait (10) == WAIT_SIGNALED)
1053 set_errno (EINTR);
1054 res = -1;
1055 break;
1057 WSASetLastError (0);
1059 return res;
/* Common receive path behind the recv-style entry points.

   wsamsg describes the buffers, the optional source address storage, the
   control buffer and the caller's flags.  use_recvmsg requests WSARecvMsg;
   if the extension function pointer cannot be obtained, the call fails
   when a control buffer was supplied and otherwise falls back to
   WSARecv/WSARecvFrom.

   wsabuf and wsacnt are references into *wsamsg, so the MSG_WAITALL
   bookkeeping below modifies the caller's WSAMSG in place.

   Returns the number of bytes received (0 if WinSock reports
   WSAESHUTDOWN before anything was read), or SOCKET_ERROR with errno
   set. */
1062 ssize_t
1063 fhandler_socket_inet::recv_internal (LPWSAMSG wsamsg, bool use_recvmsg)
1065 ssize_t res = 0;
1066 DWORD ret = 0, wret;
1067 int evt_mask = (wsamsg->dwFlags & MSG_OOB) ? FD_OOB : FD_READ;
1068 LPWSABUF &wsabuf = wsamsg->lpBuffers;
1069 ULONG &wsacnt = wsamsg->dwBufferCount;
1070 static NO_COPY LPFN_WSARECVMSG WSARecvMsg;
1071 bool read_oob = false;
1073 /* CV 2014-10-26: Do not check for the connect_state at this point. In
1074 certain scenarios there's no way to check the connect state reliably.
1075 Example (hexchat): Parent process creates socket, forks, child process
1076 calls connect, parent process calls read. Even if the event handling
1077 allows to check for FD_CONNECT in the parent, there is always yet another
1078 scenario we can easily break. */
/* The unmasked caller flags are kept in wait_flags and handed to
   wait_for_events; wsamsg->dwFlags is reduced to the flags in the mask
   below before it reaches WinSock. */
1080 DWORD wait_flags = wsamsg->dwFlags;
1081 bool waitall = !!(wait_flags & MSG_WAITALL);
1082 wsamsg->dwFlags &= (MSG_OOB | MSG_PEEK | MSG_DONTROUTE);
1083 if (use_recvmsg)
1085 if (!WSARecvMsg
1086 && get_ext_funcptr (get_socket (), &WSARecvMsg) == SOCKET_ERROR)
1088 if (wsamsg->Control.len > 0)
1090 set_winsock_errno ();
1091 return SOCKET_ERROR;
1093 use_recvmsg = false;
1095 else /* Only MSG_PEEK is supported by WSARecvMsg. */
1096 wsamsg->dwFlags &= MSG_PEEK;
/* MSG_WAITALL is rejected with WSAEOPNOTSUPP on anything but SOCK_STREAM
   and silently dropped for nonblocking sockets or together with MSG_OOB
   or MSG_PEEK. */
1098 if (waitall)
1100 if (get_socket_type () != SOCK_STREAM)
1102 WSASetLastError (WSAEOPNOTSUPP);
1103 set_winsock_errno ();
1104 return SOCKET_ERROR;
1106 if (is_nonblocking () || (wsamsg->dwFlags & (MSG_OOB | MSG_PEEK)))
1107 waitall = false;
1110 /* recv() returns EINVAL if MSG_OOB flag is set in inline mode. */
1111 if (oobinline && (wsamsg->dwFlags & MSG_OOB))
1113 set_errno (EINVAL);
1114 return SOCKET_ERROR;
1117 /* Check whether OOB data is ready or not */
1118 if (get_socket_type () == SOCK_STREAM)
1119 if ((wsamsg->dwFlags & MSG_OOB) || oobinline)
1121 u_long atmark = 0;
1122 /* SIOCATMARK = _IOR('s',7,u_long) */
1123 int err = ::ioctlsocket (get_socket (), _IOR('s',7,u_long), &atmark);
1124 if (err)
1126 set_winsock_errno ();
1127 return SOCKET_ERROR;
1129 /* If there is no OOB data, recv() with MSG_OOB returns EINVAL.
1130 Note: The return value of SIOCATMARK in non-inline mode of
1131 winsock is FALSE if OOB data exists, TRUE otherwise. */
1132 if (atmark && (wsamsg->dwFlags & MSG_OOB))
1134 /* No OOB data */
1135 set_errno (EINVAL);
1136 return SOCKET_ERROR;
1138 /* Inline mode for out-of-band (OOB) data of winsock is
1139 completely broken. That is, SIOCATMARK always returns
1140 TRUE in inline mode. Due to this problem, application
1141 cannot determine OOB data at all. Therefore the behavior
1142 of a socket with SO_OOBINLINE set is simulated using
1143 a socket with SO_OOBINLINE not set. In this fake inline
1144 mode, the order of the OOB and non-OOB data is not
1145 preserved. OOB data is read before non-OOB data sent
1146 prior to the OOB data. However, this most likely is
1147 not a problem in most cases. */
1148 /* If there is OOB data, read OOB data using MSG_OOB in
1149 fake inline mode. */
1150 if (!atmark && oobinline)
1152 read_oob = true;
1153 evt_mask = FD_OOB;
1157 /* Note: Don't call WSARecvFrom(MSG_PEEK) without actually having data
1158 waiting in the buffers, otherwise the event handling gets messed up
1159 for some reason. */
1160 while (!(res = wait_for_events (evt_mask | FD_CLOSE, wait_flags))
1161 || saw_shutdown_read ())
1163 DWORD dwFlags = wsamsg->dwFlags | (read_oob ? MSG_OOB : 0);
1164 if (use_recvmsg)
1165 res = WSARecvMsg (get_socket (), wsamsg, &wret, NULL, NULL);
1166 /* This is working around a really weird problem in WinSock.
1168 Assume you create a socket, fork the process (thus duplicating
1169 the socket), connect the socket in the child, then call recv
1170 on the original socket handle in the parent process.
1171 In this scenario, calls to WinSock's recvfrom and WSARecvFrom
1172 in the parent will fail with WSAEINVAL, regardless whether both
1173 address parameters, name and namelen, are NULL or point to valid
1174 storage. However, calls to recv and WSARecv succeed as expected.
1175 Per MSDN, WSAEINVAL in the context of recv means "The socket has not
1176 been bound". It is as if the recvfrom functions test if the socket
1177 is bound locally, but in the parent process, WinSock doesn't know
1178 about that and fails, while the same test is omitted in the recv
1179 functions.
1181 This also covers another weird case: WinSock returns WSAEFAULT if
1182 namelen is a valid pointer while name is NULL. Both parameters are
1183 ignored for TCP sockets, so this only occurs when using UDP socket. */
1184 else if (!wsamsg->name || get_socket_type () == SOCK_STREAM)
1185 res = WSARecv (get_socket (), wsabuf, wsacnt, &wret, &dwFlags,
1186 NULL, NULL);
1187 else
1188 res = WSARecvFrom (get_socket (), wsabuf, wsacnt, &wret,
1189 &dwFlags, wsamsg->name, &wsamsg->namelen,
1190 NULL, NULL);
/* Successful receive: add the byte count to the running total.  Without
   waitall a single receive ends the loop; with waitall the WSABUF array
   is advanced past the bytes just received and the loop continues until
   no buffers remain. */
1191 if (!res)
1193 ret += wret;
1194 if (!waitall)
1195 break;
1196 while (wret && wsacnt)
1198 if (wsabuf->len > wret)
1200 wsabuf->len -= wret;
1201 wsabuf->buf += wret;
1202 wret = 0;
1204 else
1206 wret -= wsabuf->len;
1207 ++wsabuf;
1208 --wsacnt;
1211 if (!wsacnt)
1212 break;
1214 else if (WSAGetLastError () != WSAEWOULDBLOCK)
1215 break;
/* The loop ended on an error.  Bytes already accumulated in ret are still
   returned; only when nothing was received is an error (or EOF for
   WSAESHUTDOWN) reported. */
1218 if (res)
1220 /* According to SUSv3, errno isn't set in that case and no error
1221 condition is returned. */
1222 if (WSAGetLastError () == WSAEMSGSIZE)
1223 ret += wret;
1224 else if (!ret)
1226 /* ESHUTDOWN isn't defined for recv in SUSv3. Simply EOF is returned
1227 in this case. */
1228 if (WSAGetLastError () == WSAESHUTDOWN)
1229 ret = 0;
1230 else
1232 set_winsock_errno ();
1233 return SOCKET_ERROR;
1238 return ret;
1241 ssize_t
1242 fhandler_socket_wsock::recvfrom (void *in_ptr, size_t len, int flags,
1243 struct sockaddr *from, int *fromlen)
1245 char *ptr = (char *) in_ptr;
1247 /* size_t is 64 bit, but the len member in WSABUF is 32 bit.
1248 Split buffer if necessary. */
1249 DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0);
1250 WSABUF wsabuf[bufcnt];
1251 WSAMSG wsamsg = { from, from && fromlen ? *fromlen : 0,
1252 wsabuf, bufcnt,
1253 { 0, NULL },
1254 (DWORD) flags };
1255 /* Don't use len as loop condition, it could be 0. */
1256 for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr)
1258 wsaptr->len = MIN (len, UINT32_MAX);
1259 wsaptr->buf = ptr;
1260 len -= wsaptr->len;
1261 ptr += wsaptr->len;
1263 ssize_t ret = recv_internal (&wsamsg, false);
1264 if (fromlen)
1265 *fromlen = wsamsg.namelen;
1266 return ret;
1269 ssize_t
1270 fhandler_socket_wsock::recvmsg (struct msghdr *msg, int flags)
1272 /* Disappointing but true: Even if WSARecvMsg is supported, it's only
1273 supported for datagram and raw sockets. */
1274 bool use_recvmsg = true;
1275 if (get_socket_type () == SOCK_STREAM || get_addr_family () == AF_LOCAL)
1277 use_recvmsg = false;
1278 msg->msg_controllen = 0;
1281 WSABUF wsabuf[msg->msg_iovlen];
1282 WSABUF *wsaptr = wsabuf + msg->msg_iovlen;
1283 const struct iovec *iovptr = msg->msg_iov + msg->msg_iovlen;
1284 while (--wsaptr >= wsabuf)
1286 wsaptr->len = (--iovptr)->iov_len;
1287 wsaptr->buf = (char *) iovptr->iov_base;
1289 WSAMSG wsamsg = { (struct sockaddr *) msg->msg_name, msg->msg_namelen,
1290 wsabuf, (DWORD) msg->msg_iovlen,
1291 { (DWORD) msg->msg_controllen, (char *) msg->msg_control },
1292 (DWORD) flags };
1293 ssize_t ret = recv_internal (&wsamsg, use_recvmsg);
1294 if (ret >= 0)
1296 msg->msg_namelen = wsamsg.namelen;
1297 msg->msg_controllen = wsamsg.Control.len;
1298 msg->msg_flags = wsamsg.dwFlags;
1299 /* if a UDP_GRO packet is present, convert gso_size from Windows DWORD
1300 to Linux-compatible uint16_t. We don't have to change the
1301 msg_control block layout for that, assuming applications do as they
1302 have been told and only use CMSG_FIRSTHDR/CMSG_NXTHDR/CMSG_DATA to
1303 access control messages. The cmsghdr alignment saves our ass here! */
1304 if (msg->msg_controllen && get_socket_type () == SOCK_DGRAM
1305 && (get_addr_family () == AF_INET || get_addr_family () == AF_INET6))
1307 struct cmsghdr *cmsg;
1309 for (cmsg = CMSG_FIRSTHDR (msg);
1310 cmsg;
1311 cmsg = CMSG_NXTHDR (msg, cmsg))
1313 if (cmsg->cmsg_level == SOL_UDP
1314 && cmsg->cmsg_type == UDP_GRO)
1316 PDWORD gso_size_win = (PDWORD) CMSG_DATA(cmsg);
1317 uint16_t *gso_size_cyg = (uint16_t *) CMSG_DATA(cmsg);
1318 uint16_t gso_size = (uint16_t) *gso_size_win;
1319 *gso_size_cyg = gso_size;
1320 break;
1325 return ret;
1328 void
1329 fhandler_socket_wsock::read (void *in_ptr, size_t& len)
1331 char *ptr = (char *) in_ptr;
1333 /* size_t is 64 bit, but the len member in WSABUF is 32 bit.
1334 Split buffer if necessary. */
1335 DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0);
1336 WSABUF wsabuf[bufcnt];
1337 WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 };
1338 /* Don't use len as loop condition, it could be 0. */
1339 for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr)
1341 wsaptr->len = MIN (len, UINT32_MAX);
1342 wsaptr->buf = ptr;
1343 len -= wsaptr->len;
1344 ptr += wsaptr->len;
1346 len = recv_internal (&wsamsg, false);
1349 ssize_t
1350 fhandler_socket_wsock::readv (const struct iovec *const iov, const int iovcnt,
1351 ssize_t tot)
1353 WSABUF wsabuf[iovcnt];
1354 WSABUF *wsaptr = wsabuf + iovcnt;
1355 const struct iovec *iovptr = iov + iovcnt;
1356 while (--wsaptr >= wsabuf)
1358 wsaptr->len = (--iovptr)->iov_len;
1359 wsaptr->buf = (char *) iovptr->iov_base;
1361 WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 };
1362 return recv_internal (&wsamsg, false);
/* Common send path behind the send-style entry points.

   wsamsg carries the destination address, the buffers and optional
   control data; flags are the caller's MSG_* flags.  MSG_EOR is rejected
   with EOPNOTSUPP.  MSG_DONTWAIT is passed on to wait_for_events,
   MSG_NOSIGNAL suppresses SIGPIPE, and only MSG_OOB and MSG_DONTROUTE
   reach WinSock.  WSASendMsg is used when control data is present,
   WSASendTo otherwise.

   For SOCK_STREAM the input buffers are sent in chunks of at most
   wmem () bytes.  Other socket types hand all buffers to WinSock in one
   call.

   Returns the number of bytes sent if any were sent.  Otherwise, on
   SOCKET_ERROR, errno is set; ECONNABORTED/ESHUTDOWN on a stream socket
   become EPIPE and raise SIGPIPE unless MSG_NOSIGNAL was given. */
1365 ssize_t
1366 fhandler_socket_wsock::send_internal (struct _WSAMSG *wsamsg, int flags)
1368 ssize_t res = 0;
1369 DWORD ret = 0, sum = 0;
1370 WSABUF out_buf[wsamsg->dwBufferCount];
1371 bool use_sendmsg = false;
1372 DWORD wait_flags = flags & MSG_DONTWAIT;
1373 bool nosignal = !!(flags & MSG_NOSIGNAL);
1375 /* MSG_EOR not supported by any protocol */
1376 if (flags & MSG_EOR)
1378 set_errno (EOPNOTSUPP);
1379 return SOCKET_ERROR;
1382 flags &= (MSG_OOB | MSG_DONTROUTE);
1383 if (wsamsg->Control.len > 0)
1384 use_sendmsg = true;
1385 /* Workaround for MSDN KB 823764: Split a message into chunks <= SO_SNDBUF.
1386 in_idx is the index of the current lpBuffers from the input wsamsg buffer.
1387 in_off is used to keep track of the next byte to write from a wsamsg
1388 buffer which only gets partially written. */
1389 for (DWORD in_idx = 0, in_off = 0;
1390 in_idx < wsamsg->dwBufferCount;
1391 in_off >= wsamsg->lpBuffers[in_idx].len && (++in_idx, (in_off = 0)))
1393 /* Split a message into the least number of pieces to minimize the
1394 number of WSASendTo calls. Don't split datagram messages (bad idea).
1395 out_idx is the index of the next buffer in the out_buf WSABUF,
1396 also the number of buffers given to WSASendTo.
1397 out_len is the number of bytes in the buffers given to WSASendTo.
1398 Don't split datagram messages (very bad idea). */
1399 DWORD out_idx = 0;
1400 DWORD out_len = 0;
1401 if (get_socket_type () == SOCK_STREAM)
1405 out_buf[out_idx].buf = wsamsg->lpBuffers[in_idx].buf + in_off;
1406 out_buf[out_idx].len = wsamsg->lpBuffers[in_idx].len - in_off;
1407 out_len += out_buf[out_idx].len;
1408 out_idx++;
1410 while (out_len < (unsigned) wmem ()
1411 && (in_off = 0, ++in_idx < wsamsg->dwBufferCount));
1412 /* Tweak len of the last out_buf buffer so the entire number of bytes
1413 is (less than or) equal to wmem (). Fix out_len as well since it's
1414 used in a subsequent test expression. */
1415 if (out_len > (unsigned) wmem ())
1417 out_buf[out_idx - 1].len -= out_len - (unsigned) wmem ();
1418 out_len = (unsigned) wmem ();
1420 /* Add the bytes written from the current last buffer to in_off,
1421 so in_off points to the next byte to be written from that buffer,
1422 or beyond which lets the outer loop skip to the next buffer. */
1423 in_off += out_buf[out_idx - 1].len;
/* Send the current chunk.  On WSAEWOULDBLOCK the FD_WRITE bit is cleared
   in the shared event state under the event lock, and the send is retried
   once wait_for_events reports FD_WRITE or FD_CLOSE without error. */
1428 if (use_sendmsg)
1429 res = WSASendMsg (get_socket (), wsamsg, flags, &ret, NULL, NULL);
1430 else if (get_socket_type () == SOCK_STREAM)
1431 res = WSASendTo (get_socket (), out_buf, out_idx, &ret, flags,
1432 wsamsg->name, wsamsg->namelen, NULL, NULL);
1433 else
1434 res = WSASendTo (get_socket (), wsamsg->lpBuffers,
1435 wsamsg->dwBufferCount, &ret, flags,
1436 wsamsg->name, wsamsg->namelen, NULL, NULL);
1437 if (res && (WSAGetLastError () == WSAEWOULDBLOCK))
1439 LOCK_EVENTS;
1440 wsock_events->events &= ~FD_WRITE;
1441 UNLOCK_EVENTS;
1444 while (res && (WSAGetLastError () == WSAEWOULDBLOCK)
1445 && !(res = wait_for_events (FD_WRITE | FD_CLOSE, wait_flags)));
1447 if (!res)
1449 sum += ret;
1450 /* For streams, return to application if the number of bytes written
1451 is less than the number of bytes we intended to write in a single
1452 call to WSASendTo. Otherwise we would have to add code to
1453 backtrack in the input buffers, which is questionable. There was
1454 probably a good reason we couldn't write more. */
1455 if (get_socket_type () != SOCK_STREAM || ret < out_len)
1456 break;
1458 else if (is_nonblocking () || WSAGetLastError() != WSAEWOULDBLOCK)
1459 break;
/* A partial send counts as success: report the bytes sent and leave
   errno alone. */
1462 if (sum)
1463 res = sum;
1464 else if (res == SOCKET_ERROR)
1466 set_winsock_errno ();
1468 /* Special handling for EPIPE and SIGPIPE.
1470 EPIPE is generated if the local end has been shut down on a connection
1471 oriented socket. In this case the process will also receive a SIGPIPE
1472 unless MSG_NOSIGNAL is set. */
1473 if ((get_errno () == ECONNABORTED || get_errno () == ESHUTDOWN)
1474 && get_socket_type () == SOCK_STREAM)
1476 set_errno (EPIPE);
1477 if (!nosignal)
1478 raise (SIGPIPE);
1482 return res;
1485 ssize_t
1486 fhandler_socket_inet::sendto (const void *in_ptr, size_t len, int flags,
1487 const struct sockaddr *to, int tolen)
1489 char *ptr = (char *) in_ptr;
1490 struct sockaddr_storage sst;
1492 if (to && get_inet_addr_inet (to, tolen, &sst, &tolen) == SOCKET_ERROR)
1493 return SOCKET_ERROR;
1495 /* size_t is 64 bit, but the len member in WSABUF is 32 bit.
1496 Split buffer if necessary. */
1497 DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0);
1498 WSABUF wsabuf[bufcnt];
1499 WSAMSG wsamsg = { to ? (struct sockaddr *) &sst : NULL, tolen,
1500 wsabuf, bufcnt,
1501 { 0, NULL },
1502 0 };
1503 /* Don't use len as loop condition, it could be 0. */
1504 for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr)
1506 wsaptr->len = MIN (len, UINT32_MAX);
1507 wsaptr->buf = ptr;
1508 len -= wsaptr->len;
1509 ptr += wsaptr->len;
1511 return send_internal (&wsamsg, flags);
1514 ssize_t
1515 fhandler_socket_inet::sendmsg (const struct msghdr *in_msg, int flags)
1517 struct sockaddr_storage sst;
1518 int len = 0;
1519 DWORD old_gso_size = MAXDWORD;
1520 ssize_t ret;
1522 /* Copy incoming msghdr into a local copy. We only access this from
1523 here on. Thus, make sure not to manipulate user space data. */
1524 struct msghdr local_msg = *in_msg;
1525 struct msghdr *msg = &local_msg;
1527 if (msg->msg_name
1528 && get_inet_addr_inet ((struct sockaddr *) msg->msg_name,
1529 msg->msg_namelen, &sst, &len) == SOCKET_ERROR)
1530 return SOCKET_ERROR;
1532 /* Check for our optmem_max value */
1533 if (msg->msg_controllen > NT_MAX_PATH)
1535 set_errno (ENOBUFS);
1536 return SOCKET_ERROR;
1539 /* WSASendMsg is supported only for datagram and raw sockets. */
1540 if (get_socket_type () != SOCK_DGRAM && get_socket_type () != SOCK_RAW)
1541 msg->msg_controllen = 0;
1543 /* If we actually have control data, copy it to local storage. Control
1544 messages only handled by us have to be dropped from the msg_control
1545 block, and we don't want to change user space data. */
1546 tmp_pathbuf tp;
1547 if (msg->msg_controllen)
1549 void *local_cmsg = tp.c_get ();
1550 memcpy (local_cmsg, msg->msg_control, msg->msg_controllen);
1551 msg->msg_control = local_cmsg;
1554 /* Check for control message we handle inside Cygwin. Right now this
1555 only affects UDP sockets, so check here early. */
1556 if (msg->msg_controllen && get_socket_type () == SOCK_DGRAM)
1558 struct cmsghdr *cmsg;
1559 bool dropped = false;
1561 for (cmsg = CMSG_FIRSTHDR (msg);
1562 cmsg;
1563 cmsg = dropped ? cmsg : CMSG_NXTHDR (msg, cmsg))
1565 dropped = false;
1566 /* cmsg within bounds? */
1567 if (cmsg->cmsg_len < sizeof (struct cmsghdr)
1568 || cmsg->cmsg_len > (size_t) msg->msg_controllen
1569 - ((uintptr_t) cmsg
1570 - (uintptr_t) msg->msg_control))
1572 set_errno (EINVAL);
1573 return SOCKET_ERROR;
1575 /* UDP_SEGMENT? Override gso_size for this single sendmsg. */
1576 if (cmsg->cmsg_level == SOL_UDP && cmsg->cmsg_type == UDP_SEGMENT)
1578 /* 16 bit unsigned, as on Linux */
1579 DWORD gso_size = *(uint16_t *) CMSG_DATA(cmsg);
1580 int size = sizeof old_gso_size;
1581 /* Save the old gso_size and set the requested one. */
1582 if (::getsockopt (get_socket (), IPPROTO_UDP, UDP_SEGMENT,
1583 (char *) &old_gso_size, &size) == SOCKET_ERROR
1584 || ::setsockopt (get_socket (), IPPROTO_UDP, UDP_SEGMENT,
1585 (char *) &gso_size, sizeof gso_size)
1586 == SOCKET_ERROR)
1588 set_winsock_errno ();
1589 return SOCKET_ERROR;
1591 /* Drop message from msgbuf, Windows doesn't know it. */
1592 size_t cmsg_size = CMSG_ALIGN (cmsg->cmsg_len);
1593 struct cmsghdr *cmsg_next = CMSG_NXTHDR (msg, cmsg);
1594 if (cmsg_next)
1595 memmove (cmsg, cmsg_next, (char *) msg->msg_control
1596 + msg->msg_controllen
1597 - (char *) cmsg_next);
1598 msg->msg_controllen -= cmsg_size;
1599 dropped = true;
1600 /* Avoid infinite loop */
1601 if (msg->msg_controllen <= 0)
1603 cmsg = NULL;
1604 msg->msg_controllen = 0;
1610 /* Copy over msg_iov into an equivalent WSABUF array. */
1611 WSABUF wsabuf[msg->msg_iovlen];
1612 WSABUF *wsaptr = wsabuf;
1613 const struct iovec *iovptr = msg->msg_iov;
1614 for (int i = 0; i < msg->msg_iovlen; ++i)
1616 wsaptr->len = iovptr->iov_len;
1617 (wsaptr++)->buf = (char *) (iovptr++)->iov_base;
1620 /* Eventually copy over to a WSAMSG and call send_internal with that. */
1621 WSAMSG wsamsg = { msg->msg_name ? (struct sockaddr *) &sst : NULL, len,
1622 wsabuf, (DWORD) msg->msg_iovlen,
1623 { (DWORD) msg->msg_controllen,
1624 msg->msg_controllen ? (char *) msg->msg_control : NULL },
1625 0 };
1626 ret = send_internal (&wsamsg, flags);
1627 if (old_gso_size != MAXDWORD)
1628 ::setsockopt (get_socket (), IPPROTO_UDP, UDP_SEGMENT,
1629 (char *) &old_gso_size, sizeof old_gso_size);
1630 return ret;
1633 ssize_t
1634 fhandler_socket_wsock::write (const void *in_ptr, size_t len)
1636 char *ptr = (char *) in_ptr;
1638 /* size_t is 64 bit, but the len member in WSABUF is 32 bit.
1639 Split buffer if necessary. */
1640 DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0);
1641 WSABUF wsabuf[bufcnt];
1642 WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 };
1643 /* Don't use len as loop condition, it could be 0. */
1644 for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr)
1646 wsaptr->len = MIN (len, UINT32_MAX);
1647 wsaptr->buf = ptr;
1648 len -= wsaptr->len;
1649 ptr += wsaptr->len;
1651 return send_internal (&wsamsg, 0);
1654 ssize_t
1655 fhandler_socket_wsock::writev (const struct iovec *const iov, const int iovcnt,
1656 ssize_t tot)
1658 WSABUF wsabuf[iovcnt];
1659 WSABUF *wsaptr = wsabuf;
1660 const struct iovec *iovptr = iov;
1661 for (int i = 0; i < iovcnt; ++i)
1663 wsaptr->len = iovptr->iov_len;
1664 (wsaptr++)->buf = (char *) (iovptr++)->iov_base;
1666 WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 };
1667 return send_internal (&wsamsg, 0);
/* Option name substituted for TCP_USER_TIMEOUT in setsockopt/getsockopt
   when wincap.has_tcp_maxrtms () is false. */
1670 #define TCP_MAXRT 5 /* Older systems don't support TCP_MAXRTMS
1671 TCP_MAXRT takes secs, not msecs. */
/* WSAIoctl control code used by setsockopt to implement TCP_QUICKACK.
   Only defined here if the headers do not already provide it. */
1673 #ifndef SIO_TCP_SET_ACK_FREQUENCY
1674 #define SIO_TCP_SET_ACK_FREQUENCY _WSAIOW(IOC_VENDOR,23)
1675 #endif
/* Upper bounds accepted by setsockopt for TCP_KEEPIDLE, TCP_KEEPCNT and
   TCP_KEEPINTVL when the keep-alive options are emulated. */
1677 #define MAX_TCP_KEEPIDLE 32767
1678 #define MAX_TCP_KEEPCNT 255
1679 #define MAX_TCP_KEEPINTVL 32767
/* Divisor used by set_keepalive to scale keepcnt * keepintvl into the
   WinSock keepaliveinterval. */
1681 #define FIXED_WSOCK_TCP_KEEPCNT 10
1684 fhandler_socket_inet::set_keepalive (int keepidle, int keepcnt, int keepintvl)
1686 struct tcp_keepalive tka;
1687 int so_keepalive = 0;
1688 int len = sizeof so_keepalive;
1689 int ret;
1690 DWORD dummy;
1692 /* Per MSDN,
1693 https://docs.microsoft.com/en-us/windows/win32/winsock/sio-keepalive-vals
1694 the subsequent keep-alive settings in struct tcp_keepalive are only used
1695 if the onoff member is != 0. Request the current state of SO_KEEPALIVE,
1696 then set the keep-alive options with onoff set to 1. On success, if
1697 SO_KEEPALIVE was 0, restore to the original SO_KEEPALIVE setting. Per
1698 the above MSDN doc, the SIO_KEEPALIVE_VALS settings are persistent
1699 across switching SO_KEEPALIVE. */
1700 ret = ::getsockopt (get_socket (), SOL_SOCKET, SO_KEEPALIVE,
1701 (char *) &so_keepalive, &len);
1702 if (ret == SOCKET_ERROR)
1703 debug_printf ("getsockopt (SO_KEEPALIVE) failed, %u\n", WSAGetLastError ());
1704 tka.onoff = 1;
1705 tka.keepalivetime = keepidle * MSPERSEC;
1706 /* WinSock TCP_KEEPCNT is fixed. But we still want that the keep-alive
1707 times out after TCP_KEEPIDLE + TCP_KEEPCNT * TCP_KEEPINTVL secs.
1708 To that end, we set keepaliveinterval so that
1710 keepaliveinterval * FIXED_WSOCK_TCP_KEEPCNT == TCP_KEEPINTVL * TCP_KEEPCNT
1712 FIXME? Does that make sense?
1714 Sidenote: Given the max values, the entire operation fits into an int. */
1715 tka.keepaliveinterval = MSPERSEC / FIXED_WSOCK_TCP_KEEPCNT * keepcnt
1716 * keepintvl;
1717 if (WSAIoctl (get_socket (), SIO_KEEPALIVE_VALS, (LPVOID) &tka, sizeof tka,
1718 NULL, 0, &dummy, NULL, NULL) == SOCKET_ERROR)
1720 set_winsock_errno ();
1721 return -1;
1723 if (!so_keepalive)
1725 ret = ::setsockopt (get_socket (), SOL_SOCKET, SO_KEEPALIVE,
1726 (const char *) &so_keepalive, sizeof so_keepalive);
1727 if (ret == SOCKET_ERROR)
1728 debug_printf ("setsockopt (SO_KEEPALIVE) failed, %u\n",
1729 WSAGetLastError ());
1731 return 0;
/* setsockopt(2) for inet sockets, in three stages.

   1. Preprocessing: options WinSock cannot handle as-is are rejected,
      emulated in fhandler members, or rewritten (optname/optval
      replaced).  Setting `ignore' skips the WinSock call entirely.
   2. The WinSock ::setsockopt call, unless ignored.
   3. Postprocessing: SO_REUSEADDR, SO_RCVBUF and SO_SNDBUF are recorded
      in the fhandler.

   Returns 0 on success.  On failure errno is set and -1 (or the
   SOCKET_ERROR result of ::setsockopt) is returned. */
1735 fhandler_socket_inet::setsockopt (int level, int optname, const void *optval,
1736 socklen_t optlen)
1738 bool ignore = false;
1739 int ret = -1;
1740 unsigned int winsock_val;
1742 /* Preprocessing setsockopt. Set ignore to true if setsockopt call should
1743 get skipped entirely. */
1744 switch (level)
1746 case SOL_SOCKET:
1747 switch (optname)
1749 case SO_PEERCRED:
1750 set_errno (ENOPROTOOPT);
1751 return -1;
1753 case SO_REUSEADDR:
1754 /* Per POSIX we must not be able to reuse a complete duplicate of a
1755 local TCP address (same IP, same port), even if SO_REUSEADDR has
1756 been set. This behaviour is maintained in WinSock for backward
1757 compatibility, while the WinSock standard behaviour of stream
1758 socket binding is equivalent to the POSIX behaviour as if
1759 SO_REUSEADDR has been set. The SO_EXCLUSIVEADDRUSE option has
1760 been added to allow an application to request POSIX standard
1761 behaviour in the non-SO_REUSEADDR case.
1763 To emulate POSIX socket binding behaviour, note that SO_REUSEADDR
1764 has been set but don't call setsockopt. Instead
1765 fhandler_socket::bind sets SO_EXCLUSIVEADDRUSE if the application
1766 did not set SO_REUSEADDR. */
1767 if (optlen < (socklen_t) sizeof (int))
1769 set_errno (EINVAL);
1770 return ret;
1772 if (get_socket_type () == SOCK_STREAM)
1773 ignore = true;
1774 break;
1776 case SO_RCVTIMEO:
1777 case SO_SNDTIMEO:
1778 if (optlen < (socklen_t) sizeof (struct timeval))
1780 set_errno (EINVAL);
1781 return ret;
1783 if (timeval_to_ms ((struct timeval *) optval,
1784 (optname == SO_RCVTIMEO) ? rcvtimeo ()
1785 : sndtimeo ()))
1786 ret = 0;
1787 else
1788 set_errno (EDOM);
1789 return ret;
1791 case SO_OOBINLINE:
1792 /* Inline mode for out-of-band (OOB) data of winsock is
1793 completely broken. That is, SIOCATMARK always returns
1794 TRUE in inline mode. Due to this problem, application
1795 cannot determine OOB data at all. Therefore the behavior
1796 of a socket with SO_OOBINLINE set is simulated using
1797 a socket with SO_OOBINLINE not set. In this fake inline
1798 mode, the order of the OOB and non-OOB data is not
1799 preserved. OOB data is read before non-OOB data sent
1800 prior to the OOB data. However, this most likely is
1801 not a problem in most cases. */
1802 /* Here, instead of actually setting inline mode, simply
1803 set the variable oobinline. */
/* NOTE(review): optlen is not validated before optval is read as an int
   here (nor in the IPPROTO_TCP cases below), unlike the SO_REUSEADDR
   case above; confirm callers guarantee at least sizeof (int). */
1804 oobinline = *(int *) optval ? true : false;
1805 ignore = true;
1806 break;
1808 default:
1809 break;
1811 break;
1813 case IPPROTO_IP:
1814 switch (optname)
1816 case IP_TOS:
1817 /* Winsock doesn't support setting the IP_TOS field with setsockopt
1818 and TOS was never implemented for TCP anyway. setsockopt returns
1819 WinSock error 10022, WSAEINVAL when trying to set the IP_TOS
1820 field. We just return 0 instead. */
1821 ignore = true;
1822 break;
1824 default:
1825 break;
1827 break;
1829 case IPPROTO_IPV6:
1830 switch (optname)
1832 case IPV6_TCLASS:
1833 /* Unsupported */
1834 ignore = true;
1835 break;
1837 default:
1838 break;
1840 break;
1842 case IPPROTO_TCP:
1843 /* Check for stream socket early on, so we don't have to do this for
1844 every option. Also, WinSock returns EINVAL. */
1845 if (type != SOCK_STREAM)
1847 set_errno (EOPNOTSUPP);
1848 return -1;
1851 switch (optname)
1853 case TCP_MAXSEG:
1854 /* Winsock doesn't support setting TCP_MAXSEG, only requesting it
1855 via getsockopt. Make this a no-op. */
1856 ignore = true;
1857 break;
1859 case TCP_QUICKACK:
1860 /* Various sources on the net claim that TCP_QUICKACK is supported
1861 by Windows, even using the same optname value of 12. However,
1862 the ws2ipdef.h header calls this option TCP_CONGESTION_ALGORITHM
1863 and there's no official statement, nor official documentation
1864 confirming or denying this option is equivalent to Linux'
1865 TCP_QUICKACK. Also, weirdly, this option takes values from 0..7.
1867 There is another undocumented option to WSAIoctl called
1868 SIO_TCP_SET_ACK_FREQUENCY which is already used by some
1869 projects, so we're going to use it here, too, for now.
1871 There's an open issue in the dotnet github,
1872 https://github.com/dotnet/runtime/issues/798
1873 Hopefully this clarifies the situation in the not too distant
1874 future... */
1876 DWORD dummy;
1877 /* https://stackoverflow.com/questions/55034112/c-disable-delayed-ack-on-windows
1878 claims that valid values for SIO_TCP_SET_ACK_FREQUENCY are
1879 1..255. In contrast to that, my own testing shows that
1880 valid values are 0 and 1 exclusively. */
1881 int freq = !!*(int *) optval;
1882 if (WSAIoctl (get_socket (), SIO_TCP_SET_ACK_FREQUENCY, &freq,
1883 sizeof freq, NULL, 0, &dummy, NULL, NULL)
1884 == SOCKET_ERROR)
1886 set_winsock_errno ();
1887 return -1;
1889 ignore = true;
1890 tcp_quickack = freq ? true : false;
1892 break;
1894 case TCP_MAXRT:
1895 /* Don't let this option slip through from user space. */
1896 set_errno (EOPNOTSUPP);
1897 return -1;
1899 case TCP_USER_TIMEOUT:
/* Without TCP_MAXRTMS support the request is rewritten into a TCP_MAXRT
   call with the value converted to seconds; optval then points at the
   local winsock_val. */
1900 if (!wincap.has_tcp_maxrtms ())
1902 /* convert msecs to secs. Values < 1000 ms are converted to
1903 0 secs, just as in WinSock. */
1904 winsock_val = *(unsigned int *) optval / MSPERSEC;
1905 optname = TCP_MAXRT;
1906 optval = (const void *) &winsock_val;
1908 break;
1910 case TCP_FASTOPEN:
1911 /* Fake FastOpen on older systems. */
1912 if (!wincap.has_tcp_fastopen ())
1914 ignore = true;
1915 tcp_fastopen = *(int *) optval ? true : false;
1917 break;
1919 case TCP_KEEPIDLE:
1920 /* Handle TCP_KEEPIDLE on older systems. */
1921 if (!wincap.has_linux_tcp_keepalive_sockopts ())
1923 if (*(int *) optval < 1 || *(int *) optval > MAX_TCP_KEEPIDLE)
1925 set_errno (EINVAL);
1926 return -1;
1928 if (set_keepalive (*(int *) optval, tcp_keepcnt, tcp_keepintvl))
1929 return -1;
1930 ignore = true;
1931 tcp_keepidle = *(int *) optval;
1933 break;
1935 case TCP_KEEPCNT:
1936 /* Fake TCP_KEEPCNT on older systems. */
1937 if (!wincap.has_linux_tcp_keepalive_sockopts ())
1939 if (*(int *) optval < 1 || *(int *) optval > MAX_TCP_KEEPCNT)
1941 set_errno (EINVAL);
1942 return -1;
1944 if (set_keepalive (tcp_keepidle, *(int *) optval, tcp_keepintvl))
1945 return -1;
1946 ignore = true;
1947 tcp_keepcnt = *(int *) optval;
1949 break;
1951 case TCP_KEEPINTVL:
1952 /* Handle TCP_KEEPINTVL on older systems. */
1953 if (!wincap.has_linux_tcp_keepalive_sockopts ())
1955 if (*(int *) optval < 1 || *(int *) optval > MAX_TCP_KEEPINTVL)
1957 set_errno (EINVAL);
1958 return -1;
1960 if (set_keepalive (tcp_keepidle, tcp_keepcnt, *(int *) optval))
1961 return -1;
1962 ignore = true;
1963 tcp_keepintvl = *(int *) optval;
1965 break;
1967 default:
1968 break;
1970 break;
1972 case IPPROTO_UDP:
1973 /* Check for dgram socket early on, so we don't have to do this for
1974 every option. Also, WinSock returns EINVAL. */
1975 if (type != SOCK_DGRAM)
1977 set_errno (EOPNOTSUPP);
1978 return -1;
1980 if (optlen < (socklen_t) sizeof (int))
1982 set_errno (EINVAL);
1983 return ret;
1985 switch (optname)
1987 case UDP_SEGMENT:
1988 if (*(int *) optval < 0 || *(int *) optval > USHRT_MAX)
1990 set_errno (EINVAL);
1991 return -1;
1993 break;
1995 case UDP_GRO:
1996 /* In contrast to Windows' UDP_RECV_MAX_COALESCED_SIZE option,
1997 Linux' UDP_GRO option is just a bool. The max. packet size
1998 is dynamically evaluated from the MRU. There's no easy,
1999 reliable way to get the MRU. We assume that this is what Windows
2000 will do internally anyway and, given UDP_RECV_MAX_COALESCED_SIZE
2001 defines a *maximum* size for aggregated packages, we just choose
2002 the maximum sensible value. FIXME? IP_MTU_DISCOVER / IP_MTU */
2003 winsock_val = *(int *) optval ? USHRT_MAX : 0;
2004 optval = &winsock_val;
2005 break;
2007 default:
2008 break;
2010 break;
2012 default:
2013 break;
2016 /* Call Winsock setsockopt (or not) */
2017 if (ignore)
2018 ret = 0;
2019 else
2021 ret = ::setsockopt (get_socket (), level, optname, (const char *) optval,
2022 optlen);
2023 if (ret == SOCKET_ERROR)
2025 set_winsock_errno ();
2026 return ret;
2030 if (optlen == (socklen_t) sizeof (int))
2031 debug_printf ("setsockopt optval=%x", *(int *) optval);
2033 /* Postprocessing setsockopt, setting fhandler_socket members, etc. */
2034 switch (level)
2036 case SOL_SOCKET:
2037 switch (optname)
2039 case SO_REUSEADDR:
2040 saw_reuseaddr (*(int *) optval);
2041 break;
2043 case SO_RCVBUF:
2044 rmem (*(int *) optval);
2045 break;
2047 case SO_SNDBUF:
2048 wmem (*(int *) optval);
2049 break;
2051 default:
2052 break;
2054 break;
2056 default:
2057 break;
2060 return ret;
/* getsockopt(2) for inet sockets, in three stages.

   1. Preprocessing: options kept in fhandler members are answered
      directly, unsupported ones are rejected, and TCP_USER_TIMEOUT may be
      rewritten to TCP_MAXRT.
   2. The WinSock ::getsockopt call.
   3. Postprocessing: the WinSock result is converted where needed
      (SO_ERROR mapping, seconds to milliseconds, UDP_GRO to a boolean,
      and widening of one-byte BOOLEAN results to int).

   Returns 0 on success.  On failure errno is set and -1 (or the
   SOCKET_ERROR result of ::getsockopt) is returned. */
2064 fhandler_socket_inet::getsockopt (int level, int optname, const void *optval,
2065 socklen_t *optlen)
2067 bool onebyte = false;
2068 int ret = -1;
2070 /* Preprocessing getsockopt. */
2071 switch (level)
2073 case SOL_SOCKET:
2074 switch (optname)
2076 case SO_PEERCRED:
2077 set_errno (ENOPROTOOPT);
2078 return -1;
2080 case SO_REUSEADDR:
2082 unsigned int *reuseaddr = (unsigned int *) optval;
2084 if (*optlen < (socklen_t) sizeof *reuseaddr)
2086 set_errno (EINVAL);
2087 return -1;
2089 *reuseaddr = saw_reuseaddr();
2090 *optlen = (socklen_t) sizeof *reuseaddr;
2091 return 0;
2094 case SO_RCVTIMEO:
2095 case SO_SNDTIMEO:
2097 struct timeval *time_out = (struct timeval *) optval;
2099 if (*optlen < (socklen_t) sizeof *time_out)
2101 set_errno (EINVAL);
2102 return -1;
2104 DWORD ms = (optname == SO_RCVTIMEO) ? rcvtimeo () : sndtimeo ();
/* A stored timeout of 0 or INFINITE is reported as a zero timeval. */
2105 if (ms == 0 || ms == INFINITE)
2107 time_out->tv_sec = 0;
2108 time_out->tv_usec = 0;
2110 else
2112 time_out->tv_sec = ms / MSPERSEC;
2113 time_out->tv_usec = ((ms % MSPERSEC) * USPERSEC) / MSPERSEC;
2115 *optlen = (socklen_t) sizeof *time_out;
2116 return 0;
2119 case SO_TYPE:
/* NOTE(review): *optlen is not checked against sizeof (int) before the
   store, unlike the SO_REUSEADDR case above; confirm callers guarantee a
   large enough buffer. */
2121 unsigned int *type = (unsigned int *) optval;
2122 *type = get_socket_type ();
2123 *optlen = (socklen_t) sizeof *type;
2124 return 0;
2127 case SO_OOBINLINE:
/* NOTE(review): *optlen is neither checked nor updated in this case,
   unlike the other locally answered options; confirm this is
   intended. */
2128 *(int *) optval = oobinline ? 1 : 0;
2129 return 0;
2131 default:
2132 break;
2134 break;
2136 case IPPROTO_IP:
2137 break;
2139 case IPPROTO_TCP:
2140 /* Check for stream socket early on, so we don't have to do this for
2141 every option. Also, WinSock returns EINVAL. */
2142 if (type != SOCK_STREAM)
2144 set_errno (EOPNOTSUPP);
2145 return -1;
2148 switch (optname)
2150 case TCP_QUICKACK:
2151 *(int *) optval = tcp_quickack ? 1 : 0;
2152 *optlen = sizeof (int);
2153 return 0;
2155 case TCP_MAXRT:
2156 /* Don't let this option slip through from user space. */
2157 set_errno (EOPNOTSUPP);
2158 return -1;
2160 case TCP_USER_TIMEOUT:
2161 /* Older systems don't support TCP_MAXRTMS, just call TCP_MAXRT. */
2162 if (!wincap.has_tcp_maxrtms ())
2163 optname = TCP_MAXRT;
2164 break;
2166 case TCP_FASTOPEN:
2167 /* Fake FastOpen on older systems */
2168 if (!wincap.has_tcp_fastopen ())
2170 *(int *) optval = tcp_fastopen ? 1 : 0;
2171 *optlen = sizeof (int);
2172 return 0;
2174 break;
2176 case TCP_KEEPIDLE:
2177 /* Use stored value on older systems */
2178 if (!wincap.has_linux_tcp_keepalive_sockopts ())
2180 *(int *) optval = tcp_keepidle;
2181 *optlen = sizeof (int);
2182 return 0;
2184 break;
2186 case TCP_KEEPCNT:
2187 /* Use stored value on older systems */
2188 if (!wincap.has_linux_tcp_keepalive_sockopts ())
2190 *(int *) optval = tcp_keepcnt;
2191 *optlen = sizeof (int);
2192 return 0;
2194 break;
2196 case TCP_KEEPINTVL:
2197 /* Use stored value on older systems */
2198 if (!wincap.has_linux_tcp_keepalive_sockopts ())
2200 *(int *) optval = tcp_keepintvl;
2201 *optlen = sizeof (int);
2202 return 0;
2204 break;
2206 default:
2207 break;
2209 break;
2211 case IPPROTO_UDP:
2212 /* Check for dgram socket early on, so we don't have to do this for
2213 every option. Also, WinSock returns EINVAL. */
2214 if (type != SOCK_DGRAM)
2216 set_errno (EOPNOTSUPP);
2217 return -1;
2219 break;
2221 default:
2222 break;
2225 /* Call Winsock getsockopt */
2226 ret = ::getsockopt (get_socket (), level, optname, (char *) optval,
2227 (int *) optlen);
2228 if (ret == SOCKET_ERROR)
2230 set_winsock_errno ();
2231 return ret;
2234 /* Postprocessing getsockopt, setting fhandler_socket members, etc. Set
2235 onebyte true for options returning BOOLEAN instead of a boolean DWORD. */
2236 switch (level)
2238 case SOL_SOCKET:
2239 switch (optname)
2241 case SO_ERROR:
/* Translate the WinSock error code into its errno equivalent. */
2243 int *e = (int *) optval;
2244 debug_printf ("WinSock SO_ERROR = %d", *e);
2245 *e = find_winsock_errno (*e);
2247 break;
2249 case SO_KEEPALIVE:
2250 case SO_DONTROUTE:
2251 onebyte = true;
2252 break;
2254 default:
2255 break;
2257 break;
2259 case IPPROTO_TCP:
2260 switch (optname)
2262 case TCP_NODELAY:
2263 onebyte = true;
2264 break;
2266 case TCP_MAXRT: /* After above conversion from TCP_USER_TIMEOUT */
2267 /* convert secs to msecs */
2268 *(unsigned int *) optval *= MSPERSEC;
2269 break;
2271 case TCP_FASTOPEN:
2272 onebyte = true;
2273 break;
2275 default:
2276 break;
2278 break;
2280 case IPPROTO_UDP:
2281 switch (optname)
2283 case UDP_GRO:
2284 /* Convert to bool option */
2285 *(unsigned int *) optval = *(unsigned int *) optval ? 1 : 0;
2286 break;
2288 default:
2289 break;
2291 break;
2293 default:
2294 break;
2297 if (onebyte)
2299 /* Regression in 6.0 kernel and later: instead of a 4 byte BOOL value, a
2300 1 byte BOOLEAN value is returned, in contrast to older systems and
2301 the documentation. Since an int type is expected by the calling
2302 application, we convert the result here. */
2303 BOOLEAN *in = (BOOLEAN *) optval;
2304 int *out = (int *) optval;
2305 *out = *in;
2306 *optlen = 4;
2309 return ret;
2313 fhandler_socket_wsock::ioctl (unsigned int cmd, void *p)
2315 int res;
2317 switch (cmd)
2319 /* Here we handle only ioctl commands which are understood by Winsock.
2320 However, we have a problem, which is, the different size of u_long
2321 in Windows and 64 bit Cygwin. This affects the definitions of
2322 FIOASYNC, etc, because they are defined in terms of sizeof(u_long).
2323 So we have to use case labels which are independent of the sizeof
2324 u_long. Since we're redefining u_long at the start of this file to
2325 matching Winsock's idea of u_long, we can use the real definitions in
2326 calls to Windows. In theory we also have to make sure to convert the
2327 different ideas of u_long between the application and Winsock, but
2328 fortunately, the parameters defined as u_long pointers are on Linux
2329 and BSD systems defined as int pointer, so the applications will
2330 use a type of the expected size. Hopefully. */
2331 case FIOASYNC:
2332 case _IOW('f', 125, u_long):
2333 res = WSAAsyncSelect (get_socket (), winmsg, WM_ASYNCIO,
2334 *(int *) p ? ASYNC_MASK : 0);
2335 syscall_printf ("Async I/O on socket %s",
2336 *(int *) p ? "started" : "cancelled");
2337 async_io (*(int *) p != 0);
2338 /* If async_io is switched off, revert the event handling. */
2339 if (*(int *) p == 0)
2340 WSAEventSelect (get_socket (), wsock_evt, EVENT_MASK);
2341 break;
2342 case FIONREAD:
2343 case _IOR('f', 127, u_long):
2344 /* Make sure to use the Winsock definition of FIONREAD. */
2345 res = ::ioctlsocket (get_socket (), _IOR('f', 127, u_long), (u_long *) p);
2346 if (res == SOCKET_ERROR)
2347 set_winsock_errno ();
2348 break;
2349 case FIONBIO:
2350 case SIOCATMARK:
2351 /* Sockets are always non-blocking internally. So we just note the
2352 state here. */
2353 /* Convert the different idea of u_long in the definition of cmd. */
2354 if (((cmd >> 16) & IOCPARM_MASK) == sizeof (unsigned long))
2355 cmd = (cmd & ~(IOCPARM_MASK << 16)) | (sizeof (u_long) << 16);
2356 if (cmd == FIONBIO)
2358 syscall_printf ("socket is now %sblocking",
2359 *(int *) p ? "non" : "");
2360 set_nonblocking (*(int *) p);
2361 res = 0;
2363 else
2364 res = ::ioctlsocket (get_socket (), cmd, (u_long *) p);
2365 /* In winsock, the return value of SIOCATMARK is FALSE if
2366 OOB data exists, TRUE otherwise. This is almost opposite
2367 to expectation. */
2368 /* SIOCATMARK = _IOR('s',7,u_long) */
2369 if (cmd == _IOR('s',7,u_long) && !res)
2370 *(u_long *)p = !*(u_long *)p;
2371 break;
2372 default:
2373 res = fhandler_socket::ioctl (cmd, p);
2374 break;
2376 syscall_printf ("%d = ioctl_socket(%x, %p)", res, cmd, p);
2377 return res;
2381 fhandler_socket_wsock::fcntl (int cmd, intptr_t arg)
2383 int res = 0;
2385 switch (cmd)
2387 case F_SETOWN:
2389 pid_t pid = (pid_t) arg;
2390 LOCK_EVENTS;
2391 wsock_events->owner = pid;
2392 UNLOCK_EVENTS;
2393 debug_printf ("owner set to %d", pid);
2395 break;
2396 case F_GETOWN:
2397 res = wsock_events->owner;
2398 break;
2399 default:
2400 res = fhandler_socket::fcntl (cmd, arg);
2401 break;
2403 return res;