No empty .Rs/.Re
[netbsd-mini2440.git] / external / bsd / bind / dist / lib / isc / win32 / socket.c
blob2823259ed00deb7e42f1b83de91ecddbd8efdc31
1 /* $NetBSD$ */
3 /*
4 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
5 * Copyright (C) 2000-2003 Internet Software Consortium.
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
20 /* Id: socket.c,v 1.81 2009/11/10 18:31:47 each Exp */
/*
 * This code uses functions which are only available on Server 2003 and
 * higher, and Windows XP and higher.
 *
 * This code is by nature multithreaded and takes advantage of various
 * features to pass on information through the completion port for
 * when I/O is completed.  All sends, receives, accepts, and connects are
 * completed through the completion port.
 *
 * The number of Completion Port Worker threads used is the total number
 * of CPUs + 1.  This increases the likelihood that a Worker Thread is
 * available for processing a completed request.
 *
 * XXXPDM 5 August, 2002
 */
37 #define MAKE_EXTERNAL 1
38 #include <config.h>
40 #include <sys/types.h>
42 #ifndef _WINSOCKAPI_
43 #define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */
44 #endif
46 #include <errno.h>
47 #include <stddef.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <io.h>
52 #include <fcntl.h>
53 #include <process.h>
55 #include <isc/buffer.h>
56 #include <isc/bufferlist.h>
57 #include <isc/condition.h>
58 #include <isc/list.h>
59 #include <isc/log.h>
60 #include <isc/mem.h>
61 #include <isc/msgs.h>
62 #include <isc/mutex.h>
63 #include <isc/net.h>
64 #include <isc/once.h>
65 #include <isc/os.h>
66 #include <isc/platform.h>
67 #include <isc/print.h>
68 #include <isc/region.h>
69 #include <isc/socket.h>
70 #include <isc/stats.h>
71 #include <isc/strerror.h>
72 #include <isc/syslog.h>
73 #include <isc/task.h>
74 #include <isc/thread.h>
75 #include <isc/util.h>
76 #include <isc/win32os.h>
78 #include <mswsock.h>
80 #include "errno2result.h"
/*
 * How in the world can Microsoft exist with APIs like this?
 * We can't actually call these directly, because it turns out
 * no library exports these functions.  Instead, we need to
 * issue a runtime call to get their addresses.
 */
88 LPFN_CONNECTEX ISCConnectEx;
89 LPFN_ACCEPTEX ISCAcceptEx;
90 LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;
93 * Run expensive internal consistency checks.
95 #ifdef ISC_SOCKET_CONSISTENCY_CHECKS
96 #define CONSISTENT(sock) consistent(sock)
97 #else
98 #define CONSISTENT(sock) do {} while (0)
99 #endif
100 static void consistent(isc_socket_t *sock);
103 * Define this macro to control the behavior of connection
104 * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823
105 * for details.
106 * NOTE: This requires that Windows 2000 systems install Service Pack 2
107 * or later.
109 #ifndef SIO_UDP_CONNRESET
110 #define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
111 #endif
114 * Some systems define the socket length argument as an int, some as size_t,
115 * some as socklen_t. This is here so it can be easily changed if needed.
117 #ifndef ISC_SOCKADDR_LEN_T
118 #define ISC_SOCKADDR_LEN_T unsigned int
119 #endif
122 * Define what the possible "soft" errors can be. These are non-fatal returns
123 * of various network related functions, like recv() and so on.
125 #define SOFT_ERROR(e) ((e) == WSAEINTR || \
126 (e) == WSAEWOULDBLOCK || \
127 (e) == EWOULDBLOCK || \
128 (e) == EINTR || \
129 (e) == EAGAIN || \
130 (e) == 0)
133 * Pending errors are not really errors and should be
134 * kept separate
136 #define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)
138 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
139 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
140 #define DOIO_HARD 2 /* i/o error, event sent */
141 #define DOIO_EOF 3 /* EOF, no event sent */
142 #define DOIO_PENDING 4 /* status when i/o is in process */
143 #define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */
145 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
148 * DLVL(90) -- Function entry/exit and other tracing.
149 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
150 * DLVL(60) -- Socket data send/receive
151 * DLVL(50) -- Event tracing, including receiving/sending completion events.
152 * DLVL(20) -- Socket creation/destruction.
154 #define TRACE_LEVEL 90
155 #define CORRECTNESS_LEVEL 70
156 #define IOEVENT_LEVEL 60
157 #define EVENT_LEVEL 50
158 #define CREATION_LEVEL 20
160 #define TRACE DLVL(TRACE_LEVEL)
161 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
162 #define IOEVENT DLVL(IOEVENT_LEVEL)
163 #define EVENT DLVL(EVENT_LEVEL)
164 #define CREATION DLVL(CREATION_LEVEL)
166 typedef isc_event_t intev_t;
/*
 * Socket State
 *
 * Values stored in isc_socket.state through _set_state().
 */
enum {
	SOCK_INITIALIZED,	/* socket initialized */
	SOCK_OPEN,		/* socket opened but nothing yet to do */
	SOCK_DATA,		/* socket sending or receiving data */
	SOCK_LISTEN,		/* TCP socket listening for connects */
	SOCK_ACCEPT,		/* TCP socket is waiting to accept */
	SOCK_CONNECT,		/* TCP socket connecting */
	SOCK_CLOSED,		/* socket has been closed */
};
181 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
182 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
185 * IPv6 control information. If the socket is an IPv6 socket we want
186 * to collect the destination address and interface so the client can
187 * set them on outgoing packets.
189 #ifdef ISC_PLATFORM_HAVEIPV6
190 #ifndef USE_CMSG
191 #define USE_CMSG 1
192 #endif
193 #endif
196 * We really don't want to try and use these control messages. Win32
197 * doesn't have this mechanism before XP.
199 #undef USE_CMSG
202 * Message header for recvmsg and sendmsg calls.
203 * Used value-result for recvmsg, value only for sendmsg.
205 struct msghdr {
206 SOCKADDR_STORAGE to_addr; /* UDP send/recv address */
207 int to_addr_len; /* length of the address */
208 WSABUF *msg_iov; /* scatter/gather array */
209 u_int msg_iovlen; /* # elements in msg_iov */
210 void *msg_control; /* ancillary data, see below */
211 u_int msg_controllen; /* ancillary data buffer len */
212 int msg_totallen; /* total length of this message */
213 } msghdr;
216 * The size to raise the receive buffer to.
218 #define RCVBUFSIZE (32*1024)
221 * The number of times a send operation is repeated if the result
222 * is WSAEINTR.
224 #define NRETRIES 10
226 struct isc_socket {
227 /* Not locked. */
228 unsigned int magic;
229 isc_socketmgr_t *manager;
230 isc_mutex_t lock;
231 isc_sockettype_t type;
233 /* Pointers to scatter/gather buffers */
234 WSABUF iov[ISC_SOCKET_MAXSCATTERGATHER];
236 /* Locked by socket lock. */
237 ISC_LINK(isc_socket_t) link;
238 unsigned int references; /* EXTERNAL references */
239 SOCKET fd; /* file handle */
240 int pf; /* protocol family */
241 char name[16];
242 void * tag;
245 * Each recv() call uses this buffer. It is a per-socket receive
246 * buffer that allows us to decouple the system recv() from the
247 * recv_list done events. This means the items on the recv_list
248 * can be removed without having to cancel pending system recv()
249 * calls. It also allows us to read-ahead in some cases.
251 struct {
252 SOCKADDR_STORAGE from_addr; // UDP send/recv address
253 int from_addr_len; // length of the address
254 char *base; // the base of the buffer
255 char *consume_position; // where to start copying data from next
256 unsigned int len; // the actual size of this buffer
257 unsigned int remaining; // the number of bytes remaining
258 } recvbuf;
260 ISC_LIST(isc_socketevent_t) send_list;
261 ISC_LIST(isc_socketevent_t) recv_list;
262 ISC_LIST(isc_socket_newconnev_t) accept_list;
263 isc_socket_connev_t *connect_ev;
265 isc_sockaddr_t address; /* remote address */
267 unsigned int listener : 1, /* listener socket */
268 connected : 1,
269 pending_connect : 1, /* connect pending */
270 bound : 1; /* bound to local addr */
271 unsigned int pending_iocp; /* Should equal the counters below. Debug. */
272 unsigned int pending_recv; /* Number of outstanding recv() calls. */
273 unsigned int pending_send; /* Number of outstanding send() calls. */
274 unsigned int pending_accept; /* Number of outstanding accept() calls. */
275 unsigned int state; /* Socket state. Debugging and consistency checking. */
276 int state_lineno; /* line which last touched state */
277 int in_recovery_cnt; /* avoid recovery loop. */
/*
 * Record a new socket state together with the source line that set it.
 */
#define _set_state(sock, _state) \
	do { \
		(sock)->state = (_state); \
		(sock)->state_lineno = __LINE__; \
	} while (0)
283 * Buffer structure
285 typedef struct buflist buflist_t;
287 struct buflist {
288 void *buf;
289 unsigned int buflen;
290 ISC_LINK(buflist_t) link;
294 * I/O Completion ports Info structures
297 static HANDLE hHeapHandle = NULL;
298 typedef struct IoCompletionInfo {
299 OVERLAPPED overlapped;
300 isc_socketevent_t *dev; /* send()/recv() done event */
301 isc_socket_connev_t *cdev; /* connect() done event */
302 isc_socket_newconnev_t *adev; /* accept() done event */
303 void *acceptbuffer;
304 DWORD received_bytes;
305 int request_type;
306 struct msghdr messagehdr;
307 ISC_LIST(buflist_t) bufferlist; /*%< list of buffers */
308 } IoCompletionInfo;
311 * Define a maximum number of I/O Completion Port worker threads
312 * to handle the load on the Completion Port. The actual number
313 * used is the number of CPU's + 1.
315 #define MAX_IOCPTHREADS 20
317 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')
318 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
320 struct isc_socketmgr {
321 /* Not locked. */
322 unsigned int magic;
323 isc_mem_t *mctx;
324 isc_mutex_t lock;
325 isc_stats_t *stats;
327 /* Locked by manager lock. */
328 ISC_LIST(isc_socket_t) socklist;
329 isc_boolean_t bShutdown;
330 isc_condition_t shutdown_ok;
331 HANDLE hIoCompletionPort;
332 int maxIOCPThreads;
333 HANDLE hIOCPThreads[MAX_IOCPTHREADS];
334 DWORD dwIOCPThreadIds[MAX_IOCPTHREADS];
337 * Debugging.
338 * Modified by InterlockedIncrement() and InterlockedDecrement()
340 LONG totalSockets;
341 LONG iocp_total;
/*
 * Kinds of overlapped request; stored in IoCompletionInfo.request_type.
 */
enum {
	SOCKET_RECV,
	SOCKET_SEND,
	SOCKET_ACCEPT,
	SOCKET_CONNECT
};
352 * send() and recv() iovec counts
354 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
355 #define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
357 static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
358 static void maybe_free_socket(isc_socket_t **, int);
359 static void free_socket(isc_socket_t **, int);
360 static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
361 static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
362 static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
363 static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
364 static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
365 static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
366 static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
367 static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
368 static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
369 static void queue_receive_request(isc_socket_t *sock);
370 static void hard_recover_receive_request(isc_socket_t *sock);
371 static void recover_receive_request(isc_socket_t *sock, void **lplpo);
374 * This is used to dump the contents of the sock structure
375 * You should make sure that the sock is locked before
376 * dumping it. Since the code uses simple printf() statements
377 * it should only be used interactively.
379 void
380 sock_dump(isc_socket_t *sock) {
381 isc_socketevent_t *ldev;
382 isc_socket_newconnev_t *ndev;
384 #if 0
385 isc_sockaddr_t addr;
386 char socktext[256];
388 isc_socket_getpeername(sock, &addr);
389 isc_sockaddr_format(&addr, socktext, sizeof(socktext));
390 printf("Remote Socket: %s\n", socktext);
391 isc_socket_getsockname(sock, &addr);
392 isc_sockaddr_format(&addr, socktext, sizeof(socktext));
393 printf("This Socket: %s\n", socktext);
394 #endif
396 printf("\n\t\tSock Dump\n");
397 printf("\t\tfd: %u\n", sock->fd);
398 printf("\t\treferences: %d\n", sock->references);
399 printf("\t\tpending_accept: %d\n", sock->pending_accept);
400 printf("\t\tconnecting: %d\n", sock->pending_connect);
401 printf("\t\tconnected: %d\n", sock->connected);
402 printf("\t\tbound: %d\n", sock->bound);
403 printf("\t\tpending_iocp: %d\n", sock->pending_iocp);
404 printf("\t\tsocket type: %d\n", sock->type);
406 printf("\n\t\tSock Recv List\n");
407 ldev = ISC_LIST_HEAD(sock->recv_list);
408 while (ldev != NULL) {
409 printf("\t\tdev: %p\n", ldev);
410 ldev = ISC_LIST_NEXT(ldev, ev_link);
413 printf("\n\t\tSock Send List\n");
414 ldev = ISC_LIST_HEAD(sock->send_list);
415 while (ldev != NULL) {
416 printf("\t\tdev: %p\n", ldev);
417 ldev = ISC_LIST_NEXT(ldev, ev_link);
420 printf("\n\t\tSock Accept List\n");
421 ndev = ISC_LIST_HEAD(sock->accept_list);
422 while (ndev != NULL) {
423 printf("\t\tdev: %p\n", ldev);
424 ndev = ISC_LIST_NEXT(ndev, ev_link);
428 static void
429 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
430 isc_logcategory_t *category, isc_logmodule_t *module, int level,
431 isc_msgcat_t *msgcat, int msgset, int message,
432 const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
434 /* This function will add an entry to the I/O completion port
435 * that will signal the I/O thread to exit (gracefully)
437 static void
438 signal_iocompletionport_exit(isc_socketmgr_t *manager) {
439 int i;
440 int errval;
441 char strbuf[ISC_STRERRORSIZE];
443 REQUIRE(VALID_MANAGER(manager));
444 for (i = 0; i < manager->maxIOCPThreads; i++) {
445 if (!PostQueuedCompletionStatus(manager->hIoCompletionPort,
446 0, 0, 0)) {
447 errval = GetLastError();
448 isc__strerror(errval, strbuf, sizeof(strbuf));
449 FATAL_ERROR(__FILE__, __LINE__,
450 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
451 ISC_MSG_FAILED,
452 "Can't request service thread to exit: %s"),
453 strbuf);
459 * Create the worker threads for the I/O Completion Port
461 void
462 iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
463 int errval;
464 char strbuf[ISC_STRERRORSIZE];
465 int i;
467 INSIST(total_threads > 0);
468 REQUIRE(VALID_MANAGER(manager));
470 * We need at least one
472 for (i = 0; i < total_threads; i++) {
473 manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread,
474 manager, 0,
475 &manager->dwIOCPThreadIds[i]);
476 if (manager->hIOCPThreads[i] == NULL) {
477 errval = GetLastError();
478 isc__strerror(errval, strbuf, sizeof(strbuf));
479 FATAL_ERROR(__FILE__, __LINE__,
480 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
481 ISC_MSG_FAILED,
482 "Can't create IOCP thread: %s"),
483 strbuf);
484 exit(1);
490 * Create/initialise the I/O completion port
492 void
493 iocompletionport_init(isc_socketmgr_t *manager) {
494 int errval;
495 char strbuf[ISC_STRERRORSIZE];
497 REQUIRE(VALID_MANAGER(manager));
499 * Create a private heap to handle the socket overlapped structure
500 * The minimum number of structures is 10, there is no maximum
502 hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
503 if (hHeapHandle == NULL) {
504 errval = GetLastError();
505 isc__strerror(errval, strbuf, sizeof(strbuf));
506 FATAL_ERROR(__FILE__, __LINE__,
507 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
508 ISC_MSG_FAILED,
509 "HeapCreate() failed during "
510 "initialization: %s"),
511 strbuf);
512 exit(1);
515 manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
517 /* Now Create the Completion Port */
518 manager->hIoCompletionPort = CreateIoCompletionPort(
519 INVALID_HANDLE_VALUE, NULL,
520 0, manager->maxIOCPThreads);
521 if (manager->hIoCompletionPort == NULL) {
522 errval = GetLastError();
523 isc__strerror(errval, strbuf, sizeof(strbuf));
524 FATAL_ERROR(__FILE__, __LINE__,
525 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
526 ISC_MSG_FAILED,
527 "CreateIoCompletionPort() failed "
528 "during initialization: %s"),
529 strbuf);
530 exit(1);
534 * Worker threads for servicing the I/O
536 iocompletionport_createthreads(manager->maxIOCPThreads, manager);
540 * Associate a socket with an IO Completion Port. This allows us to queue events for it
541 * and have our worker pool of threads process them.
543 void
544 iocompletionport_update(isc_socket_t *sock) {
545 HANDLE hiocp;
546 char strbuf[ISC_STRERRORSIZE];
548 REQUIRE(VALID_SOCKET(sock));
550 hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
551 sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);
553 if (hiocp == NULL) {
554 DWORD errval = GetLastError();
555 isc__strerror(errval, strbuf, sizeof(strbuf));
556 isc_log_iwrite(isc_lctx,
557 ISC_LOGCATEGORY_GENERAL,
558 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
559 isc_msgcat, ISC_MSGSET_SOCKET,
560 ISC_MSG_TOOMANYHANDLES,
561 "iocompletionport_update: failed to open"
562 " io completion port: %s",
563 strbuf);
565 /* XXXMLG temporary hack to make failures detected.
566 * This function should return errors to the caller, not
567 * exit here.
569 FATAL_ERROR(__FILE__, __LINE__,
570 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
571 ISC_MSG_FAILED,
572 "CreateIoCompletionPort() failed "
573 "during initialization: %s"),
574 strbuf);
575 exit(1);
578 InterlockedIncrement(&sock->manager->iocp_total);
582 * Routine to cleanup and then close the socket.
583 * Only close the socket here if it is NOT associated
584 * with an event, otherwise the WSAWaitForMultipleEvents
585 * may fail due to the fact that the Wait should not
586 * be running while closing an event or a socket.
587 * The socket is locked before calling this function
589 void
590 socket_close(isc_socket_t *sock) {
592 REQUIRE(sock != NULL);
594 if (sock->fd != INVALID_SOCKET) {
595 closesocket(sock->fd);
596 sock->fd = INVALID_SOCKET;
597 _set_state(sock, SOCK_CLOSED);
598 InterlockedDecrement(&sock->manager->totalSockets);
602 static isc_once_t initialise_once = ISC_ONCE_INIT;
603 static isc_boolean_t initialised = ISC_FALSE;
605 static void
606 initialise(void) {
607 WORD wVersionRequested;
608 WSADATA wsaData;
609 int err;
610 SOCKET sock;
611 GUID GUIDConnectEx = WSAID_CONNECTEX;
612 GUID GUIDAcceptEx = WSAID_ACCEPTEX;
613 GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
614 DWORD dwBytes;
616 /* Need Winsock 2.2 or better */
617 wVersionRequested = MAKEWORD(2, 2);
619 err = WSAStartup(wVersionRequested, &wsaData);
620 if (err != 0) {
621 char strbuf[ISC_STRERRORSIZE];
622 isc__strerror(err, strbuf, sizeof(strbuf));
623 FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
624 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
625 ISC_MSG_FAILED, "failed"),
626 strbuf);
627 exit(1);
630 * The following APIs do not exist as functions in a library, but we must
631 * ask winsock for them. They are "extensions" -- but why they cannot be
632 * actual functions is beyond me. So, ask winsock for the pointers to the
633 * functions we need.
635 sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
636 INSIST(sock != INVALID_SOCKET);
637 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
638 &GUIDConnectEx, sizeof(GUIDConnectEx),
639 &ISCConnectEx, sizeof(ISCConnectEx),
640 &dwBytes, NULL, NULL);
641 INSIST(err == 0);
643 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
644 &GUIDAcceptEx, sizeof(GUIDAcceptEx),
645 &ISCAcceptEx, sizeof(ISCAcceptEx),
646 &dwBytes, NULL, NULL);
647 INSIST(err == 0);
649 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
650 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
651 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
652 &dwBytes, NULL, NULL);
653 INSIST(err == 0);
655 closesocket(sock);
657 initialised = ISC_TRUE;
661 * Initialize socket services
663 void
664 InitSockets(void) {
665 RUNTIME_CHECK(isc_once_do(&initialise_once,
666 initialise) == ISC_R_SUCCESS);
667 if (!initialised)
668 exit(1);
672 internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
673 struct msghdr *messagehdr, int flags, int *Error)
675 int Result;
676 DWORD BytesSent;
677 DWORD Flags = flags;
678 int total_sent;
680 *Error = 0;
681 Result = WSASendTo(sock->fd, messagehdr->msg_iov,
682 messagehdr->msg_iovlen, &BytesSent,
683 Flags, (SOCKADDR *)&messagehdr->to_addr,
684 messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
685 NULL);
687 total_sent = (int)BytesSent;
689 /* Check for errors.*/
690 if (Result == SOCKET_ERROR) {
691 *Error = WSAGetLastError();
693 switch (*Error) {
694 case WSA_IO_INCOMPLETE:
695 case WSA_WAIT_IO_COMPLETION:
696 case WSA_IO_PENDING:
697 case NO_ERROR: /* Strange, but okay */
698 sock->pending_iocp++;
699 sock->pending_send++;
700 break;
702 default:
703 return (-1);
704 break;
706 } else {
707 sock->pending_iocp++;
708 sock->pending_send++;
711 if (lpo != NULL)
712 return (0);
713 else
714 return (total_sent);
717 static void
718 queue_receive_request(isc_socket_t *sock) {
719 DWORD Flags = 0;
720 DWORD NumBytes = 0;
721 int total_bytes = 0;
722 int Result;
723 int Error;
724 isc_boolean_t need_recovering = ISC_FALSE;
725 WSABUF iov[1];
726 IoCompletionInfo *lpo;
727 isc_result_t isc_result;
730 * If we already have a receive pending, do nothing.
732 if (sock->pending_recv > 0)
733 return;
736 * If no one is waiting, do nothing.
738 if (ISC_LIST_EMPTY(sock->recv_list))
739 return;
741 INSIST(sock->recvbuf.remaining == 0);
742 INSIST(sock->fd != INVALID_SOCKET);
744 iov[0].len = sock->recvbuf.len;
745 iov[0].buf = sock->recvbuf.base;
747 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
748 HEAP_ZERO_MEMORY,
749 sizeof(IoCompletionInfo));
750 RUNTIME_CHECK(lpo != NULL);
751 lpo->request_type = SOCKET_RECV;
753 sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);
755 Error = 0;
756 Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
757 &NumBytes, &Flags,
758 (SOCKADDR *)&sock->recvbuf.from_addr,
759 &sock->recvbuf.from_addr_len,
760 (LPWSAOVERLAPPED)lpo, NULL);
762 /* Check for errors. */
763 if (Result == SOCKET_ERROR) {
764 Error = WSAGetLastError();
766 switch (Error) {
767 case WSA_IO_PENDING:
768 sock->pending_iocp++;
769 sock->pending_recv++;
770 break;
772 case ERROR_HOST_UNREACHABLE:
773 if (sock->type == isc_sockettype_udp) {
774 UNEXPECTED_ERROR(__FILE__, __LINE__,
775 "WSARecvFrom ERROR_HOST_UNREACHABLE: trying to recover");
776 need_recovering = ISC_TRUE;
777 break;
778 } else
779 goto fail;
781 case WSAENETRESET:
782 if (sock->type == isc_sockettype_udp) {
783 UNEXPECTED_ERROR(__FILE__, __LINE__,
784 "WSARecvFrom WSAENETRESET: trying to recover");
785 need_recovering = ISC_TRUE;
786 break;
787 } else
788 goto fail;
790 case WSAECONNRESET:
791 if (sock->type == isc_sockettype_udp) {
792 UNEXPECTED_ERROR(__FILE__, __LINE__,
793 "WSARecvFrom WSAECONNRESET: trying to recover");
794 need_recovering = ISC_TRUE;
795 break;
796 } else
797 goto fail;
799 default:
800 fail:
801 isc_result = isc__errno2result(Error);
802 if ((isc_result == ISC_R_UNEXPECTED) ||
803 (isc_result == ISC_R_CONNECTIONRESET) ||
804 (isc_result == ISC_R_HOSTUNREACH))
805 UNEXPECTED_ERROR(__FILE__, __LINE__,
806 "WSARecvFrom: Windows error code: %d, isc result %d",
807 Error, isc_result);
808 send_recvdone_abort(sock, isc_result);
809 break;
811 } else {
813 * The recv() finished immediately, but we will still get
814 * a completion event. Rather than duplicate code, let
815 * that thread handle sending the data along its way.
817 sock->pending_iocp++;
818 sock->pending_recv++;
819 sock->in_recovery_cnt = 0;
822 socket_log(__LINE__, sock, NULL, IOEVENT,
823 isc_msgcat, ISC_MSGSET_SOCKET,
824 ISC_MSG_DOIORECV,
825 "queue_io_request: fd %d result %d error %d",
826 sock->fd, Result, Error);
828 CONSISTENT(sock);
830 if (need_recovering)
831 recover_receive_request(sock, &lpo);
835 * (placeholder) Hard recovery, doing nothing useful today
836 * (other than to avoid unlimited recursion).
838 static void
839 hard_recover_receive_request(isc_socket_t *sock)
841 UNEXPECTED_ERROR(__FILE__, __LINE__,
842 "can't recover fd %d sock %p",
843 sock->fd, sock);
844 send_recvdone_abort(sock, ISC_R_UNEXPECTED);
848 * Recovery from a Windows 2008 Server bug
849 * (WSARecvFrom() getting an ERROR_HOST_UNREACHABLE).
850 * Free the overlapped pointer and requeue a receive request.
852 static void
853 recover_receive_request(isc_socket_t *sock, void **lplpo)
855 if (*lplpo != NULL)
856 HeapFree(hHeapHandle, 0, *lplpo);
857 *lplpo = NULL;
859 /* limit recursion to 20 */
860 if (sock->in_recovery_cnt++ < 20)
861 queue_receive_request(sock);
862 else
863 hard_recover_receive_request(sock);
866 static void
867 manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category,
868 isc_logmodule_t *module, int level, const char *fmt, ...)
870 char msgbuf[2048];
871 va_list ap;
873 if (!isc_log_wouldlog(isc_lctx, level))
874 return;
876 va_start(ap, fmt);
877 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
878 va_end(ap);
880 isc_log_write(isc_lctx, category, module, level,
881 "sockmgr %p: %s", sockmgr, msgbuf);
884 static void
885 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
886 isc_logcategory_t *category, isc_logmodule_t *module, int level,
887 isc_msgcat_t *msgcat, int msgset, int message,
888 const char *fmt, ...)
890 char msgbuf[2048];
891 char peerbuf[256];
892 va_list ap;
895 if (!isc_log_wouldlog(isc_lctx, level))
896 return;
898 va_start(ap, fmt);
899 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
900 va_end(ap);
902 if (address == NULL) {
903 isc_log_iwrite(isc_lctx, category, module, level,
904 msgcat, msgset, message,
905 "socket %p line %d: %s", sock, lineno, msgbuf);
906 } else {
907 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
908 isc_log_iwrite(isc_lctx, category, module, level,
909 msgcat, msgset, message,
910 "socket %p line %d peer %s: %s", sock, lineno,
911 peerbuf, msgbuf);
917 * Make an fd SOCKET non-blocking.
919 static isc_result_t
920 make_nonblock(SOCKET fd) {
921 int ret;
922 unsigned long flags = 1;
923 char strbuf[ISC_STRERRORSIZE];
925 /* Set the socket to non-blocking */
926 ret = ioctlsocket(fd, FIONBIO, &flags);
928 if (ret == -1) {
929 isc__strerror(errno, strbuf, sizeof(strbuf));
930 UNEXPECTED_ERROR(__FILE__, __LINE__,
931 "ioctlsocket(%d, FIOBIO, %d): %s",
932 fd, flags, strbuf);
934 return (ISC_R_UNEXPECTED);
937 return (ISC_R_SUCCESS);
941 * Windows 2000 systems incorrectly cause UDP sockets using WSARecvFrom
942 * to not work correctly, returning a WSACONNRESET error when a WSASendTo
943 * fails with an "ICMP port unreachable" response and preventing the
944 * socket from using the WSARecvFrom in subsequent operations.
945 * The function below fixes this, but requires that Windows 2000
946 * Service Pack 2 or later be installed on the system. NT 4.0
947 * systems are not affected by this and work correctly.
948 * See Microsoft Knowledge Base Article Q263823 for details of this.
950 isc_result_t
951 connection_reset_fix(SOCKET fd) {
952 DWORD dwBytesReturned = 0;
953 BOOL bNewBehavior = FALSE;
954 DWORD status;
956 if (isc_win32os_majorversion() < 5)
957 return (ISC_R_SUCCESS); /* NT 4.0 has no problem */
959 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
960 status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior,
961 sizeof(bNewBehavior), NULL, 0,
962 &dwBytesReturned, NULL, NULL);
963 if (status != SOCKET_ERROR)
964 return (ISC_R_SUCCESS);
965 else {
966 UNEXPECTED_ERROR(__FILE__, __LINE__,
967 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
968 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
969 ISC_MSG_FAILED, "failed"));
970 return (ISC_R_UNEXPECTED);
975 * Construct an iov array and attach it to the msghdr passed in. This is
976 * the SEND constructor, which will use the used region of the buffer
977 * (if using a buffer list) or will use the internal region (if a single
978 * buffer I/O is requested).
980 * Nothing can be NULL, and the done event must list at least one buffer
981 * on the buffer linked list for this function to be meaningful.
983 static void
984 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
985 struct msghdr *msg, char *cmsg, WSABUF *iov,
986 IoCompletionInfo *lpo)
988 unsigned int iovcount;
989 isc_buffer_t *buffer;
990 buflist_t *cpbuffer;
991 isc_region_t used;
992 size_t write_count;
993 size_t skip_count;
995 memset(msg, 0, sizeof(*msg));
997 memcpy(&msg->to_addr, &dev->address.type, dev->address.length);
998 msg->to_addr_len = dev->address.length;
1000 buffer = ISC_LIST_HEAD(dev->bufferlist);
1001 write_count = 0;
1002 iovcount = 0;
1005 * Single buffer I/O? Skip what we've done so far in this region.
1007 if (buffer == NULL) {
1008 write_count = dev->region.length - dev->n;
1009 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
1010 RUNTIME_CHECK(cpbuffer != NULL);
1011 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count);
1012 RUNTIME_CHECK(cpbuffer->buf != NULL);
1014 socket_log(__LINE__, sock, NULL, TRACE,
1015 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1016 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
1017 cpbuffer->buf, write_count);
1019 memcpy(cpbuffer->buf,(dev->region.base + dev->n), write_count);
1020 cpbuffer->buflen = write_count;
1021 ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
1022 iov[0].buf = cpbuffer->buf;
1023 iov[0].len = write_count;
1024 iovcount = 1;
1026 goto config;
1030 * Multibuffer I/O.
1031 * Skip the data in the buffer list that we have already written.
1033 skip_count = dev->n;
1034 while (buffer != NULL) {
1035 REQUIRE(ISC_BUFFER_VALID(buffer));
1036 if (skip_count < isc_buffer_usedlength(buffer))
1037 break;
1038 skip_count -= isc_buffer_usedlength(buffer);
1039 buffer = ISC_LIST_NEXT(buffer, link);
1042 while (buffer != NULL) {
1043 INSIST(iovcount < MAXSCATTERGATHER_SEND);
1045 isc_buffer_usedregion(buffer, &used);
1047 if (used.length > 0) {
1048 int uselen = used.length - skip_count;
1049 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
1050 RUNTIME_CHECK(cpbuffer != NULL);
1051 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen);
1052 RUNTIME_CHECK(cpbuffer->buf != NULL);
1054 socket_log(__LINE__, sock, NULL, TRACE,
1055 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1056 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
1057 cpbuffer->buf, write_count);
1059 memcpy(cpbuffer->buf,(used.base + skip_count), uselen);
1060 cpbuffer->buflen = uselen;
1061 iov[iovcount].buf = cpbuffer->buf;
1062 iov[iovcount].len = used.length - skip_count;
1063 write_count += uselen;
1064 skip_count = 0;
1065 iovcount++;
1067 buffer = ISC_LIST_NEXT(buffer, link);
1070 INSIST(skip_count == 0);
1072 config:
1073 msg->msg_iov = iov;
1074 msg->msg_iovlen = iovcount;
1075 msg->msg_totallen = write_count;
1078 static void
1079 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
1080 isc_socketevent_t *dev)
1082 if (sock->type == isc_sockettype_udp) {
1083 if (address != NULL)
1084 dev->address = *address;
1085 else
1086 dev->address = sock->address;
1087 } else if (sock->type == isc_sockettype_tcp) {
1088 INSIST(address == NULL);
1089 dev->address = sock->address;
1093 static void
1094 destroy_socketevent(isc_event_t *event) {
1095 isc_socketevent_t *ev = (isc_socketevent_t *)event;
1097 INSIST(ISC_LIST_EMPTY(ev->bufferlist));
1099 (ev->destroy)(event);
1102 static isc_socketevent_t *
1103 allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
1104 isc_taskaction_t action, const void *arg)
1106 isc_socketevent_t *ev;
1108 ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx,
1109 sock, eventtype,
1110 action, arg,
1111 sizeof(*ev));
1112 if (ev == NULL)
1113 return (NULL);
1115 ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
1116 ISC_LINK_INIT(ev, ev_link);
1117 ISC_LIST_INIT(ev->bufferlist);
1118 ev->region.base = NULL;
1119 ev->n = 0;
1120 ev->offset = 0;
1121 ev->attributes = 0;
1122 ev->destroy = ev->ev_destroy;
1123 ev->ev_destroy = destroy_socketevent;
1125 return (ev);
#if defined(ISC_SOCKET_DEBUG)
/*
 * Debugging aid: print the message header 'msg' and every entry of its
 * I/O vector to stdout, tagged with the descriptor of 'sock'.
 *
 * Every argument is cast to the exact type its conversion specifier
 * expects, so the output is well defined whatever the underlying field
 * types are on the target platform.
 */
static void
dump_msg(struct msghdr *msg, isc_socket_t *sock) {
	unsigned int i;

	printf("MSGHDR %p, Socket #: %u\n", (void *)msg,
	       (unsigned int)sock->fd);
	printf("\tname %p, namelen %d\n", (void *)msg->msg_name,
	       (int)msg->msg_namelen);
	printf("\tiov %p, iovlen %d\n", (void *)msg->msg_iov,
	       (int)msg->msg_iovlen);
	for (i = 0; i < (unsigned int)msg->msg_iovlen; i++)
		printf("\t\t%u\tbase %p, len %d\n", i,
		       (void *)msg->msg_iov[i].buf,
		       (int)msg->msg_iov[i].len);
}
#endif
1144 * map the error code
1147 map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
1148 char *errorstring, size_t bufsize) {
1150 int doreturn;
1151 switch (windows_errno) {
1152 case WSAECONNREFUSED:
1153 *isc_errno = ISC_R_CONNREFUSED;
1154 if (sock->connected)
1155 doreturn = DOIO_HARD;
1156 else
1157 doreturn = DOIO_SOFT;
1158 break;
1159 case WSAENETUNREACH:
1160 case ERROR_NETWORK_UNREACHABLE:
1161 *isc_errno = ISC_R_NETUNREACH;
1162 if (sock->connected)
1163 doreturn = DOIO_HARD;
1164 else
1165 doreturn = DOIO_SOFT;
1166 break;
1167 case ERROR_PORT_UNREACHABLE:
1168 case ERROR_HOST_UNREACHABLE:
1169 case WSAEHOSTUNREACH:
1170 *isc_errno = ISC_R_HOSTUNREACH;
1171 if (sock->connected)
1172 doreturn = DOIO_HARD;
1173 else
1174 doreturn = DOIO_SOFT;
1175 break;
1176 case WSAENETDOWN:
1177 *isc_errno = ISC_R_NETDOWN;
1178 if (sock->connected)
1179 doreturn = DOIO_HARD;
1180 else
1181 doreturn = DOIO_SOFT;
1182 break;
1183 case WSAEHOSTDOWN:
1184 *isc_errno = ISC_R_HOSTDOWN;
1185 if (sock->connected)
1186 doreturn = DOIO_HARD;
1187 else
1188 doreturn = DOIO_SOFT;
1189 break;
1190 case WSAEACCES:
1191 *isc_errno = ISC_R_NOPERM;
1192 if (sock->connected)
1193 doreturn = DOIO_HARD;
1194 else
1195 doreturn = DOIO_SOFT;
1196 break;
1197 case WSAECONNRESET:
1198 case WSAENETRESET:
1199 case WSAECONNABORTED:
1200 case WSAEDISCON:
1201 *isc_errno = ISC_R_CONNECTIONRESET;
1202 if (sock->connected)
1203 doreturn = DOIO_HARD;
1204 else
1205 doreturn = DOIO_SOFT;
1206 break;
1207 case WSAENOTCONN:
1208 *isc_errno = ISC_R_NOTCONNECTED;
1209 if (sock->connected)
1210 doreturn = DOIO_HARD;
1211 else
1212 doreturn = DOIO_SOFT;
1213 break;
1214 case ERROR_OPERATION_ABORTED:
1215 case ERROR_CONNECTION_ABORTED:
1216 case ERROR_REQUEST_ABORTED:
1217 *isc_errno = ISC_R_CONNECTIONRESET;
1218 doreturn = DOIO_HARD;
1219 break;
1220 case WSAENOBUFS:
1221 *isc_errno = ISC_R_NORESOURCES;
1222 doreturn = DOIO_HARD;
1223 break;
1224 case WSAEAFNOSUPPORT:
1225 *isc_errno = ISC_R_FAMILYNOSUPPORT;
1226 doreturn = DOIO_HARD;
1227 break;
1228 case WSAEADDRNOTAVAIL:
1229 *isc_errno = ISC_R_ADDRNOTAVAIL;
1230 doreturn = DOIO_HARD;
1231 break;
1232 case WSAEDESTADDRREQ:
1233 *isc_errno = ISC_R_BADADDRESSFORM;
1234 doreturn = DOIO_HARD;
1235 break;
1236 case ERROR_NETNAME_DELETED:
1237 *isc_errno = ISC_R_NETDOWN;
1238 doreturn = DOIO_HARD;
1239 break;
1240 default:
1241 *isc_errno = ISC_R_IOERROR;
1242 doreturn = DOIO_HARD;
1243 break;
1245 if (doreturn == DOIO_HARD) {
1246 isc__strerror(windows_errno, errorstring, bufsize);
1248 return (doreturn);
1251 static void
1252 fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
1253 isc_region_t r;
1254 int copylen;
1255 isc_buffer_t *buffer;
1257 INSIST(dev->n < dev->minimum);
1258 INSIST(sock->recvbuf.remaining > 0);
1259 INSIST(sock->pending_recv == 0);
1261 if (sock->type == isc_sockettype_udp) {
1262 dev->address.length = sock->recvbuf.from_addr_len;
1263 memcpy(&dev->address.type, &sock->recvbuf.from_addr,
1264 sock->recvbuf.from_addr_len);
1265 if (isc_sockaddr_getport(&dev->address) == 0) {
1266 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1267 socket_log(__LINE__, sock, &dev->address, IOEVENT,
1268 isc_msgcat, ISC_MSGSET_SOCKET,
1269 ISC_MSG_ZEROPORT,
1270 "dropping source port zero packet");
1272 sock->recvbuf.remaining = 0;
1273 return;
1275 } else if (sock->type == isc_sockettype_tcp) {
1276 dev->address = sock->address;
1280 * Run through the list of buffers we were given, and find the
1281 * first one with space. Once it is found, loop through, filling
1282 * the buffers as much as possible.
1284 buffer = ISC_LIST_HEAD(dev->bufferlist);
1285 if (buffer != NULL) { // Multi-buffer receive
1286 while (buffer != NULL && sock->recvbuf.remaining > 0) {
1287 REQUIRE(ISC_BUFFER_VALID(buffer));
1288 if (isc_buffer_availablelength(buffer) > 0) {
1289 isc_buffer_availableregion(buffer, &r);
1290 copylen = min(r.length, sock->recvbuf.remaining);
1291 memcpy(r.base, sock->recvbuf.consume_position, copylen);
1292 sock->recvbuf.consume_position += copylen;
1293 sock->recvbuf.remaining -= copylen;
1294 isc_buffer_add(buffer, copylen);
1295 dev->n += copylen;
1297 buffer = ISC_LIST_NEXT(buffer, link);
1299 } else { // Single-buffer receive
1300 copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining);
1301 memcpy(dev->region.base + dev->n, sock->recvbuf.consume_position, copylen);
1302 sock->recvbuf.consume_position += copylen;
1303 sock->recvbuf.remaining -= copylen;
1304 dev->n += copylen;
1308 * UDP receives are all-consuming. That is, if we have 4k worth of
1309 * data in our receive buffer, and the caller only gave us
1310 * 1k of space, we will toss the remaining 3k of data. TCP
1311 * will keep the extra data around and use it for later requests.
1313 if (sock->type == isc_sockettype_udp)
1314 sock->recvbuf.remaining = 0;
1318 * Copy out as much data from the internal buffer to done events.
1319 * As each done event is filled, send it along its way.
1321 static void
1322 completeio_recv(isc_socket_t *sock)
1324 isc_socketevent_t *dev;
1327 * If we are in the process of filling our buffer, we cannot
1328 * touch it yet, so don't.
1330 if (sock->pending_recv > 0)
1331 return;
1333 while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) {
1334 dev = ISC_LIST_HEAD(sock->recv_list);
1337 * See if we have sufficient data in our receive buffer
1338 * to handle this. If we do, copy out the data.
1340 fill_recv(sock, dev);
1343 * Did we satisfy it?
1345 if (dev->n >= dev->minimum) {
1346 dev->result = ISC_R_SUCCESS;
1347 send_recvdone_event(sock, &dev);
1353 * Returns:
1354 * DOIO_SUCCESS The operation succeeded. dev->result contains
1355 * ISC_R_SUCCESS.
1357 * DOIO_HARD A hard or unexpected I/O error was encountered.
1358 * dev->result contains the appropriate error.
1360 * DOIO_SOFT A soft I/O error was encountered. No senddone
1361 * event was sent. The operation should be retried.
1363 * No other return values are possible.
1365 static int
1366 completeio_send(isc_socket_t *sock, isc_socketevent_t *dev,
1367 struct msghdr *messagehdr, int cc, int send_errno)
1369 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1370 char strbuf[ISC_STRERRORSIZE];
1372 if (send_errno != 0) {
1373 if (SOFT_ERROR(send_errno))
1374 return (DOIO_SOFT);
1376 return (map_socket_error(sock, send_errno, &dev->result,
1377 strbuf, sizeof(strbuf)));
1380 * The other error types depend on whether or not the
1381 * socket is UDP or TCP. If it is UDP, some errors
1382 * that we expect to be fatal under TCP are merely
1383 * annoying, and are really soft errors.
1385 * However, these soft errors are still returned as
1386 * a status.
1388 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
1389 isc__strerror(send_errno, strbuf, sizeof(strbuf));
1390 UNEXPECTED_ERROR(__FILE__, __LINE__, "completeio_send: %s: %s",
1391 addrbuf, strbuf);
1392 dev->result = isc__errno2result(send_errno);
1393 return (DOIO_HARD);
1397 * If we write less than we expected, update counters, poke.
1399 dev->n += cc;
1400 if (cc != messagehdr->msg_totallen)
1401 return (DOIO_SOFT);
1404 * Exactly what we wanted to write. We're done with this
1405 * entry. Post its completion event.
1407 dev->result = ISC_R_SUCCESS;
1408 return (DOIO_SUCCESS);
1411 static int
1412 startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
1413 int *send_errno)
1415 char *cmsg = NULL;
1416 char strbuf[ISC_STRERRORSIZE];
1417 IoCompletionInfo *lpo;
1418 int status;
1419 struct msghdr *msghdr;
1421 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
1422 HEAP_ZERO_MEMORY,
1423 sizeof(IoCompletionInfo));
1424 RUNTIME_CHECK(lpo != NULL);
1425 lpo->request_type = SOCKET_SEND;
1426 lpo->dev = dev;
1427 msghdr = &lpo->messagehdr;
1428 memset(msghdr, 0, sizeof(struct msghdr));
1429 ISC_LIST_INIT(lpo->bufferlist);
1431 build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo);
1433 *nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno);
1435 if (*nbytes < 0) {
1437 * I/O has been initiated
1438 * completion will be through the completion port
1440 if (PENDING_ERROR(*send_errno)) {
1441 status = DOIO_PENDING;
1442 goto done;
1445 if (SOFT_ERROR(*send_errno)) {
1446 status = DOIO_SOFT;
1447 goto done;
1451 * If we got this far then something is wrong
1453 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1454 isc__strerror(*send_errno, strbuf, sizeof(strbuf));
1455 socket_log(__LINE__, sock, NULL, IOEVENT,
1456 isc_msgcat, ISC_MSGSET_SOCKET,
1457 ISC_MSG_INTERNALSEND,
1458 "startio_send: internal_sendmsg(%d) %d "
1459 "bytes, err %d/%s",
1460 sock->fd, *nbytes, *send_errno, strbuf);
1462 status = DOIO_HARD;
1463 goto done;
1465 dev->result = ISC_R_SUCCESS;
1466 status = DOIO_SOFT;
1467 done:
1468 _set_state(sock, SOCK_DATA);
1469 return (status);
1472 static isc_result_t
1473 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1474 isc_socket_t **socketp) {
1475 isc_socket_t *sock;
1476 isc_result_t result;
1478 sock = isc_mem_get(manager->mctx, sizeof(*sock));
1480 if (sock == NULL)
1481 return (ISC_R_NOMEMORY);
1483 sock->magic = 0;
1484 sock->references = 0;
1486 sock->manager = manager;
1487 sock->type = type;
1488 sock->fd = INVALID_SOCKET;
1490 ISC_LINK_INIT(sock, link);
1493 * set up list of readers and writers to be initially empty
1495 ISC_LIST_INIT(sock->recv_list);
1496 ISC_LIST_INIT(sock->send_list);
1497 ISC_LIST_INIT(sock->accept_list);
1498 sock->connect_ev = NULL;
1499 sock->pending_accept = 0;
1500 sock->pending_recv = 0;
1501 sock->pending_send = 0;
1502 sock->pending_iocp = 0;
1503 sock->listener = 0;
1504 sock->connected = 0;
1505 sock->pending_connect = 0;
1506 sock->bound = 0;
1507 sock->in_recovery_cnt = 0;
1508 memset(sock->name, 0, sizeof(sock->name)); // zero the name field
1509 _set_state(sock, SOCK_INITIALIZED);
1511 sock->recvbuf.len = 65536;
1512 sock->recvbuf.consume_position = sock->recvbuf.base;
1513 sock->recvbuf.remaining = 0;
1514 sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size
1515 if (sock->recvbuf.base == NULL) {
1516 sock->magic = 0;
1517 goto error;
1521 * initialize the lock
1523 result = isc_mutex_init(&sock->lock);
1524 if (result != ISC_R_SUCCESS) {
1525 sock->magic = 0;
1526 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1527 sock->recvbuf.base = NULL;
1528 goto error;
1531 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1532 "allocated");
1534 sock->magic = SOCKET_MAGIC;
1535 *socketp = sock;
1537 return (ISC_R_SUCCESS);
1539 error:
1540 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1542 return (result);
1546 * Verify that the socket state is consistent.
1548 static void
1549 consistent(isc_socket_t *sock) {
1551 isc_socketevent_t *dev;
1552 isc_socket_newconnev_t *nev;
1553 unsigned int count;
1554 char *crash_reason;
1555 isc_boolean_t crash = ISC_FALSE;
1557 REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send
1558 + sock->pending_accept + sock->pending_connect);
1560 dev = ISC_LIST_HEAD(sock->send_list);
1561 count = 0;
1562 while (dev != NULL) {
1563 count++;
1564 dev = ISC_LIST_NEXT(dev, ev_link);
1566 if (count > sock->pending_send) {
1567 crash = ISC_TRUE;
1568 crash_reason = "send_list > sock->pending_send";
1571 nev = ISC_LIST_HEAD(sock->accept_list);
1572 count = 0;
1573 while (nev != NULL) {
1574 count++;
1575 nev = ISC_LIST_NEXT(nev, ev_link);
1577 if (count > sock->pending_accept) {
1578 crash = ISC_TRUE;
1579 crash_reason = "send_list > sock->pending_send";
1582 if (crash) {
1583 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1584 ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s",
1585 crash_reason);
1586 sock_dump(sock);
1587 INSIST(crash == ISC_FALSE);
1592 * Maybe free the socket.
1594 * This function will verify tht the socket is no longer in use in any way,
1595 * either internally or externally. This is the only place where this
1596 * check is to be made; if some bit of code believes that IT is done with
1597 * the socket (e.g., some reference counter reaches zero), it should call
1598 * this function.
1600 * When calling this function, the socket must be locked, and the manager
1601 * must be unlocked.
1603 * When this function returns, *socketp will be NULL. No tricks to try
1604 * to hold on to this pointer are allowed.
1606 static void
1607 maybe_free_socket(isc_socket_t **socketp, int lineno) {
1608 isc_socket_t *sock = *socketp;
1609 *socketp = NULL;
1611 INSIST(VALID_SOCKET(sock));
1612 CONSISTENT(sock);
1614 if (sock->pending_iocp > 0
1615 || sock->pending_recv > 0
1616 || sock->pending_send > 0
1617 || sock->pending_accept > 0
1618 || sock->references > 0
1619 || sock->pending_connect == 1
1620 || !ISC_LIST_EMPTY(sock->recv_list)
1621 || !ISC_LIST_EMPTY(sock->send_list)
1622 || !ISC_LIST_EMPTY(sock->accept_list)
1623 || sock->fd != INVALID_SOCKET) {
1624 UNLOCK(&sock->lock);
1625 return;
1627 UNLOCK(&sock->lock);
1629 free_socket(&sock, lineno);
1632 void
1633 free_socket(isc_socket_t **sockp, int lineno) {
1634 isc_socketmgr_t *manager;
1635 isc_socket_t *sock = *sockp;
1636 *sockp = NULL;
1638 manager = sock->manager;
1641 * Seems we can free the socket after all.
1643 manager = sock->manager;
1644 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1645 ISC_MSG_DESTROYING, "freeing socket line %d fd %d lock %p semaphore %p",
1646 lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore);
1648 sock->magic = 0;
1649 DESTROYLOCK(&sock->lock);
1651 if (sock->recvbuf.base != NULL)
1652 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1654 LOCK(&manager->lock);
1655 if (ISC_LINK_LINKED(sock, link))
1656 ISC_LIST_UNLINK(manager->socklist, sock, link);
1657 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1659 if (ISC_LIST_EMPTY(manager->socklist))
1660 SIGNAL(&manager->shutdown_ok);
1661 UNLOCK(&manager->lock);
1665 * Create a new 'type' socket managed by 'manager'. Events
1666 * will be posted to 'task' and when dispatched 'action' will be
1667 * called with 'arg' as the arg value. The new socket is returned
1668 * in 'socketp'.
1670 isc_result_t
1671 isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1672 isc_socket_t **socketp) {
1673 isc_socket_t *sock = NULL;
1674 isc_result_t result;
1675 #if defined(USE_CMSG)
1676 int on = 1;
1677 #endif
1678 #if defined(SO_RCVBUF)
1679 ISC_SOCKADDR_LEN_T optlen;
1680 int size;
1681 #endif
1682 int socket_errno;
1683 char strbuf[ISC_STRERRORSIZE];
1685 REQUIRE(VALID_MANAGER(manager));
1686 REQUIRE(socketp != NULL && *socketp == NULL);
1687 REQUIRE(type != isc_sockettype_fdwatch);
1689 result = allocate_socket(manager, type, &sock);
1690 if (result != ISC_R_SUCCESS)
1691 return (result);
1693 sock->pf = pf;
1694 switch (type) {
1695 case isc_sockettype_udp:
1696 sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
1697 if (sock->fd != INVALID_SOCKET) {
1698 result = connection_reset_fix(sock->fd);
1699 if (result != ISC_R_SUCCESS) {
1700 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1701 "closed %d %d %d con_reset_fix_failed",
1702 sock->pending_recv, sock->pending_send,
1703 sock->references);
1704 closesocket(sock->fd);
1705 _set_state(sock, SOCK_CLOSED);
1706 sock->fd = INVALID_SOCKET;
1707 free_socket(&sock, __LINE__);
1708 return (result);
1711 break;
1712 case isc_sockettype_tcp:
1713 sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
1714 break;
1717 if (sock->fd == INVALID_SOCKET) {
1718 socket_errno = WSAGetLastError();
1719 free_socket(&sock, __LINE__);
1721 switch (socket_errno) {
1722 case WSAEMFILE:
1723 case WSAENOBUFS:
1724 return (ISC_R_NORESOURCES);
1726 case WSAEPROTONOSUPPORT:
1727 case WSAEPFNOSUPPORT:
1728 case WSAEAFNOSUPPORT:
1729 return (ISC_R_FAMILYNOSUPPORT);
1731 default:
1732 isc__strerror(socket_errno, strbuf, sizeof(strbuf));
1733 UNEXPECTED_ERROR(__FILE__, __LINE__,
1734 "socket() %s: %s",
1735 isc_msgcat_get(isc_msgcat,
1736 ISC_MSGSET_GENERAL,
1737 ISC_MSG_FAILED,
1738 "failed"),
1739 strbuf);
1740 return (ISC_R_UNEXPECTED);
1744 result = make_nonblock(sock->fd);
1745 if (result != ISC_R_SUCCESS) {
1746 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1747 "closed %d %d %d make_nonblock_failed",
1748 sock->pending_recv, sock->pending_send,
1749 sock->references);
1750 closesocket(sock->fd);
1751 sock->fd = INVALID_SOCKET;
1752 free_socket(&sock, __LINE__);
1753 return (result);
1757 #if defined(USE_CMSG) || defined(SO_RCVBUF)
1758 if (type == isc_sockettype_udp) {
1760 #if defined(USE_CMSG)
1761 #if defined(ISC_PLATFORM_HAVEIPV6)
1762 #ifdef IPV6_RECVPKTINFO
1763 /* 2292bis */
1764 if ((pf == AF_INET6)
1765 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1766 (char *)&on, sizeof(on)) < 0)) {
1767 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1768 UNEXPECTED_ERROR(__FILE__, __LINE__,
1769 "setsockopt(%d, IPV6_RECVPKTINFO) "
1770 "%s: %s", sock->fd,
1771 isc_msgcat_get(isc_msgcat,
1772 ISC_MSGSET_GENERAL,
1773 ISC_MSG_FAILED,
1774 "failed"),
1775 strbuf);
1777 #else
1778 /* 2292 */
1779 if ((pf == AF_INET6)
1780 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
1781 (char *)&on, sizeof(on)) < 0)) {
1782 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1783 UNEXPECTED_ERROR(__FILE__, __LINE__,
1784 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1785 sock->fd,
1786 isc_msgcat_get(isc_msgcat,
1787 ISC_MSGSET_GENERAL,
1788 ISC_MSG_FAILED,
1789 "failed"),
1790 strbuf);
1792 #endif /* IPV6_RECVPKTINFO */
1793 #ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/
1794 /* use minimum MTU */
1795 if (pf == AF_INET6) {
1796 (void)setsockopt(sock->fd, IPPROTO_IPV6,
1797 IPV6_USE_MIN_MTU,
1798 (char *)&on, sizeof(on));
1800 #endif
1801 #endif /* ISC_PLATFORM_HAVEIPV6 */
1802 #endif /* defined(USE_CMSG) */
1804 #if defined(SO_RCVBUF)
1805 optlen = sizeof(size);
1806 if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1807 (char *)&size, &optlen) >= 0 &&
1808 size < RCVBUFSIZE) {
1809 size = RCVBUFSIZE;
1810 (void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1811 (char *)&size, sizeof(size));
1813 #endif
1816 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
1818 _set_state(sock, SOCK_OPEN);
1819 sock->references = 1;
1820 *socketp = sock;
1822 iocompletionport_update(sock);
1825 * Note we don't have to lock the socket like we normally would because
1826 * there are no external references to it yet.
1828 LOCK(&manager->lock);
1829 ISC_LIST_APPEND(manager->socklist, sock, link);
1830 InterlockedIncrement(&manager->totalSockets);
1831 UNLOCK(&manager->lock);
1833 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1834 ISC_MSG_CREATED, "created %u type %u", sock->fd, type);
1836 return (ISC_R_SUCCESS);
1839 isc_result_t
1840 isc_socket_open(isc_socket_t *sock) {
1841 REQUIRE(VALID_SOCKET(sock));
1842 REQUIRE(sock->type != isc_sockettype_fdwatch);
1844 return (ISC_R_NOTIMPLEMENTED);
1848 * Attach to a socket. Caller must explicitly detach when it is done.
1850 void
1851 isc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1852 REQUIRE(VALID_SOCKET(sock));
1853 REQUIRE(socketp != NULL && *socketp == NULL);
1855 LOCK(&sock->lock);
1856 CONSISTENT(sock);
1857 sock->references++;
1858 UNLOCK(&sock->lock);
1860 *socketp = sock;
1864 * Dereference a socket. If this is the last reference to it, clean things
1865 * up by destroying the socket.
1867 void
1868 isc__socket_detach(isc_socket_t **socketp) {
1869 isc_socket_t *sock;
1870 isc_boolean_t kill_socket = ISC_FALSE;
1872 REQUIRE(socketp != NULL);
1873 sock = *socketp;
1874 REQUIRE(VALID_SOCKET(sock));
1875 REQUIRE(sock->type != isc_sockettype_fdwatch);
1877 LOCK(&sock->lock);
1878 CONSISTENT(sock);
1879 REQUIRE(sock->references > 0);
1880 sock->references--;
1882 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1883 "detach_socket %d %d %d",
1884 sock->pending_recv, sock->pending_send,
1885 sock->references);
1887 if (sock->references == 0 && sock->fd != INVALID_SOCKET) {
1888 closesocket(sock->fd);
1889 sock->fd = INVALID_SOCKET;
1890 _set_state(sock, SOCK_CLOSED);
1893 maybe_free_socket(&sock, __LINE__);
1895 *socketp = NULL;
1898 isc_result_t
1899 isc_socket_close(isc_socket_t *sock) {
1900 REQUIRE(VALID_SOCKET(sock));
1901 REQUIRE(sock->type != isc_sockettype_fdwatch);
1903 return (ISC_R_NOTIMPLEMENTED);
1907 * Dequeue an item off the given socket's read queue, set the result code
1908 * in the done event to the one provided, and send it to the task it was
1909 * destined for.
1911 * If the event to be sent is on a list, remove it before sending. If
1912 * asked to, send and detach from the task as well.
1914 * Caller must have the socket locked if the event is attached to the socket.
1916 static void
1917 send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1918 isc_task_t *task;
1920 task = (*dev)->ev_sender;
1921 (*dev)->ev_sender = sock;
1923 if (ISC_LINK_LINKED(*dev, ev_link))
1924 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1926 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1927 == ISC_SOCKEVENTATTR_ATTACHED)
1928 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1929 else
1930 isc_task_send(task, (isc_event_t **)dev);
1932 CONSISTENT(sock);
1936 * See comments for send_recvdone_event() above.
1938 static void
1939 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1940 isc_task_t *task;
1942 INSIST(dev != NULL && *dev != NULL);
1944 task = (*dev)->ev_sender;
1945 (*dev)->ev_sender = sock;
1947 if (ISC_LINK_LINKED(*dev, ev_link))
1948 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
1950 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1951 == ISC_SOCKEVENTATTR_ATTACHED)
1952 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1953 else
1954 isc_task_send(task, (isc_event_t **)dev);
1956 CONSISTENT(sock);
1960 * See comments for send_recvdone_event() above.
1962 static void
1963 send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) {
1964 isc_task_t *task;
1966 INSIST(adev != NULL && *adev != NULL);
1968 task = (*adev)->ev_sender;
1969 (*adev)->ev_sender = sock;
1971 if (ISC_LINK_LINKED(*adev, ev_link))
1972 ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link);
1974 isc_task_sendanddetach(&task, (isc_event_t **)adev);
1976 CONSISTENT(sock);
1980 * See comments for send_recvdone_event() above.
1982 static void
1983 send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) {
1984 isc_task_t *task;
1986 INSIST(cdev != NULL && *cdev != NULL);
1988 task = (*cdev)->ev_sender;
1989 (*cdev)->ev_sender = sock;
1991 sock->connect_ev = NULL;
1993 isc_task_sendanddetach(&task, (isc_event_t **)cdev);
1995 CONSISTENT(sock);
1999 * On entry to this function, the event delivered is the internal
2000 * readable event, and the first item on the accept_list should be
2001 * the done event we want to send. If the list is empty, this is a no-op,
2002 * so just close the new connection, unlock, and return.
2004 * Note the socket is locked before entering here
2006 static void
2007 internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) {
2008 isc_socket_newconnev_t *adev;
2009 isc_result_t result = ISC_R_SUCCESS;
2010 isc_socket_t *nsock;
2011 struct sockaddr *localaddr;
2012 int localaddr_len = sizeof(*localaddr);
2013 struct sockaddr *remoteaddr;
2014 int remoteaddr_len = sizeof(*remoteaddr);
2016 INSIST(VALID_SOCKET(sock));
2017 LOCK(&sock->lock);
2018 CONSISTENT(sock);
2020 socket_log(__LINE__, sock, NULL, TRACE,
2021 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2022 "internal_accept called");
2024 INSIST(sock->listener);
2026 INSIST(sock->pending_iocp > 0);
2027 sock->pending_iocp--;
2028 INSIST(sock->pending_accept > 0);
2029 sock->pending_accept--;
2031 adev = lpo->adev;
2034 * If the event is no longer in the list we can just return.
2036 if (!acceptdone_is_active(sock, adev))
2037 goto done;
2039 nsock = adev->newsocket;
2042 * Pull off the done event.
2044 ISC_LIST_UNLINK(sock->accept_list, adev, ev_link);
2047 * Extract the addresses from the socket, copy them into the structure,
2048 * and return the new socket.
2050 ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0,
2051 sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16,
2052 (LPSOCKADDR *)&localaddr, &localaddr_len,
2053 (LPSOCKADDR *)&remoteaddr, &remoteaddr_len);
2054 memcpy(&adev->address.type, remoteaddr, remoteaddr_len);
2055 adev->address.length = remoteaddr_len;
2056 nsock->address = adev->address;
2057 nsock->pf = adev->address.type.sa.sa_family;
2059 socket_log(__LINE__, nsock, &nsock->address, TRACE,
2060 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2061 "internal_accept parent %p", sock);
2063 result = make_nonblock(adev->newsocket->fd);
2064 INSIST(result == ISC_R_SUCCESS);
2066 INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT,
2067 (char *)&sock->fd, sizeof(sock->fd)) == 0);
2070 * Hook it up into the manager.
2072 nsock->bound = 1;
2073 nsock->connected = 1;
2074 _set_state(nsock, SOCK_OPEN);
2076 LOCK(&nsock->manager->lock);
2077 ISC_LIST_APPEND(nsock->manager->socklist, nsock, link);
2078 InterlockedIncrement(&nsock->manager->totalSockets);
2079 UNLOCK(&nsock->manager->lock);
2081 socket_log(__LINE__, sock, &nsock->address, CREATION,
2082 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2083 "accepted_connection new_socket %p fd %d",
2084 nsock, nsock->fd);
2086 adev->result = result;
2087 send_acceptdone_event(sock, &adev);
2089 done:
2090 CONSISTENT(sock);
2091 UNLOCK(&sock->lock);
2093 HeapFree(hHeapHandle, 0, lpo->acceptbuffer);
2094 lpo->acceptbuffer = NULL;
2098 * Called when a socket with a pending connect() finishes.
2099 * Note that the socket is locked before entering.
2101 static void
2102 internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) {
2103 isc_socket_connev_t *cdev;
2104 char strbuf[ISC_STRERRORSIZE];
2106 INSIST(VALID_SOCKET(sock));
2108 LOCK(&sock->lock);
2110 INSIST(sock->pending_iocp > 0);
2111 sock->pending_iocp--;
2112 INSIST(sock->pending_connect == 1);
2113 sock->pending_connect = 0;
2116 * Has this event been canceled?
2118 cdev = lpo->cdev;
2119 if (!connectdone_is_active(sock, cdev)) {
2120 sock->pending_connect = 0;
2121 if (sock->fd != INVALID_SOCKET) {
2122 closesocket(sock->fd);
2123 sock->fd = INVALID_SOCKET;
2124 _set_state(sock, SOCK_CLOSED);
2126 CONSISTENT(sock);
2127 UNLOCK(&sock->lock);
2128 return;
2132 * Check possible Windows network event error status here.
2134 if (connect_errno != 0) {
2136 * If the error is SOFT, just try again on this
2137 * fd and pretend nothing strange happened.
2139 if (SOFT_ERROR(connect_errno) ||
2140 connect_errno == WSAEINPROGRESS) {
2141 sock->pending_connect = 1;
2142 CONSISTENT(sock);
2143 UNLOCK(&sock->lock);
2144 return;
2148 * Translate other errors into ISC_R_* flavors.
2150 switch (connect_errno) {
2151 #define ERROR_MATCH(a, b) case a: cdev->result = b; break;
2152 ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
2153 ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
2154 ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
2155 ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED);
2156 ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
2157 ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN);
2158 ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH);
2159 ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN);
2160 ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES);
2161 ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET);
2162 ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
2163 ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT);
2164 #undef ERROR_MATCH
2165 default:
2166 cdev->result = ISC_R_UNEXPECTED;
2167 isc__strerror(connect_errno, strbuf, sizeof(strbuf));
2168 UNEXPECTED_ERROR(__FILE__, __LINE__,
2169 "internal_connect: connect() %s",
2170 strbuf);
2172 } else {
2173 INSIST(setsockopt(sock->fd, SOL_SOCKET,
2174 SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0);
2175 cdev->result = ISC_R_SUCCESS;
2176 sock->connected = 1;
2177 socket_log(__LINE__, sock, &sock->address, IOEVENT,
2178 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2179 "internal_connect: success");
2182 send_connectdone_event(sock, &cdev);
2184 UNLOCK(&sock->lock);
2188 * Loop through the socket, returning ISC_R_EOF for each done event pending.
2190 static void
2191 send_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
2192 isc_socketevent_t *dev;
2194 while (!ISC_LIST_EMPTY(sock->recv_list)) {
2195 dev = ISC_LIST_HEAD(sock->recv_list);
2196 dev->result = result;
2197 send_recvdone_event(sock, &dev);
2202 * Take the data we received in our private buffer, and if any recv() calls on
2203 * our list are satisfied, send the corresponding done event.
2205 * If we need more data (there are still items on the recv_list after we consume all
2206 * our data) then arrange for another system recv() call to fill our buffers.
2208 static void
2209 internal_recv(isc_socket_t *sock, int nbytes)
2211 INSIST(VALID_SOCKET(sock));
2213 LOCK(&sock->lock);
2214 CONSISTENT(sock);
2216 socket_log(__LINE__, sock, NULL, IOEVENT,
2217 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2218 "internal_recv: %d bytes received", nbytes);
2221 * If we got here, the I/O operation succeeded. However, we might still have removed this
2222 * event from our notification list (or never placed it on it due to immediate completion.)
2223 * Handle the reference counting here, and handle the cancellation event just after.
2225 INSIST(sock->pending_iocp > 0);
2226 sock->pending_iocp--;
2227 INSIST(sock->pending_recv > 0);
2228 sock->pending_recv--;
2231 * The only way we could have gotten here is that our I/O has successfully completed.
2232 * Update our pointers, and move on. The only odd case here is that we might not
2233 * have received enough data on a TCP stream to satisfy the minimum requirements. If
2234 * this is the case, we will re-issue the recv() call for what we need.
2236 * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end
2237 * has closed.
2239 if (nbytes == 0 && sock->type == isc_sockettype_tcp) {
2240 send_recvdone_abort(sock, ISC_R_EOF);
2241 maybe_free_socket(&sock, __LINE__);
2242 return;
2244 sock->recvbuf.remaining = nbytes;
2245 sock->recvbuf.consume_position = sock->recvbuf.base;
2246 completeio_recv(sock);
2249 * If there are more receivers waiting for data, queue another receive
2250 * here.
2252 queue_receive_request(sock);
2255 * Unlock and/or destroy if we are the last thing this socket has left to do.
2257 maybe_free_socket(&sock, __LINE__);
2260 static void
2261 internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
2262 struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
2264 buflist_t *buffer;
2267 * Find out what socket this is and lock it.
2269 INSIST(VALID_SOCKET(sock));
2271 LOCK(&sock->lock);
2272 CONSISTENT(sock);
2274 socket_log(__LINE__, sock, NULL, IOEVENT,
2275 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2276 "internal_send: task got socket event %p", dev);
2278 buffer = ISC_LIST_HEAD(lpo->bufferlist);
2279 while (buffer != NULL) {
2280 ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);
2282 socket_log(__LINE__, sock, NULL, TRACE,
2283 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2284 "free_buffer %p %p", buffer, buffer->buf);
2286 HeapFree(hHeapHandle, 0, buffer->buf);
2287 HeapFree(hHeapHandle, 0, buffer);
2288 buffer = ISC_LIST_HEAD(lpo->bufferlist);
2291 INSIST(sock->pending_iocp > 0);
2292 sock->pending_iocp--;
2293 INSIST(sock->pending_send > 0);
2294 sock->pending_send--;
2296 /* If the event is no longer in the list we can just return */
2297 if (!senddone_is_active(sock, dev))
2298 goto done;
2301 * Set the error code and send things on its way.
2303 switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
2304 case DOIO_SOFT:
2305 break;
2306 case DOIO_HARD:
2307 case DOIO_SUCCESS:
2308 send_senddone_event(sock, &dev);
2309 break;
2312 done:
2313 maybe_free_socket(&sock, __LINE__);
2317 * These return if the done event passed in is on the list (or for connect, is
2318 * the one we're waiting for. Using these ensures we will not double-send an
2319 * event.
2321 static isc_boolean_t
2322 senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
2324 isc_socketevent_t *ldev;
2326 ldev = ISC_LIST_HEAD(sock->send_list);
2327 while (ldev != NULL && ldev != dev)
2328 ldev = ISC_LIST_NEXT(ldev, ev_link);
2330 return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2333 static isc_boolean_t
2334 acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
2336 isc_socket_newconnev_t *ldev;
2338 ldev = ISC_LIST_HEAD(sock->accept_list);
2339 while (ldev != NULL && ldev != dev)
2340 ldev = ISC_LIST_NEXT(ldev, ev_link);
2342 return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2345 static isc_boolean_t
2346 connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
2348 return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE);
2352 * This is the I/O Completion Port Worker Function. It loops forever
2353 * waiting for I/O to complete and then forwards them for further
2354 * processing. There are a number of these in separate threads.
2356 static isc_threadresult_t WINAPI
2357 SocketIoThread(LPVOID ThreadContext) {
2358 isc_socketmgr_t *manager = ThreadContext;
2359 BOOL bSuccess = FALSE;
2360 DWORD nbytes;
2361 IoCompletionInfo *lpo = NULL;
2362 isc_socket_t *sock = NULL;
2363 int request;
2364 struct msghdr *messagehdr = NULL;
2365 int errval;
2366 char strbuf[ISC_STRERRORSIZE];
2367 int errstatus;
2369 REQUIRE(VALID_MANAGER(manager));
2372 * Set the thread priority high enough so I/O will
2373 * preempt normal recv packet processing, but not
2374 * higher than the timer sync thread.
2376 if (!SetThreadPriority(GetCurrentThread(),
2377 THREAD_PRIORITY_ABOVE_NORMAL)) {
2378 errval = GetLastError();
2379 isc__strerror(errval, strbuf, sizeof(strbuf));
2380 FATAL_ERROR(__FILE__, __LINE__,
2381 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2382 ISC_MSG_FAILED,
2383 "Can't set thread priority: %s"),
2384 strbuf);
2388 * Loop forever waiting on I/O Completions and then processing them
2390 while (TRUE) {
2391 bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
2392 &nbytes, (LPDWORD)&sock,
2393 (LPWSAOVERLAPPED *)&lpo,
2394 INFINITE);
2395 if (lpo == NULL) /* Received request to exit */
2396 break;
2398 REQUIRE(VALID_SOCKET(sock));
2400 request = lpo->request_type;
2402 errstatus = 0;
2403 if (!bSuccess) {
2404 isc_result_t isc_result;
2407 * Did the I/O operation complete?
2409 errstatus = GetLastError();
2410 isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
2412 LOCK(&sock->lock);
2413 CONSISTENT(sock);
2414 switch (request) {
2415 case SOCKET_RECV:
2416 INSIST(sock->pending_iocp > 0);
2417 sock->pending_iocp--;
2418 INSIST(sock->pending_recv > 0);
2419 sock->pending_recv--;
2420 if ((sock->type == isc_sockettype_udp) &&
2421 (errstatus == ERROR_HOST_UNREACHABLE)) {
2422 UNEXPECTED_ERROR(__FILE__, __LINE__,
2423 "SOCKET_RECV ERROR_HOST_UNREACHABLE: trying to recover");
2424 recover_receive_request(sock, &lpo);
2425 break;
2427 if ((sock->type == isc_sockettype_udp) &&
2428 (errstatus == WSAENETRESET)) {
2429 UNEXPECTED_ERROR(__FILE__, __LINE__,
2430 "SOCKET_RECV WSAENETRESET: trying to recover");
2431 recover_receive_request(sock, &lpo);
2432 break;
2434 if ((sock->type == isc_sockettype_udp) &&
2435 (errstatus == WSAECONNRESET)) {
2436 UNEXPECTED_ERROR(__FILE__, __LINE__,
2437 "SOCKET_RECV WSAECONNRESET: trying to recover");
2438 recover_receive_request(sock, &lpo);
2439 break;
2441 send_recvdone_abort(sock, isc_result);
2442 if ((isc_result == ISC_R_UNEXPECTED) ||
2443 ((isc_result == ISC_R_CONNECTIONRESET) &&
2444 (errstatus != ERROR_OPERATION_ABORTED)) ||
2445 (isc_result == ISC_R_HOSTUNREACH)) {
2446 UNEXPECTED_ERROR(__FILE__, __LINE__,
2447 "SOCKET_RECV: Windows error code: %d, returning ISC error %d",
2448 errstatus, isc_result);
2450 break;
2452 case SOCKET_SEND:
2453 INSIST(sock->pending_iocp > 0);
2454 sock->pending_iocp--;
2455 INSIST(sock->pending_send > 0);
2456 sock->pending_send--;
2457 if (senddone_is_active(sock, lpo->dev)) {
2458 lpo->dev->result = isc_result;
2459 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2460 "canceled_send");
2461 send_senddone_event(sock, &lpo->dev);
2463 break;
2465 case SOCKET_ACCEPT:
2466 INSIST(sock->pending_iocp > 0);
2467 sock->pending_iocp--;
2468 INSIST(sock->pending_accept > 0);
2469 sock->pending_accept--;
2470 if (acceptdone_is_active(sock, lpo->adev)) {
2471 closesocket(lpo->adev->newsocket->fd);
2472 lpo->adev->newsocket->fd = INVALID_SOCKET;
2473 lpo->adev->newsocket->references--;
2474 free_socket(&lpo->adev->newsocket, __LINE__);
2475 lpo->adev->result = isc_result;
2476 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2477 "canceled_accept");
2478 send_acceptdone_event(sock, &lpo->adev);
2480 break;
2482 case SOCKET_CONNECT:
2483 INSIST(sock->pending_iocp > 0);
2484 sock->pending_iocp--;
2485 INSIST(sock->pending_connect == 1);
2486 sock->pending_connect = 0;
2487 if (connectdone_is_active(sock, lpo->cdev)) {
2488 lpo->cdev->result = isc_result;
2489 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2490 "canceled_connect");
2491 send_connectdone_event(sock, &lpo->cdev);
2493 break;
2495 maybe_free_socket(&sock, __LINE__);
2497 if (lpo != NULL)
2498 HeapFree(hHeapHandle, 0, lpo);
2499 continue;
2502 messagehdr = &lpo->messagehdr;
2504 switch (request) {
2505 case SOCKET_RECV:
2506 internal_recv(sock, nbytes);
2507 break;
2508 case SOCKET_SEND:
2509 internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
2510 break;
2511 case SOCKET_ACCEPT:
2512 internal_accept(sock, lpo, errstatus);
2513 break;
2514 case SOCKET_CONNECT:
2515 internal_connect(sock, lpo, errstatus);
2516 break;
2519 if (lpo != NULL)
2520 HeapFree(hHeapHandle, 0, lpo);
2524 * Exit Completion Port Thread
2526 manager_log(manager, TRACE,
2527 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2528 ISC_MSG_EXITING, "SocketIoThread exiting"));
2529 return ((isc_threadresult_t)0);
2533 * Create a new socket manager.
2535 isc_result_t
2536 isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
2537 return (isc_socketmgr_create2(mctx, managerp, 0));
2540 isc_result_t
2541 isc__socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
2542 unsigned int maxsocks)
2544 isc_socketmgr_t *manager;
2545 isc_result_t result;
2547 REQUIRE(managerp != NULL && *managerp == NULL);
2549 if (maxsocks != 0)
2550 return (ISC_R_NOTIMPLEMENTED);
2552 manager = isc_mem_get(mctx, sizeof(*manager));
2553 if (manager == NULL)
2554 return (ISC_R_NOMEMORY);
2556 InitSockets();
2558 manager->magic = SOCKET_MANAGER_MAGIC;
2559 manager->mctx = NULL;
2560 manager->stats = NULL;
2561 ISC_LIST_INIT(manager->socklist);
2562 result = isc_mutex_init(&manager->lock);
2563 if (result != ISC_R_SUCCESS) {
2564 isc_mem_put(mctx, manager, sizeof(*manager));
2565 return (result);
2567 if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
2568 DESTROYLOCK(&manager->lock);
2569 isc_mem_put(mctx, manager, sizeof(*manager));
2570 UNEXPECTED_ERROR(__FILE__, __LINE__,
2571 "isc_condition_init() %s",
2572 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2573 ISC_MSG_FAILED, "failed"));
2574 return (ISC_R_UNEXPECTED);
2577 isc_mem_attach(mctx, &manager->mctx);
2579 iocompletionport_init(manager); /* Create the Completion Ports */
2581 manager->bShutdown = ISC_FALSE;
2582 manager->totalSockets = 0;
2583 manager->iocp_total = 0;
2585 *managerp = manager;
2587 return (ISC_R_SUCCESS);
2590 isc_result_t
2591 isc__socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) {
2592 REQUIRE(VALID_MANAGER(manager));
2593 REQUIRE(nsockp != NULL);
2595 return (ISC_R_NOTIMPLEMENTED);
2598 void
2599 isc__socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) {
2600 REQUIRE(VALID_MANAGER(manager));
2601 REQUIRE(ISC_LIST_EMPTY(manager->socklist));
2602 REQUIRE(manager->stats == NULL);
2603 REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2605 isc_stats_attach(stats, &manager->stats);
2608 void
2609 isc__socketmgr_destroy(isc_socketmgr_t **managerp) {
2610 isc_socketmgr_t *manager;
2611 int i;
2612 isc_mem_t *mctx;
2615 * Destroy a socket manager.
2618 REQUIRE(managerp != NULL);
2619 manager = *managerp;
2620 REQUIRE(VALID_MANAGER(manager));
2622 LOCK(&manager->lock);
2625 * Wait for all sockets to be destroyed.
2627 while (!ISC_LIST_EMPTY(manager->socklist)) {
2628 manager_log(manager, CREATION,
2629 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2630 ISC_MSG_SOCKETSREMAIN,
2631 "sockets exist"));
2632 WAIT(&manager->shutdown_ok, &manager->lock);
2635 UNLOCK(&manager->lock);
2638 * Here, we need to had some wait code for the completion port
2639 * thread.
2641 signal_iocompletionport_exit(manager);
2642 manager->bShutdown = ISC_TRUE;
2645 * Wait for threads to exit.
2647 for (i = 0; i < manager->maxIOCPThreads; i++) {
2648 if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
2649 NULL) != ISC_R_SUCCESS)
2650 UNEXPECTED_ERROR(__FILE__, __LINE__,
2651 "isc_thread_join() for Completion Port %s",
2652 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2653 ISC_MSG_FAILED, "failed"));
2656 * Clean up.
2659 CloseHandle(manager->hIoCompletionPort);
2661 (void)isc_condition_destroy(&manager->shutdown_ok);
2663 DESTROYLOCK(&manager->lock);
2664 if (manager->stats != NULL)
2665 isc_stats_detach(&manager->stats);
2666 manager->magic = 0;
2667 mctx= manager->mctx;
2668 isc_mem_put(mctx, manager, sizeof(*manager));
2670 isc_mem_detach(&mctx);
2672 *managerp = NULL;
2675 static void
2676 queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
2678 isc_task_t *ntask = NULL;
2680 isc_task_attach(task, &ntask);
2681 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2684 * Enqueue the request.
2686 INSIST(!ISC_LINK_LINKED(dev, ev_link));
2687 ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2689 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2690 "queue_receive_event: event %p -> task %p",
2691 dev, ntask);
2695 * Check the pending receive queue, and if we have data pending, give it to this
2696 * caller. If we have none, queue an I/O request. If this caller is not the first
2697 * on the list, then we will just queue this event and return.
2699 * Caller must have the socket locked.
2701 static isc_result_t
2702 socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2703 unsigned int flags)
2705 int cc = 0;
2706 isc_task_t *ntask = NULL;
2707 isc_result_t result = ISC_R_SUCCESS;
2708 int recv_errno = 0;
2710 dev->ev_sender = task;
2712 if (sock->fd == INVALID_SOCKET)
2713 return (ISC_R_EOF);
2716 * Queue our event on the list of things to do. Call our function to
2717 * attempt to fill buffers as much as possible, and return done events.
2718 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2719 * here and tell our caller that we could not satisfy it immediately.
2721 queue_receive_event(sock, task, dev);
2722 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2723 result = ISC_R_INPROGRESS;
2725 completeio_recv(sock);
2728 * If there are more receivers waiting for data, queue another receive
2729 * here. If the
2731 queue_receive_request(sock);
2733 return (result);
2736 isc_result_t
2737 isc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2738 unsigned int minimum, isc_task_t *task,
2739 isc_taskaction_t action, const void *arg)
2741 isc_socketevent_t *dev;
2742 isc_socketmgr_t *manager;
2743 unsigned int iocount;
2744 isc_buffer_t *buffer;
2745 isc_result_t ret;
2747 REQUIRE(VALID_SOCKET(sock));
2748 LOCK(&sock->lock);
2749 CONSISTENT(sock);
2752 * Make sure that the socket is not closed. XXXMLG change error here?
2754 if (sock->fd == INVALID_SOCKET) {
2755 UNLOCK(&sock->lock);
2756 return (ISC_R_CONNREFUSED);
2759 REQUIRE(buflist != NULL);
2760 REQUIRE(!ISC_LIST_EMPTY(*buflist));
2761 REQUIRE(task != NULL);
2762 REQUIRE(action != NULL);
2764 manager = sock->manager;
2765 REQUIRE(VALID_MANAGER(manager));
2767 iocount = isc_bufferlist_availablecount(buflist);
2768 REQUIRE(iocount > 0);
2770 INSIST(sock->bound);
2772 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2773 if (dev == NULL) {
2774 UNLOCK(&sock->lock);
2775 return (ISC_R_NOMEMORY);
2779 * UDP sockets are always partial read
2781 if (sock->type == isc_sockettype_udp)
2782 dev->minimum = 1;
2783 else {
2784 if (minimum == 0)
2785 dev->minimum = iocount;
2786 else
2787 dev->minimum = minimum;
2791 * Move each buffer from the passed in list to our internal one.
2793 buffer = ISC_LIST_HEAD(*buflist);
2794 while (buffer != NULL) {
2795 ISC_LIST_DEQUEUE(*buflist, buffer, link);
2796 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2797 buffer = ISC_LIST_HEAD(*buflist);
2800 ret = socket_recv(sock, dev, task, 0);
2802 UNLOCK(&sock->lock);
2803 return (ret);
2806 isc_result_t
2807 isc__socket_recv(isc_socket_t *sock, isc_region_t *region,
2808 unsigned int minimum, isc_task_t *task,
2809 isc_taskaction_t action, const void *arg)
2811 isc_socketevent_t *dev;
2812 isc_socketmgr_t *manager;
2813 isc_result_t ret;
2815 REQUIRE(VALID_SOCKET(sock));
2816 LOCK(&sock->lock);
2817 CONSISTENT(sock);
2820 * make sure that the socket's not closed
2822 if (sock->fd == INVALID_SOCKET) {
2823 UNLOCK(&sock->lock);
2824 return (ISC_R_CONNREFUSED);
2826 REQUIRE(action != NULL);
2828 manager = sock->manager;
2829 REQUIRE(VALID_MANAGER(manager));
2831 INSIST(sock->bound);
2833 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2834 if (dev == NULL) {
2835 UNLOCK(&sock->lock);
2836 return (ISC_R_NOMEMORY);
2839 ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
2840 UNLOCK(&sock->lock);
2841 return (ret);
2844 isc_result_t
2845 isc__socket_recv2(isc_socket_t *sock, isc_region_t *region,
2846 unsigned int minimum, isc_task_t *task,
2847 isc_socketevent_t *event, unsigned int flags)
2849 isc_result_t ret;
2851 REQUIRE(VALID_SOCKET(sock));
2852 LOCK(&sock->lock);
2853 CONSISTENT(sock);
2855 event->result = ISC_R_UNEXPECTED;
2856 event->ev_sender = sock;
2858 * make sure that the socket's not closed
2860 if (sock->fd == INVALID_SOCKET) {
2861 UNLOCK(&sock->lock);
2862 return (ISC_R_CONNREFUSED);
2865 ISC_LIST_INIT(event->bufferlist);
2866 event->region = *region;
2867 event->n = 0;
2868 event->offset = 0;
2869 event->attributes = 0;
2872 * UDP sockets are always partial read.
2874 if (sock->type == isc_sockettype_udp)
2875 event->minimum = 1;
2876 else {
2877 if (minimum == 0)
2878 event->minimum = region->length;
2879 else
2880 event->minimum = minimum;
2883 ret = socket_recv(sock, event, task, flags);
2884 UNLOCK(&sock->lock);
2885 return (ret);
2889 * Caller must have the socket locked.
2891 static isc_result_t
2892 socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2893 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2894 unsigned int flags)
2896 int io_state;
2897 int send_errno = 0;
2898 int cc = 0;
2899 isc_task_t *ntask = NULL;
2900 isc_result_t result = ISC_R_SUCCESS;
2902 dev->ev_sender = task;
2904 set_dev_address(address, sock, dev);
2905 if (pktinfo != NULL) {
2906 socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET,
2907 ISC_MSG_PKTINFOPROVIDED,
2908 "pktinfo structure provided, ifindex %u (set to 0)",
2909 pktinfo->ipi6_ifindex);
2911 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
2912 dev->pktinfo = *pktinfo;
2914 * Set the pktinfo index to 0 here, to let the kernel decide
2915 * what interface it should send on.
2917 dev->pktinfo.ipi6_ifindex = 0;
2920 io_state = startio_send(sock, dev, &cc, &send_errno);
2921 switch (io_state) {
2922 case DOIO_PENDING: /* I/O started. Nothing more to do */
2923 case DOIO_SOFT:
2925 * We couldn't send all or part of the request right now, so
2926 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2928 if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
2929 isc_task_attach(task, &ntask);
2930 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2933 * Enqueue the request.
2935 INSIST(!ISC_LINK_LINKED(dev, ev_link));
2936 ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
2938 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2939 "socket_send: event %p -> task %p",
2940 dev, ntask);
2942 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2943 result = ISC_R_INPROGRESS;
2944 break;
2947 case DOIO_SUCCESS:
2948 break;
2951 return (result);
2954 isc_result_t
2955 isc__socket_send(isc_socket_t *sock, isc_region_t *region,
2956 isc_task_t *task, isc_taskaction_t action, const void *arg)
2959 * REQUIRE() checking is performed in isc_socket_sendto().
2961 return (isc_socket_sendto(sock, region, task, action, arg, NULL,
2962 NULL));
2965 isc_result_t
2966 isc__socket_sendto(isc_socket_t *sock, isc_region_t *region,
2967 isc_task_t *task, isc_taskaction_t action, const void *arg,
2968 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
2970 isc_socketevent_t *dev;
2971 isc_socketmgr_t *manager;
2972 isc_result_t ret;
2974 REQUIRE(VALID_SOCKET(sock));
2975 REQUIRE(sock->type != isc_sockettype_fdwatch);
2977 LOCK(&sock->lock);
2978 CONSISTENT(sock);
2981 * make sure that the socket's not closed
2983 if (sock->fd == INVALID_SOCKET) {
2984 UNLOCK(&sock->lock);
2985 return (ISC_R_CONNREFUSED);
2987 REQUIRE(region != NULL);
2988 REQUIRE(task != NULL);
2989 REQUIRE(action != NULL);
2991 manager = sock->manager;
2992 REQUIRE(VALID_MANAGER(manager));
2994 INSIST(sock->bound);
2996 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
2997 if (dev == NULL) {
2998 UNLOCK(&sock->lock);
2999 return (ISC_R_NOMEMORY);
3001 dev->region = *region;
3003 ret = socket_send(sock, dev, task, address, pktinfo, 0);
3004 UNLOCK(&sock->lock);
3005 return (ret);
3008 isc_result_t
3009 isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
3010 isc_task_t *task, isc_taskaction_t action, const void *arg)
3012 return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL,
3013 NULL));
3016 isc_result_t
3017 isc__socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
3018 isc_task_t *task, isc_taskaction_t action, const void *arg,
3019 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3021 isc_socketevent_t *dev;
3022 isc_socketmgr_t *manager;
3023 unsigned int iocount;
3024 isc_buffer_t *buffer;
3025 isc_result_t ret;
3027 REQUIRE(VALID_SOCKET(sock));
3029 LOCK(&sock->lock);
3030 CONSISTENT(sock);
3033 * make sure that the socket's not closed
3035 if (sock->fd == INVALID_SOCKET) {
3036 UNLOCK(&sock->lock);
3037 return (ISC_R_CONNREFUSED);
3039 REQUIRE(buflist != NULL);
3040 REQUIRE(!ISC_LIST_EMPTY(*buflist));
3041 REQUIRE(task != NULL);
3042 REQUIRE(action != NULL);
3044 manager = sock->manager;
3045 REQUIRE(VALID_MANAGER(manager));
3047 iocount = isc_bufferlist_usedcount(buflist);
3048 REQUIRE(iocount > 0);
3050 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
3051 if (dev == NULL) {
3052 UNLOCK(&sock->lock);
3053 return (ISC_R_NOMEMORY);
3057 * Move each buffer from the passed in list to our internal one.
3059 buffer = ISC_LIST_HEAD(*buflist);
3060 while (buffer != NULL) {
3061 ISC_LIST_DEQUEUE(*buflist, buffer, link);
3062 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
3063 buffer = ISC_LIST_HEAD(*buflist);
3066 ret = socket_send(sock, dev, task, address, pktinfo, 0);
3067 UNLOCK(&sock->lock);
3068 return (ret);
3071 isc_result_t
3072 isc__socket_sendto2(isc_socket_t *sock, isc_region_t *region,
3073 isc_task_t *task,
3074 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3075 isc_socketevent_t *event, unsigned int flags)
3077 isc_result_t ret;
3079 REQUIRE(VALID_SOCKET(sock));
3080 LOCK(&sock->lock);
3081 CONSISTENT(sock);
3083 REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
3084 if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
3085 REQUIRE(sock->type == isc_sockettype_udp);
3086 event->ev_sender = sock;
3087 event->result = ISC_R_UNEXPECTED;
3089 * make sure that the socket's not closed
3091 if (sock->fd == INVALID_SOCKET) {
3092 UNLOCK(&sock->lock);
3093 return (ISC_R_CONNREFUSED);
3095 ISC_LIST_INIT(event->bufferlist);
3096 event->region = *region;
3097 event->n = 0;
3098 event->offset = 0;
3099 event->attributes = 0;
3101 ret = socket_send(sock, event, task, address, pktinfo, flags);
3102 UNLOCK(&sock->lock);
3103 return (ret);
3106 isc_result_t
3107 isc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
3108 unsigned int options) {
3109 int bind_errno;
3110 char strbuf[ISC_STRERRORSIZE];
3111 int on = 1;
3113 REQUIRE(VALID_SOCKET(sock));
3114 LOCK(&sock->lock);
3115 CONSISTENT(sock);
3118 * make sure that the socket's not closed
3120 if (sock->fd == INVALID_SOCKET) {
3121 UNLOCK(&sock->lock);
3122 return (ISC_R_CONNREFUSED);
3125 INSIST(!sock->bound);
3127 if (sock->pf != sockaddr->type.sa.sa_family) {
3128 UNLOCK(&sock->lock);
3129 return (ISC_R_FAMILYMISMATCH);
3132 * Only set SO_REUSEADDR when we want a specific port.
3134 if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
3135 isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
3136 setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
3137 sizeof(on)) < 0) {
3138 UNEXPECTED_ERROR(__FILE__, __LINE__,
3139 "setsockopt(%d) %s", sock->fd,
3140 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
3141 ISC_MSG_FAILED, "failed"));
3142 /* Press on... */
3144 if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
3145 bind_errno = WSAGetLastError();
3146 UNLOCK(&sock->lock);
3147 switch (bind_errno) {
3148 case WSAEACCES:
3149 return (ISC_R_NOPERM);
3150 case WSAEADDRNOTAVAIL:
3151 return (ISC_R_ADDRNOTAVAIL);
3152 case WSAEADDRINUSE:
3153 return (ISC_R_ADDRINUSE);
3154 case WSAEINVAL:
3155 return (ISC_R_BOUND);
3156 default:
3157 isc__strerror(bind_errno, strbuf, sizeof(strbuf));
3158 UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
3159 strbuf);
3160 return (ISC_R_UNEXPECTED);
3164 socket_log(__LINE__, sock, sockaddr, TRACE,
3165 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
3166 sock->bound = 1;
3168 UNLOCK(&sock->lock);
3169 return (ISC_R_SUCCESS);
3172 isc_result_t
3173 isc__socket_filter(isc_socket_t *sock, const char *filter) {
3174 UNUSED(sock);
3175 UNUSED(filter);
3177 REQUIRE(VALID_SOCKET(sock));
3178 return (ISC_R_NOTIMPLEMENTED);
3182 * Set up to listen on a given socket. We do this by creating an internal
3183 * event that will be dispatched when the socket has read activity. The
3184 * watcher will send the internal event to the task when there is a new
3185 * connection.
3187 * Unlike in read, we don't preallocate a done event here. Every time there
3188 * is a new connection we'll have to allocate a new one anyway, so we might
3189 * as well keep things simple rather than having to track them.
3191 isc_result_t
3192 isc__socket_listen(isc_socket_t *sock, unsigned int backlog) {
3193 char strbuf[ISC_STRERRORSIZE];
3195 REQUIRE(VALID_SOCKET(sock));
3197 LOCK(&sock->lock);
3198 CONSISTENT(sock);
3201 * make sure that the socket's not closed
3203 if (sock->fd == INVALID_SOCKET) {
3204 UNLOCK(&sock->lock);
3205 return (ISC_R_CONNREFUSED);
3208 REQUIRE(!sock->listener);
3209 REQUIRE(sock->bound);
3210 REQUIRE(sock->type == isc_sockettype_tcp);
3212 if (backlog == 0)
3213 backlog = SOMAXCONN;
3215 if (listen(sock->fd, (int)backlog) < 0) {
3216 UNLOCK(&sock->lock);
3217 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3219 UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
3221 return (ISC_R_UNEXPECTED);
3224 socket_log(__LINE__, sock, NULL, TRACE,
3225 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening");
3226 sock->listener = 1;
3227 _set_state(sock, SOCK_LISTEN);
3229 UNLOCK(&sock->lock);
3230 return (ISC_R_SUCCESS);
3234 * This should try to do aggressive accept() XXXMLG
3236 isc_result_t
3237 isc__socket_accept(isc_socket_t *sock,
3238 isc_task_t *task, isc_taskaction_t action, const void *arg)
3240 isc_socket_newconnev_t *adev;
3241 isc_socketmgr_t *manager;
3242 isc_task_t *ntask = NULL;
3243 isc_socket_t *nsock;
3244 isc_result_t result;
3245 IoCompletionInfo *lpo;
3247 REQUIRE(VALID_SOCKET(sock));
3249 manager = sock->manager;
3250 REQUIRE(VALID_MANAGER(manager));
3252 LOCK(&sock->lock);
3253 CONSISTENT(sock);
3256 * make sure that the socket's not closed
3258 if (sock->fd == INVALID_SOCKET) {
3259 UNLOCK(&sock->lock);
3260 return (ISC_R_CONNREFUSED);
3263 REQUIRE(sock->listener);
3266 * Sender field is overloaded here with the task we will be sending
3267 * this event to. Just before the actual event is delivered the
3268 * actual ev_sender will be touched up to be the socket.
3270 adev = (isc_socket_newconnev_t *)
3271 isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3272 action, arg, sizeof(*adev));
3273 if (adev == NULL) {
3274 UNLOCK(&sock->lock);
3275 return (ISC_R_NOMEMORY);
3277 ISC_LINK_INIT(adev, ev_link);
3279 result = allocate_socket(manager, sock->type, &nsock);
3280 if (result != ISC_R_SUCCESS) {
3281 isc_event_free((isc_event_t **)&adev);
3282 UNLOCK(&sock->lock);
3283 return (result);
3287 * AcceptEx() requires we pass in a socket.
3289 nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
3290 if (nsock->fd == INVALID_SOCKET) {
3291 free_socket(&nsock, __LINE__);
3292 isc_event_free((isc_event_t **)&adev);
3293 UNLOCK(&sock->lock);
3294 return (ISC_R_FAILURE); // XXXMLG need real error message
3298 * Attach to socket and to task.
3300 isc_task_attach(task, &ntask);
3301 nsock->references++;
3303 adev->ev_sender = ntask;
3304 adev->newsocket = nsock;
3305 _set_state(nsock, SOCK_ACCEPT);
3308 * Queue io completion for an accept().
3310 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3311 HEAP_ZERO_MEMORY,
3312 sizeof(IoCompletionInfo));
3313 RUNTIME_CHECK(lpo != NULL);
3314 lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
3315 (sizeof(SOCKADDR_STORAGE) + 16) * 2);
3316 RUNTIME_CHECK(lpo->acceptbuffer != NULL);
3318 lpo->adev = adev;
3319 lpo->request_type = SOCKET_ACCEPT;
3321 ISCAcceptEx(sock->fd,
3322 nsock->fd, /* Accepted Socket */
3323 lpo->acceptbuffer, /* Buffer for initial Recv */
3324 0, /* Length of Buffer */
3325 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */
3326 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address lengh + 16 */
3327 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */
3328 (LPOVERLAPPED)lpo /* Overlapped structure */
3330 iocompletionport_update(nsock);
3332 socket_log(__LINE__, sock, NULL, TRACE,
3333 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND,
3334 "accepting for nsock %p fd %d", nsock, nsock->fd);
3337 * Enqueue the event
3339 ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link);
3340 sock->pending_accept++;
3341 sock->pending_iocp++;
3343 UNLOCK(&sock->lock);
3344 return (ISC_R_SUCCESS);
3347 isc_result_t
3348 isc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3349 isc_task_t *task, isc_taskaction_t action, const void *arg)
3351 char strbuf[ISC_STRERRORSIZE];
3352 isc_socket_connev_t *cdev;
3353 isc_task_t *ntask = NULL;
3354 isc_socketmgr_t *manager;
3355 IoCompletionInfo *lpo;
3356 int bind_errno;
3358 REQUIRE(VALID_SOCKET(sock));
3359 REQUIRE(addr != NULL);
3360 REQUIRE(task != NULL);
3361 REQUIRE(action != NULL);
3363 manager = sock->manager;
3364 REQUIRE(VALID_MANAGER(manager));
3365 REQUIRE(addr != NULL);
3367 if (isc_sockaddr_ismulticast(addr))
3368 return (ISC_R_MULTICAST);
3370 LOCK(&sock->lock);
3371 CONSISTENT(sock);
3374 * make sure that the socket's not closed
3376 if (sock->fd == INVALID_SOCKET) {
3377 UNLOCK(&sock->lock);
3378 return (ISC_R_CONNREFUSED);
3382 * Windows sockets won't connect unless the socket is bound.
3384 if (!sock->bound) {
3385 isc_sockaddr_t any;
3387 isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr));
3388 if (bind(sock->fd, &any.type.sa, any.length) < 0) {
3389 bind_errno = WSAGetLastError();
3390 UNLOCK(&sock->lock);
3391 switch (bind_errno) {
3392 case WSAEACCES:
3393 return (ISC_R_NOPERM);
3394 case WSAEADDRNOTAVAIL:
3395 return (ISC_R_ADDRNOTAVAIL);
3396 case WSAEADDRINUSE:
3397 return (ISC_R_ADDRINUSE);
3398 case WSAEINVAL:
3399 return (ISC_R_BOUND);
3400 default:
3401 isc__strerror(bind_errno, strbuf,
3402 sizeof(strbuf));
3403 UNEXPECTED_ERROR(__FILE__, __LINE__,
3404 "bind: %s", strbuf);
3405 return (ISC_R_UNEXPECTED);
3408 sock->bound = 1;
3411 REQUIRE(!sock->pending_connect);
3413 cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3414 ISC_SOCKEVENT_CONNECT,
3415 action, arg,
3416 sizeof(*cdev));
3417 if (cdev == NULL) {
3418 UNLOCK(&sock->lock);
3419 return (ISC_R_NOMEMORY);
3421 ISC_LINK_INIT(cdev, ev_link);
3423 if (sock->type == isc_sockettype_tcp) {
3425 * Queue io completion for an accept().
3427 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3428 HEAP_ZERO_MEMORY,
3429 sizeof(IoCompletionInfo));
3430 lpo->cdev = cdev;
3431 lpo->request_type = SOCKET_CONNECT;
3433 sock->address = *addr;
3434 ISCConnectEx(sock->fd, &addr->type.sa, addr->length,
3435 NULL, 0, NULL, (LPOVERLAPPED)lpo);
3438 * Attach to task.
3440 isc_task_attach(task, &ntask);
3441 cdev->ev_sender = ntask;
3443 sock->pending_connect = 1;
3444 _set_state(sock, SOCK_CONNECT);
3447 * Enqueue the request.
3449 sock->connect_ev = cdev;
3450 sock->pending_iocp++;
3451 } else {
3452 WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL);
3453 cdev->result = ISC_R_SUCCESS;
3454 isc_task_send(task, (isc_event_t **)&cdev);
3456 CONSISTENT(sock);
3457 UNLOCK(&sock->lock);
3459 return (ISC_R_SUCCESS);
3462 isc_result_t
3463 isc__socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3464 isc_result_t result;
3466 REQUIRE(VALID_SOCKET(sock));
3467 REQUIRE(addressp != NULL);
3469 LOCK(&sock->lock);
3470 CONSISTENT(sock);
3473 * make sure that the socket's not closed
3475 if (sock->fd == INVALID_SOCKET) {
3476 UNLOCK(&sock->lock);
3477 return (ISC_R_CONNREFUSED);
3480 if (sock->connected) {
3481 *addressp = sock->address;
3482 result = ISC_R_SUCCESS;
3483 } else {
3484 result = ISC_R_NOTCONNECTED;
3487 UNLOCK(&sock->lock);
3489 return (result);
3492 isc_result_t
3493 isc__socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3494 ISC_SOCKADDR_LEN_T len;
3495 isc_result_t result;
3496 char strbuf[ISC_STRERRORSIZE];
3498 REQUIRE(VALID_SOCKET(sock));
3499 REQUIRE(addressp != NULL);
3501 LOCK(&sock->lock);
3502 CONSISTENT(sock);
3505 * make sure that the socket's not closed
3507 if (sock->fd == INVALID_SOCKET) {
3508 UNLOCK(&sock->lock);
3509 return (ISC_R_CONNREFUSED);
3512 if (!sock->bound) {
3513 result = ISC_R_NOTBOUND;
3514 goto out;
3517 result = ISC_R_SUCCESS;
3519 len = sizeof(addressp->type);
3520 if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3521 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3522 UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3523 strbuf);
3524 result = ISC_R_UNEXPECTED;
3525 goto out;
3527 addressp->length = (unsigned int)len;
3529 out:
3530 UNLOCK(&sock->lock);
3532 return (result);
3536 * Run through the list of events on this socket, and cancel the ones
3537 * queued for task "task" of type "how". "how" is a bitmask.
3539 void
3540 isc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3542 REQUIRE(VALID_SOCKET(sock));
3545 * Quick exit if there is nothing to do. Don't even bother locking
3546 * in this case.
3548 if (how == 0)
3549 return;
3551 LOCK(&sock->lock);
3552 CONSISTENT(sock);
3555 * make sure that the socket's not closed
3557 if (sock->fd == INVALID_SOCKET) {
3558 UNLOCK(&sock->lock);
3559 return;
3563 * All of these do the same thing, more or less.
3564 * Each will:
3565 * o If the internal event is marked as "posted" try to
3566 * remove it from the task's queue. If this fails, mark it
3567 * as canceled instead, and let the task clean it up later.
3568 * o For each I/O request for that task of that type, post
3569 * its done event with status of "ISC_R_CANCELED".
3570 * o Reset any state needed.
3573 if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) {
3574 isc_socketevent_t *dev;
3575 isc_socketevent_t *next;
3576 isc_task_t *current_task;
3578 dev = ISC_LIST_HEAD(sock->recv_list);
3579 while (dev != NULL) {
3580 current_task = dev->ev_sender;
3581 next = ISC_LIST_NEXT(dev, ev_link);
3582 if ((task == NULL) || (task == current_task)) {
3583 dev->result = ISC_R_CANCELED;
3584 send_recvdone_event(sock, &dev);
3586 dev = next;
3589 how &= ~ISC_SOCKCANCEL_RECV;
3591 if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) {
3592 isc_socketevent_t *dev;
3593 isc_socketevent_t *next;
3594 isc_task_t *current_task;
3596 dev = ISC_LIST_HEAD(sock->send_list);
3598 while (dev != NULL) {
3599 current_task = dev->ev_sender;
3600 next = ISC_LIST_NEXT(dev, ev_link);
3601 if ((task == NULL) || (task == current_task)) {
3602 dev->result = ISC_R_CANCELED;
3603 send_senddone_event(sock, &dev);
3605 dev = next;
3608 how &= ~ISC_SOCKCANCEL_SEND;
3610 if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT)
3611 && !ISC_LIST_EMPTY(sock->accept_list)) {
3612 isc_socket_newconnev_t *dev;
3613 isc_socket_newconnev_t *next;
3614 isc_task_t *current_task;
3616 dev = ISC_LIST_HEAD(sock->accept_list);
3617 while (dev != NULL) {
3618 current_task = dev->ev_sender;
3619 next = ISC_LIST_NEXT(dev, ev_link);
3621 if ((task == NULL) || (task == current_task)) {
3623 dev->newsocket->references--;
3624 closesocket(dev->newsocket->fd);
3625 dev->newsocket->fd = INVALID_SOCKET;
3626 free_socket(&dev->newsocket, __LINE__);
3628 dev->result = ISC_R_CANCELED;
3629 send_acceptdone_event(sock, &dev);
3632 dev = next;
3635 how &= ~ISC_SOCKCANCEL_ACCEPT;
3638 * Connecting is not a list.
3640 if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
3641 && sock->connect_ev != NULL) {
3642 isc_socket_connev_t *dev;
3643 isc_task_t *current_task;
3645 INSIST(sock->pending_connect);
3647 dev = sock->connect_ev;
3648 current_task = dev->ev_sender;
3650 if ((task == NULL) || (task == current_task)) {
3651 closesocket(sock->fd);
3652 sock->fd = INVALID_SOCKET;
3653 _set_state(sock, SOCK_CLOSED);
3655 sock->connect_ev = NULL;
3656 dev->result = ISC_R_CANCELED;
3657 send_connectdone_event(sock, &dev);
3660 how &= ~ISC_SOCKCANCEL_CONNECT;
3662 maybe_free_socket(&sock, __LINE__);
3665 isc_sockettype_t
3666 isc__socket_gettype(isc_socket_t *sock) {
3667 isc_sockettype_t type;
3669 REQUIRE(VALID_SOCKET(sock));
3671 LOCK(&sock->lock);
3674 * make sure that the socket's not closed
3676 if (sock->fd == INVALID_SOCKET) {
3677 UNLOCK(&sock->lock);
3678 return (ISC_R_CONNREFUSED);
3681 type = sock->type;
3682 UNLOCK(&sock->lock);
3683 return (type);
3686 isc_boolean_t
3687 isc__socket_isbound(isc_socket_t *sock) {
3688 isc_boolean_t val;
3690 REQUIRE(VALID_SOCKET(sock));
3692 LOCK(&sock->lock);
3693 CONSISTENT(sock);
3696 * make sure that the socket's not closed
3698 if (sock->fd == INVALID_SOCKET) {
3699 UNLOCK(&sock->lock);
3700 return (ISC_FALSE);
3703 val = ((sock->bound) ? ISC_TRUE : ISC_FALSE);
3704 UNLOCK(&sock->lock);
3706 return (val);
3709 void
3710 isc__socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
3711 #if defined(IPV6_V6ONLY)
3712 int onoff = yes ? 1 : 0;
3713 #else
3714 UNUSED(yes);
3715 #endif
3717 REQUIRE(VALID_SOCKET(sock));
3719 #ifdef IPV6_V6ONLY
3720 if (sock->pf == AF_INET6) {
3721 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3722 (char *)&onoff, sizeof(onoff));
3724 #endif
3727 void
3728 isc__socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active) {
3729 UNUSED(addr);
3730 UNUSED(active);
3733 isc_result_t
3734 isc__socket_permunix(isc_sockaddr_t *addr, isc_uint32_t perm,
3735 isc_uint32_t owner, isc_uint32_t group)
3737 UNUSED(addr);
3738 UNUSED(perm);
3739 UNUSED(owner);
3740 UNUSED(group);
3741 return (ISC_R_NOTIMPLEMENTED);
3744 void
3745 isc__socket_setname(isc_socket_t *socket, const char *name, void *tag) {
3748 * Name 'socket'.
3751 REQUIRE(VALID_SOCKET(socket));
3753 LOCK(&socket->lock);
3754 memset(socket->name, 0, sizeof(socket->name));
3755 strncpy(socket->name, name, sizeof(socket->name) - 1);
3756 socket->tag = tag;
3757 UNLOCK(&socket->lock);
3760 const char *
3761 isc__socket_getname(isc_socket_t *socket) {
3762 return (socket->name);
3765 void *
3766 isc__socket_gettag(isc_socket_t *socket) {
3767 return (socket->tag);
3770 void
3771 isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) {
3772 UNUSED(manager);
3773 UNUSED(reserved);
3776 void
3777 isc___socketmgr_maxudp(isc_socketmgr_t *manager, int maxudp) {
3779 UNUSED(manager);
3780 UNUSED(maxudp);
3783 #ifdef HAVE_LIBXML2
3785 static const char *
3786 _socktype(isc_sockettype_t type)
3788 if (type == isc_sockettype_udp)
3789 return ("udp");
3790 else if (type == isc_sockettype_tcp)
3791 return ("tcp");
3792 else if (type == isc_sockettype_unix)
3793 return ("unix");
3794 else if (type == isc_sockettype_fdwatch)
3795 return ("fdwatch");
3796 else
3797 return ("not-initialized");
3800 void
3801 isc_socketmgr_renderxml(isc_socketmgr_t *mgr, xmlTextWriterPtr writer)
3803 isc_socket_t *sock;
3804 char peerbuf[ISC_SOCKADDR_FORMATSIZE];
3805 isc_sockaddr_t addr;
3806 ISC_SOCKADDR_LEN_T len;
3808 LOCK(&mgr->lock);
3810 #ifndef ISC_PLATFORM_USETHREADS
3811 xmlTextWriterStartElement(writer, ISC_XMLCHAR "references");
3812 xmlTextWriterWriteFormatString(writer, "%d", mgr->refs);
3813 xmlTextWriterEndElement(writer);
3814 #endif
3816 xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets");
3817 sock = ISC_LIST_HEAD(mgr->socklist);
3818 while (sock != NULL) {
3819 LOCK(&sock->lock);
3820 xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket");
3822 xmlTextWriterStartElement(writer, ISC_XMLCHAR "id");
3823 xmlTextWriterWriteFormatString(writer, "%p", sock);
3824 xmlTextWriterEndElement(writer);
3826 if (sock->name[0] != 0) {
3827 xmlTextWriterStartElement(writer, ISC_XMLCHAR "name");
3828 xmlTextWriterWriteFormatString(writer, "%s",
3829 sock->name);
3830 xmlTextWriterEndElement(writer); /* name */
3833 xmlTextWriterStartElement(writer, ISC_XMLCHAR "references");
3834 xmlTextWriterWriteFormatString(writer, "%d", sock->references);
3835 xmlTextWriterEndElement(writer);
3837 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "type",
3838 ISC_XMLCHAR _socktype(sock->type));
3840 if (sock->connected) {
3841 isc_sockaddr_format(&sock->address, peerbuf,
3842 sizeof(peerbuf));
3843 xmlTextWriterWriteElement(writer,
3844 ISC_XMLCHAR "peer-address",
3845 ISC_XMLCHAR peerbuf);
3848 len = sizeof(addr);
3849 if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) {
3850 isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf));
3851 xmlTextWriterWriteElement(writer,
3852 ISC_XMLCHAR "local-address",
3853 ISC_XMLCHAR peerbuf);
3856 xmlTextWriterStartElement(writer, ISC_XMLCHAR "states");
3857 if (sock->pending_recv)
3858 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3859 ISC_XMLCHAR "pending-receive");
3860 if (sock->pending_send)
3861 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3862 ISC_XMLCHAR "pending-send");
3863 if (sock->pending_accept)
3864 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3865 ISC_XMLCHAR "pending_accept");
3866 if (sock->listener)
3867 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3868 ISC_XMLCHAR "listener");
3869 if (sock->connected)
3870 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3871 ISC_XMLCHAR "connected");
3872 if (sock->pending_connect)
3873 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3874 ISC_XMLCHAR "connecting");
3875 if (sock->bound)
3876 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3877 ISC_XMLCHAR "bound");
3879 xmlTextWriterEndElement(writer); /* states */
3881 xmlTextWriterEndElement(writer); /* socket */
3883 UNLOCK(&sock->lock);
3884 sock = ISC_LIST_NEXT(sock, link);
3886 xmlTextWriterEndElement(writer); /* sockets */
3888 UNLOCK(&mgr->lock);
3890 #endif /* HAVE_LIBXML2 */