Patrick Welche <prlw1@cam.ac.uk>
[netbsd-mini2440.git] / external / bsd / ntp / dist / lib / isc / win32 / socket.c
blob0cda00c72789bb29d24caf5fd42f1256f84a8e72
1 /* $NetBSD$ */
3 /*
4 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
5 * Copyright (C) 2000-2003 Internet Software Consortium.
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
20 /* Id: socket.c,v 1.70.54.4 2009/01/29 22:40:36 jinmei Exp */
22 /* This code uses functions which are only available on Server 2003 and
23 * higher, and Windows XP and higher.
25 * This code is by nature multithreaded and takes advantage of various
26 * features to pass on information through the completion port for
27 * when I/O is completed. All sends, receives, accepts, and connects are
28 * completed through the completion port.
30 * The number of Completion Port Worker threads used is the total number
31 * of CPU's + 1. This increases the likelihood that a Worker Thread is
32 * available for processing a completed request.
34 * XXXPDM 5 August, 2002
37 #define MAKE_EXTERNAL 1
38 #include <config.h>
40 #include <sys/types.h>
42 #ifndef _WINSOCKAPI_
43 #define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */
44 #endif
46 #include <errno.h>
47 #include <stddef.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <io.h>
52 #include <fcntl.h>
53 #include <process.h>
55 #include <isc/buffer.h>
56 #include <isc/bufferlist.h>
57 #include <isc/condition.h>
58 #include <isc/list.h>
59 #include <isc/log.h>
60 #include <isc/mem.h>
61 #include <isc/msgs.h>
62 #include <isc/mutex.h>
63 #include <isc/net.h>
64 #include <isc/once.h>
65 #include <isc/os.h>
66 #include <isc/platform.h>
67 #include <isc/print.h>
68 #include <isc/region.h>
69 #include <isc/socket.h>
70 #include <isc/stats.h>
71 #include <isc/strerror.h>
72 #include <isc/syslog.h>
73 #include <isc/task.h>
74 #include <isc/thread.h>
75 #include <isc/util.h>
76 #include <isc/win32os.h>
78 #include <mswsock.h>
80 #include "errno2result.h"
83 * How in the world can Microsoft exist with APIs like this?
84 * We can't actually call this directly, because it turns out
85 * no library exports this function. Instead, we need to
86 * issue a runtime call to get the address.
88 LPFN_CONNECTEX ISCConnectEx;
89 LPFN_ACCEPTEX ISCAcceptEx;
90 LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;
93 * Run expensive internal consistency checks.
95 #ifdef ISC_SOCKET_CONSISTENCY_CHECKS
96 #define CONSISTENT(sock) consistent(sock)
97 #else
98 #define CONSISTENT(sock) do {} while (0)
99 #endif
100 static void consistent(isc_socket_t *sock);
103 * Define this macro to control the behavior of connection
104 * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823
105 * for details.
106 * NOTE: This requires that Windows 2000 systems install Service Pack 2
107 * or later.
109 #ifndef SIO_UDP_CONNRESET
110 #define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
111 #endif
114 * Some systems define the socket length argument as an int, some as size_t,
115 * some as socklen_t. This is here so it can be easily changed if needed.
117 #ifndef ISC_SOCKADDR_LEN_T
118 #define ISC_SOCKADDR_LEN_T unsigned int
119 #endif
122 * Define what the possible "soft" errors can be. These are non-fatal returns
123 * of various network related functions, like recv() and so on.
125 #define SOFT_ERROR(e) ((e) == WSAEINTR || \
126 (e) == WSAEWOULDBLOCK || \
127 (e) == EWOULDBLOCK || \
128 (e) == EINTR || \
129 (e) == EAGAIN || \
130 (e) == 0)
133 * Pending errors are not really errors and should be
134 * kept separate
136 #define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)
138 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
139 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
140 #define DOIO_HARD 2 /* i/o error, event sent */
141 #define DOIO_EOF 3 /* EOF, no event sent */
142 #define DOIO_PENDING 4 /* status when i/o is in process */
143 #define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */
145 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
148 * DLVL(90) -- Function entry/exit and other tracing.
149 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
150 * DLVL(60) -- Socket data send/receive
151 * DLVL(50) -- Event tracing, including receiving/sending completion events.
152 * DLVL(20) -- Socket creation/destruction.
154 #define TRACE_LEVEL 90
155 #define CORRECTNESS_LEVEL 70
156 #define IOEVENT_LEVEL 60
157 #define EVENT_LEVEL 50
158 #define CREATION_LEVEL 20
160 #define TRACE DLVL(TRACE_LEVEL)
161 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
162 #define IOEVENT DLVL(IOEVENT_LEVEL)
163 #define EVENT DLVL(EVENT_LEVEL)
164 #define CREATION DLVL(CREATION_LEVEL)
166 typedef isc_event_t intev_t;
/*
 * Socket State
 *
 * Values stored in isc_socket.state (via _set_state()); used for
 * debugging and consistency checking.
 */
enum {
	SOCK_INITIALIZED,	/* Socket Initialized */
	SOCK_OPEN,		/* Socket opened but nothing yet to do */
	SOCK_DATA,		/* Socket sending or receiving data */
	SOCK_LISTEN,		/* TCP Socket listening for connects */
	SOCK_ACCEPT,		/* TCP socket is waiting to accept */
	SOCK_CONNECT,		/* TCP Socket connecting */
	SOCK_CLOSED,		/* Socket has been closed */
};
181 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
182 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
185 * IPv6 control information. If the socket is an IPv6 socket we want
186 * to collect the destination address and interface so the client can
187 * set them on outgoing packets.
189 #ifdef ISC_PLATFORM_HAVEIPV6
190 #ifndef USE_CMSG
191 #define USE_CMSG 1
192 #endif
193 #endif
196 * We really don't want to try and use these control messages. Win32
197 * doesn't have this mechanism before XP.
199 #undef USE_CMSG
202 * Message header for recvmsg and sendmsg calls.
203 * Used value-result for recvmsg, value only for sendmsg.
205 struct msghdr {
206 SOCKADDR_STORAGE to_addr; /* UDP send/recv address */
207 int to_addr_len; /* length of the address */
208 WSABUF *msg_iov; /* scatter/gather array */
209 u_int msg_iovlen; /* # elements in msg_iov */
210 void *msg_control; /* ancillary data, see below */
211 u_int msg_controllen; /* ancillary data buffer len */
212 int msg_totallen; /* total length of this message */
213 } msghdr;
216 * The size to raise the receive buffer to.
218 #define RCVBUFSIZE (32*1024)
221 * The number of times a send operation is repeated if the result
222 * is WSAEINTR.
224 #define NRETRIES 10
226 struct isc_socket {
227 /* Not locked. */
228 unsigned int magic;
229 isc_socketmgr_t *manager;
230 isc_mutex_t lock;
231 isc_sockettype_t type;
233 /* Pointers to scatter/gather buffers */
234 WSABUF iov[ISC_SOCKET_MAXSCATTERGATHER];
236 /* Locked by socket lock. */
237 ISC_LINK(isc_socket_t) link;
238 unsigned int references; /* EXTERNAL references */
239 SOCKET fd; /* file handle */
240 int pf; /* protocol family */
241 char name[16];
242 void * tag;
245 * Each recv() call uses this buffer. It is a per-socket receive
246 * buffer that allows us to decouple the system recv() from the
247 * recv_list done events. This means the items on the recv_list
248 * can be removed without having to cancel pending system recv()
249 * calls. It also allows us to read-ahead in some cases.
251 struct {
252 SOCKADDR_STORAGE from_addr; // UDP send/recv address
253 int from_addr_len; // length of the address
254 char *base; // the base of the buffer
255 char *consume_position; // where to start copying data from next
256 unsigned int len; // the actual size of this buffer
257 unsigned int remaining; // the number of bytes remaining
258 } recvbuf;
260 ISC_LIST(isc_socketevent_t) send_list;
261 ISC_LIST(isc_socketevent_t) recv_list;
262 ISC_LIST(isc_socket_newconnev_t) accept_list;
263 isc_socket_connev_t *connect_ev;
265 isc_sockaddr_t address; /* remote address */
267 unsigned int listener : 1, /* listener socket */
268 connected : 1,
269 pending_connect : 1, /* connect pending */
270 bound : 1; /* bound to local addr */
271 unsigned int pending_iocp; /* Should equal the counters below. Debug. */
272 unsigned int pending_recv; /* Number of outstanding recv() calls. */
273 unsigned int pending_send; /* Number of outstanding send() calls. */
274 unsigned int pending_accept; /* Number of outstanding accept() calls. */
275 unsigned int state; /* Socket state. Debugging and consistency checking. */
276 int state_lineno; /* line which last touched state */
279 #define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0)
282 * Buffer structure
284 typedef struct buflist buflist_t;
286 struct buflist {
287 void *buf;
288 unsigned int buflen;
289 ISC_LINK(buflist_t) link;
293 * I/O Completion ports Info structures
296 static HANDLE hHeapHandle = NULL;
297 typedef struct IoCompletionInfo {
298 OVERLAPPED overlapped;
299 isc_socketevent_t *dev; /* send()/recv() done event */
300 isc_socket_connev_t *cdev; /* connect() done event */
301 isc_socket_newconnev_t *adev; /* accept() done event */
302 void *acceptbuffer;
303 DWORD received_bytes;
304 int request_type;
305 struct msghdr messagehdr;
306 ISC_LIST(buflist_t) bufferlist; /*%< list of buffers */
307 } IoCompletionInfo;
310 * Define a maximum number of I/O Completion Port worker threads
311 * to handle the load on the Completion Port. The actual number
312 * used is the number of CPU's + 1.
314 #define MAX_IOCPTHREADS 20
316 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')
317 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
319 struct isc_socketmgr {
320 /* Not locked. */
321 unsigned int magic;
322 isc_mem_t *mctx;
323 isc_mutex_t lock;
324 isc_stats_t *stats;
326 /* Locked by manager lock. */
327 ISC_LIST(isc_socket_t) socklist;
328 isc_boolean_t bShutdown;
329 isc_condition_t shutdown_ok;
330 HANDLE hIoCompletionPort;
331 int maxIOCPThreads;
332 HANDLE hIOCPThreads[MAX_IOCPTHREADS];
333 DWORD dwIOCPThreadIds[MAX_IOCPTHREADS];
336 * Debugging.
337 * Modified by InterlockedIncrement() and InterlockedDecrement()
339 LONG totalSockets;
340 LONG iocp_total;
/* Values for IoCompletionInfo.request_type. */
enum {
	SOCKET_RECV,
	SOCKET_SEND,
	SOCKET_ACCEPT,
	SOCKET_CONNECT
};
351 * send() and recv() iovec counts
353 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
354 #define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
356 static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
357 static void maybe_free_socket(isc_socket_t **, int);
358 static void free_socket(isc_socket_t **, int);
359 static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
360 static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
361 static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
362 static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
363 static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
364 static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
365 static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
366 static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
367 static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
368 static void queue_receive_request(isc_socket_t *sock);
371 * This is used to dump the contents of the sock structure
372 * You should make sure that the sock is locked before
373 * dumping it. Since the code uses simple printf() statements
374 * it should only be used interactively.
376 void
377 sock_dump(isc_socket_t *sock) {
378 isc_socketevent_t *ldev;
379 isc_socket_newconnev_t *ndev;
381 #if 0
382 isc_sockaddr_t addr;
383 char socktext[256];
385 isc_socket_getpeername(sock, &addr);
386 isc_sockaddr_format(&addr, socktext, sizeof(socktext));
387 printf("Remote Socket: %s\n", socktext);
388 isc_socket_getsockname(sock, &addr);
389 isc_sockaddr_format(&addr, socktext, sizeof(socktext));
390 printf("This Socket: %s\n", socktext);
391 #endif
393 printf("\n\t\tSock Dump\n");
394 printf("\t\tfd: %u\n", sock->fd);
395 printf("\t\treferences: %d\n", sock->references);
396 printf("\t\tpending_accept: %d\n", sock->pending_accept);
397 printf("\t\tconnecting: %d\n", sock->pending_connect);
398 printf("\t\tconnected: %d\n", sock->connected);
399 printf("\t\tbound: %d\n", sock->bound);
400 printf("\t\tpending_iocp: %d\n", sock->pending_iocp);
401 printf("\t\tsocket type: %d\n", sock->type);
403 printf("\n\t\tSock Recv List\n");
404 ldev = ISC_LIST_HEAD(sock->recv_list);
405 while (ldev != NULL) {
406 printf("\t\tdev: %p\n", ldev);
407 ldev = ISC_LIST_NEXT(ldev, ev_link);
410 printf("\n\t\tSock Send List\n");
411 ldev = ISC_LIST_HEAD(sock->send_list);
412 while (ldev != NULL) {
413 printf("\t\tdev: %p\n", ldev);
414 ldev = ISC_LIST_NEXT(ldev, ev_link);
417 printf("\n\t\tSock Accept List\n");
418 ndev = ISC_LIST_HEAD(sock->accept_list);
419 while (ndev != NULL) {
420 printf("\t\tdev: %p\n", ldev);
421 ndev = ISC_LIST_NEXT(ndev, ev_link);
425 static void
426 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
427 isc_logcategory_t *category, isc_logmodule_t *module, int level,
428 isc_msgcat_t *msgcat, int msgset, int message,
429 const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
431 /* This function will add an entry to the I/O completion port
432 * that will signal the I/O thread to exit (gracefully)
434 static void
435 signal_iocompletionport_exit(isc_socketmgr_t *manager) {
436 int i;
437 int errval;
438 char strbuf[ISC_STRERRORSIZE];
440 REQUIRE(VALID_MANAGER(manager));
441 for (i = 0; i < manager->maxIOCPThreads; i++) {
442 if (!PostQueuedCompletionStatus(manager->hIoCompletionPort,
443 0, 0, 0)) {
444 errval = GetLastError();
445 isc__strerror(errval, strbuf, sizeof(strbuf));
446 FATAL_ERROR(__FILE__, __LINE__,
447 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
448 ISC_MSG_FAILED,
449 "Can't request service thread to exit: %s"),
450 strbuf);
456 * Create the worker threads for the I/O Completion Port
458 void
459 iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
460 int errval;
461 char strbuf[ISC_STRERRORSIZE];
462 int i;
464 INSIST(total_threads > 0);
465 REQUIRE(VALID_MANAGER(manager));
467 * We need at least one
469 for (i = 0; i < total_threads; i++) {
470 manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread,
471 manager, 0,
472 &manager->dwIOCPThreadIds[i]);
473 if (manager->hIOCPThreads[i] == NULL) {
474 errval = GetLastError();
475 isc__strerror(errval, strbuf, sizeof(strbuf));
476 FATAL_ERROR(__FILE__, __LINE__,
477 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
478 ISC_MSG_FAILED,
479 "Can't create IOCP thread: %s"),
480 strbuf);
481 exit(1);
487 * Create/initialise the I/O completion port
489 void
490 iocompletionport_init(isc_socketmgr_t *manager) {
491 int errval;
492 char strbuf[ISC_STRERRORSIZE];
494 REQUIRE(VALID_MANAGER(manager));
496 * Create a private heap to handle the socket overlapped structure
497 * The minimum number of structures is 10, there is no maximum
499 hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
500 if (hHeapHandle == NULL) {
501 errval = GetLastError();
502 isc__strerror(errval, strbuf, sizeof(strbuf));
503 FATAL_ERROR(__FILE__, __LINE__,
504 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
505 ISC_MSG_FAILED,
506 "HeapCreate() failed during "
507 "initialization: %s"),
508 strbuf);
509 exit(1);
512 manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
514 /* Now Create the Completion Port */
515 manager->hIoCompletionPort = CreateIoCompletionPort(
516 INVALID_HANDLE_VALUE, NULL,
517 0, manager->maxIOCPThreads);
518 if (manager->hIoCompletionPort == NULL) {
519 errval = GetLastError();
520 isc__strerror(errval, strbuf, sizeof(strbuf));
521 FATAL_ERROR(__FILE__, __LINE__,
522 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
523 ISC_MSG_FAILED,
524 "CreateIoCompletionPort() failed "
525 "during initialization: %s"),
526 strbuf);
527 exit(1);
531 * Worker threads for servicing the I/O
533 iocompletionport_createthreads(manager->maxIOCPThreads, manager);
537 * Associate a socket with an IO Completion Port. This allows us to queue events for it
538 * and have our worker pool of threads process them.
540 void
541 iocompletionport_update(isc_socket_t *sock) {
542 HANDLE hiocp;
543 char strbuf[ISC_STRERRORSIZE];
545 REQUIRE(VALID_SOCKET(sock));
547 hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
548 sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);
550 if (hiocp == NULL) {
551 DWORD errval = GetLastError();
552 isc__strerror(errval, strbuf, sizeof(strbuf));
553 isc_log_iwrite(isc_lctx,
554 ISC_LOGCATEGORY_GENERAL,
555 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
556 isc_msgcat, ISC_MSGSET_SOCKET,
557 ISC_MSG_TOOMANYHANDLES,
558 "iocompletionport_update: failed to open"
559 " io completion port: %s",
560 strbuf);
562 /* XXXMLG temporary hack to make failures detected.
563 * This function should return errors to the caller, not
564 * exit here.
566 FATAL_ERROR(__FILE__, __LINE__,
567 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
568 ISC_MSG_FAILED,
569 "CreateIoCompletionPort() failed "
570 "during initialization: %s"),
571 strbuf);
572 exit(1);
575 InterlockedIncrement(&sock->manager->iocp_total);
579 * Routine to cleanup and then close the socket.
580 * Only close the socket here if it is NOT associated
581 * with an event, otherwise the WSAWaitForMultipleEvents
582 * may fail due to the fact that the Wait should not
583 * be running while closing an event or a socket.
584 * The socket is locked before calling this function
586 void
587 socket_close(isc_socket_t *sock) {
589 REQUIRE(sock != NULL);
591 if (sock->fd != INVALID_SOCKET) {
592 closesocket(sock->fd);
593 sock->fd = INVALID_SOCKET;
594 _set_state(sock, SOCK_CLOSED);
595 InterlockedDecrement(&sock->manager->totalSockets);
599 static isc_once_t initialise_once = ISC_ONCE_INIT;
600 static isc_boolean_t initialised = ISC_FALSE;
602 static void
603 initialise(void) {
604 WORD wVersionRequested;
605 WSADATA wsaData;
606 int err;
607 SOCKET sock;
608 GUID GUIDConnectEx = WSAID_CONNECTEX;
609 GUID GUIDAcceptEx = WSAID_ACCEPTEX;
610 GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
611 DWORD dwBytes;
613 /* Need Winsock 2.2 or better */
614 wVersionRequested = MAKEWORD(2, 2);
616 err = WSAStartup(wVersionRequested, &wsaData);
617 if (err != 0) {
618 char strbuf[ISC_STRERRORSIZE];
619 isc__strerror(err, strbuf, sizeof(strbuf));
620 FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
621 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
622 ISC_MSG_FAILED, "failed"),
623 strbuf);
624 exit(1);
627 * The following APIs do not exist as functions in a library, but we must
628 * ask winsock for them. They are "extensions" -- but why they cannot be
629 * actual functions is beyond me. So, ask winsock for the pointers to the
630 * functions we need.
632 sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
633 INSIST(sock != INVALID_SOCKET);
634 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
635 &GUIDConnectEx, sizeof(GUIDConnectEx),
636 &ISCConnectEx, sizeof(ISCConnectEx),
637 &dwBytes, NULL, NULL);
638 INSIST(err == 0);
640 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
641 &GUIDAcceptEx, sizeof(GUIDAcceptEx),
642 &ISCAcceptEx, sizeof(ISCAcceptEx),
643 &dwBytes, NULL, NULL);
644 INSIST(err == 0);
646 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
647 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
648 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
649 &dwBytes, NULL, NULL);
650 INSIST(err == 0);
652 closesocket(sock);
654 initialised = ISC_TRUE;
658 * Initialize socket services
660 void
661 InitSockets(void) {
662 RUNTIME_CHECK(isc_once_do(&initialise_once,
663 initialise) == ISC_R_SUCCESS);
664 if (!initialised)
665 exit(1);
669 internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
670 struct msghdr *messagehdr, int flags, int *Error)
672 int Result;
673 DWORD BytesSent;
674 DWORD Flags = flags;
675 int total_sent;
677 *Error = 0;
678 Result = WSASendTo(sock->fd, messagehdr->msg_iov,
679 messagehdr->msg_iovlen, &BytesSent,
680 Flags, (SOCKADDR *)&messagehdr->to_addr,
681 messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
682 NULL);
684 total_sent = (int)BytesSent;
686 /* Check for errors.*/
687 if (Result == SOCKET_ERROR) {
688 *Error = WSAGetLastError();
690 switch (*Error) {
691 case WSA_IO_INCOMPLETE:
692 case WSA_WAIT_IO_COMPLETION:
693 case WSA_IO_PENDING:
694 case NO_ERROR: /* Strange, but okay */
695 sock->pending_iocp++;
696 sock->pending_send++;
697 break;
699 default:
700 return (-1);
701 break;
703 } else {
704 sock->pending_iocp++;
705 sock->pending_send++;
708 if (lpo != NULL)
709 return (0);
710 else
711 return (total_sent);
714 static void
715 queue_receive_request(isc_socket_t *sock) {
716 DWORD Flags = 0;
717 DWORD NumBytes = 0;
718 int total_bytes = 0;
719 int Result;
720 int Error;
721 WSABUF iov[1];
722 IoCompletionInfo *lpo;
723 isc_result_t isc_result;
726 * If we already have a receive pending, do nothing.
728 if (sock->pending_recv > 0)
729 return;
732 * If no one is waiting, do nothing.
734 if (ISC_LIST_EMPTY(sock->recv_list))
735 return;
737 INSIST(sock->recvbuf.remaining == 0);
738 INSIST(sock->fd != INVALID_SOCKET);
740 iov[0].len = sock->recvbuf.len;
741 iov[0].buf = sock->recvbuf.base;
743 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
744 HEAP_ZERO_MEMORY,
745 sizeof(IoCompletionInfo));
746 RUNTIME_CHECK(lpo != NULL);
747 lpo->request_type = SOCKET_RECV;
749 sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);
751 Error = 0;
752 Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
753 &NumBytes, &Flags,
754 (SOCKADDR *)&sock->recvbuf.from_addr,
755 &sock->recvbuf.from_addr_len,
756 (LPWSAOVERLAPPED)lpo, NULL);
758 /* Check for errors. */
759 if (Result == SOCKET_ERROR) {
760 Error = WSAGetLastError();
762 switch (Error) {
763 case WSA_IO_PENDING:
764 sock->pending_iocp++;
765 sock->pending_recv++;
766 break;
768 default:
769 isc_result = isc__errno2result(Error);
770 if (isc_result == ISC_R_UNEXPECTED)
771 UNEXPECTED_ERROR(__FILE__, __LINE__,
772 "WSARecvFrom: Windows error code: %d, isc result %d",
773 Error, isc_result);
774 send_recvdone_abort(sock, isc_result);
775 break;
777 } else {
779 * The recv() finished immediately, but we will still get
780 * a completion event. Rather than duplicate code, let
781 * that thread handle sending the data along its way.
783 sock->pending_iocp++;
784 sock->pending_recv++;
787 socket_log(__LINE__, sock, NULL, IOEVENT,
788 isc_msgcat, ISC_MSGSET_SOCKET,
789 ISC_MSG_DOIORECV,
790 "queue_io_request: fd %d result %d error %d",
791 sock->fd, Result, Error);
793 CONSISTENT(sock);
796 static void
797 manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category,
798 isc_logmodule_t *module, int level, const char *fmt, ...)
800 char msgbuf[2048];
801 va_list ap;
803 if (!isc_log_wouldlog(isc_lctx, level))
804 return;
806 va_start(ap, fmt);
807 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
808 va_end(ap);
810 isc_log_write(isc_lctx, category, module, level,
811 "sockmgr %p: %s", sockmgr, msgbuf);
814 static void
815 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
816 isc_logcategory_t *category, isc_logmodule_t *module, int level,
817 isc_msgcat_t *msgcat, int msgset, int message,
818 const char *fmt, ...)
820 char msgbuf[2048];
821 char peerbuf[256];
822 va_list ap;
825 if (!isc_log_wouldlog(isc_lctx, level))
826 return;
828 va_start(ap, fmt);
829 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
830 va_end(ap);
832 if (address == NULL) {
833 isc_log_iwrite(isc_lctx, category, module, level,
834 msgcat, msgset, message,
835 "socket %p line %d: %s", sock, lineno, msgbuf);
836 } else {
837 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
838 isc_log_iwrite(isc_lctx, category, module, level,
839 msgcat, msgset, message,
840 "socket %p line %d peer %s: %s", sock, lineno,
841 peerbuf, msgbuf);
847 * Make an fd SOCKET non-blocking.
849 static isc_result_t
850 make_nonblock(SOCKET fd) {
851 int ret;
852 unsigned long flags = 1;
853 char strbuf[ISC_STRERRORSIZE];
855 /* Set the socket to non-blocking */
856 ret = ioctlsocket(fd, FIONBIO, &flags);
858 if (ret == -1) {
859 isc__strerror(errno, strbuf, sizeof(strbuf));
860 UNEXPECTED_ERROR(__FILE__, __LINE__,
861 "ioctlsocket(%d, FIOBIO, %d): %s",
862 fd, flags, strbuf);
864 return (ISC_R_UNEXPECTED);
867 return (ISC_R_SUCCESS);
871 * Windows 2000 systems incorrectly cause UDP sockets using WASRecvFrom
872 * to not work correctly, returning a WSACONNRESET error when a WSASendTo
873 * fails with an "ICMP port unreachable" response and preventing the
874 * socket from using the WSARecvFrom in subsequent operations.
875 * The function below fixes this, but requires that Windows 2000
876 * Service Pack 2 or later be installed on the system. NT 4.0
877 * systems are not affected by this and work correctly.
878 * See Microsoft Knowledge Base Article Q263823 for details of this.
880 isc_result_t
881 connection_reset_fix(SOCKET fd) {
882 DWORD dwBytesReturned = 0;
883 BOOL bNewBehavior = FALSE;
884 DWORD status;
886 if (isc_win32os_majorversion() < 5)
887 return (ISC_R_SUCCESS); /* NT 4.0 has no problem */
889 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
890 status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior,
891 sizeof(bNewBehavior), NULL, 0,
892 &dwBytesReturned, NULL, NULL);
893 if (status != SOCKET_ERROR)
894 return (ISC_R_SUCCESS);
895 else {
896 UNEXPECTED_ERROR(__FILE__, __LINE__,
897 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
898 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
899 ISC_MSG_FAILED, "failed"));
900 return (ISC_R_UNEXPECTED);
905 * Construct an iov array and attach it to the msghdr passed in. This is
906 * the SEND constructor, which will use the used region of the buffer
907 * (if using a buffer list) or will use the internal region (if a single
908 * buffer I/O is requested).
910 * Nothing can be NULL, and the done event must list at least one buffer
911 * on the buffer linked list for this function to be meaningful.
913 static void
914 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
915 struct msghdr *msg, char *cmsg, WSABUF *iov,
916 IoCompletionInfo *lpo)
918 unsigned int iovcount;
919 isc_buffer_t *buffer;
920 buflist_t *cpbuffer;
921 isc_region_t used;
922 size_t write_count;
923 size_t skip_count;
925 memset(msg, 0, sizeof(*msg));
927 memcpy(&msg->to_addr, &dev->address.type, dev->address.length);
928 msg->to_addr_len = dev->address.length;
930 buffer = ISC_LIST_HEAD(dev->bufferlist);
931 write_count = 0;
932 iovcount = 0;
935 * Single buffer I/O? Skip what we've done so far in this region.
937 if (buffer == NULL) {
938 write_count = dev->region.length - dev->n;
939 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
940 RUNTIME_CHECK(cpbuffer != NULL);
941 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count);
942 RUNTIME_CHECK(cpbuffer->buf != NULL);
944 socket_log(__LINE__, sock, NULL, TRACE,
945 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
946 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
947 cpbuffer->buf, write_count);
949 memcpy(cpbuffer->buf,(dev->region.base + dev->n), write_count);
950 cpbuffer->buflen = write_count;
951 ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
952 iov[0].buf = cpbuffer->buf;
953 iov[0].len = write_count;
954 iovcount = 1;
956 goto config;
960 * Multibuffer I/O.
961 * Skip the data in the buffer list that we have already written.
963 skip_count = dev->n;
964 while (buffer != NULL) {
965 REQUIRE(ISC_BUFFER_VALID(buffer));
966 if (skip_count < isc_buffer_usedlength(buffer))
967 break;
968 skip_count -= isc_buffer_usedlength(buffer);
969 buffer = ISC_LIST_NEXT(buffer, link);
972 while (buffer != NULL) {
973 INSIST(iovcount < MAXSCATTERGATHER_SEND);
975 isc_buffer_usedregion(buffer, &used);
977 if (used.length > 0) {
978 int uselen = used.length - skip_count;
979 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
980 RUNTIME_CHECK(cpbuffer != NULL);
981 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen);
982 RUNTIME_CHECK(cpbuffer->buf != NULL);
984 socket_log(__LINE__, sock, NULL, TRACE,
985 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
986 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
987 cpbuffer->buf, write_count);
989 memcpy(cpbuffer->buf,(used.base + skip_count), uselen);
990 cpbuffer->buflen = uselen;
991 iov[iovcount].buf = cpbuffer->buf;
992 iov[iovcount].len = used.length - skip_count;
993 write_count += uselen;
994 skip_count = 0;
995 iovcount++;
997 buffer = ISC_LIST_NEXT(buffer, link);
1000 INSIST(skip_count == 0);
1002 config:
1003 msg->msg_iov = iov;
1004 msg->msg_iovlen = iovcount;
1005 msg->msg_totallen = write_count;
1008 static void
1009 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
1010 isc_socketevent_t *dev)
1012 if (sock->type == isc_sockettype_udp) {
1013 if (address != NULL)
1014 dev->address = *address;
1015 else
1016 dev->address = sock->address;
1017 } else if (sock->type == isc_sockettype_tcp) {
1018 INSIST(address == NULL);
1019 dev->address = sock->address;
1023 static void
1024 destroy_socketevent(isc_event_t *event) {
1025 isc_socketevent_t *ev = (isc_socketevent_t *)event;
1027 INSIST(ISC_LIST_EMPTY(ev->bufferlist));
1029 (ev->destroy)(event);
1032 static isc_socketevent_t *
1033 allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
1034 isc_taskaction_t action, const void *arg)
1036 isc_socketevent_t *ev;
1038 ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx,
1039 sock, eventtype,
1040 action, arg,
1041 sizeof(*ev));
1042 if (ev == NULL)
1043 return (NULL);
1045 ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
1046 ISC_LINK_INIT(ev, ev_link);
1047 ISC_LIST_INIT(ev->bufferlist);
1048 ev->region.base = NULL;
1049 ev->n = 0;
1050 ev->offset = 0;
1051 ev->attributes = 0;
1052 ev->destroy = ev->ev_destroy;
1053 ev->ev_destroy = destroy_socketevent;
1055 return (ev);
#if defined(ISC_SOCKET_DEBUG)
/*
 * Debugging aid: print a msghdr and its scatter/gather array to stdout.
 * Uses the address fields of this file's own struct msghdr
 * (to_addr / to_addr_len).
 */
static void
dump_msg(struct msghdr *msg, isc_socket_t *sock) {
	unsigned int i;

	printf("MSGHDR %p, Socket #: %u\n", (void *)msg,
	       (unsigned int)sock->fd);
	printf("\tname %p, namelen %d\n", (void *)&msg->to_addr,
	       msg->to_addr_len);
	printf("\tiov %p, iovlen %d\n", (void *)msg->msg_iov,
	       (int)msg->msg_iovlen);
	for (i = 0; i < (unsigned int)msg->msg_iovlen; i++)
		printf("\t\t%d\tbase %p, len %d\n", (int)i,
		       (void *)msg->msg_iov[i].buf,
		       (int)msg->msg_iov[i].len);
}
#endif
1074 * map the error code
1077 map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
1078 char *errorstring, size_t bufsize) {
1080 int doreturn;
1081 switch (windows_errno) {
1082 case WSAECONNREFUSED:
1083 *isc_errno = ISC_R_CONNREFUSED;
1084 if (sock->connected)
1085 doreturn = DOIO_HARD;
1086 else
1087 doreturn = DOIO_SOFT;
1088 break;
1089 case WSAENETUNREACH:
1090 case ERROR_NETWORK_UNREACHABLE:
1091 *isc_errno = ISC_R_NETUNREACH;
1092 if (sock->connected)
1093 doreturn = DOIO_HARD;
1094 else
1095 doreturn = DOIO_SOFT;
1096 break;
1097 case ERROR_PORT_UNREACHABLE:
1098 case ERROR_HOST_UNREACHABLE:
1099 case WSAEHOSTUNREACH:
1100 *isc_errno = ISC_R_HOSTUNREACH;
1101 if (sock->connected)
1102 doreturn = DOIO_HARD;
1103 else
1104 doreturn = DOIO_SOFT;
1105 break;
1106 case WSAENETDOWN:
1107 *isc_errno = ISC_R_NETDOWN;
1108 if (sock->connected)
1109 doreturn = DOIO_HARD;
1110 else
1111 doreturn = DOIO_SOFT;
1112 break;
1113 case WSAEHOSTDOWN:
1114 *isc_errno = ISC_R_HOSTDOWN;
1115 if (sock->connected)
1116 doreturn = DOIO_HARD;
1117 else
1118 doreturn = DOIO_SOFT;
1119 break;
1120 case WSAEACCES:
1121 *isc_errno = ISC_R_NOPERM;
1122 if (sock->connected)
1123 doreturn = DOIO_HARD;
1124 else
1125 doreturn = DOIO_SOFT;
1126 break;
1127 case WSAECONNRESET:
1128 case WSAENETRESET:
1129 case WSAECONNABORTED:
1130 case WSAEDISCON:
1131 *isc_errno = ISC_R_CONNECTIONRESET;
1132 if (sock->connected)
1133 doreturn = DOIO_HARD;
1134 else
1135 doreturn = DOIO_SOFT;
1136 break;
1137 case WSAENOTCONN:
1138 *isc_errno = ISC_R_NOTCONNECTED;
1139 if (sock->connected)
1140 doreturn = DOIO_HARD;
1141 else
1142 doreturn = DOIO_SOFT;
1143 break;
1144 case ERROR_OPERATION_ABORTED:
1145 case ERROR_CONNECTION_ABORTED:
1146 case ERROR_REQUEST_ABORTED:
1147 *isc_errno = ISC_R_CONNECTIONRESET;
1148 doreturn = DOIO_HARD;
1149 break;
1150 case WSAENOBUFS:
1151 *isc_errno = ISC_R_NORESOURCES;
1152 doreturn = DOIO_HARD;
1153 break;
1154 case WSAEAFNOSUPPORT:
1155 *isc_errno = ISC_R_FAMILYNOSUPPORT;
1156 doreturn = DOIO_HARD;
1157 break;
1158 case WSAEADDRNOTAVAIL:
1159 *isc_errno = ISC_R_ADDRNOTAVAIL;
1160 doreturn = DOIO_HARD;
1161 break;
1162 case WSAEDESTADDRREQ:
1163 *isc_errno = ISC_R_BADADDRESSFORM;
1164 doreturn = DOIO_HARD;
1165 break;
1166 case ERROR_NETNAME_DELETED:
1167 *isc_errno = ISC_R_NETDOWN;
1168 doreturn = DOIO_HARD;
1169 break;
1170 default:
1171 *isc_errno = ISC_R_IOERROR;
1172 doreturn = DOIO_HARD;
1173 break;
1175 if (doreturn == DOIO_HARD) {
1176 isc__strerror(windows_errno, errorstring, bufsize);
1178 return (doreturn);
1181 static void
1182 fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
1183 isc_region_t r;
1184 int copylen;
1185 isc_buffer_t *buffer;
1187 INSIST(dev->n < dev->minimum);
1188 INSIST(sock->recvbuf.remaining > 0);
1189 INSIST(sock->pending_recv == 0);
1191 if (sock->type == isc_sockettype_udp) {
1192 dev->address.length = sock->recvbuf.from_addr_len;
1193 memcpy(&dev->address.type, &sock->recvbuf.from_addr,
1194 sock->recvbuf.from_addr_len);
1195 if (isc_sockaddr_getport(&dev->address) == 0) {
1196 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1197 socket_log(__LINE__, sock, &dev->address, IOEVENT,
1198 isc_msgcat, ISC_MSGSET_SOCKET,
1199 ISC_MSG_ZEROPORT,
1200 "dropping source port zero packet");
1202 sock->recvbuf.remaining = 0;
1203 return;
1205 } else if (sock->type == isc_sockettype_tcp) {
1206 dev->address = sock->address;
1210 * Run through the list of buffers we were given, and find the
1211 * first one with space. Once it is found, loop through, filling
1212 * the buffers as much as possible.
1214 buffer = ISC_LIST_HEAD(dev->bufferlist);
1215 if (buffer != NULL) { // Multi-buffer receive
1216 while (buffer != NULL && sock->recvbuf.remaining > 0) {
1217 REQUIRE(ISC_BUFFER_VALID(buffer));
1218 if (isc_buffer_availablelength(buffer) > 0) {
1219 isc_buffer_availableregion(buffer, &r);
1220 copylen = min(r.length, sock->recvbuf.remaining);
1221 memcpy(r.base, sock->recvbuf.consume_position, copylen);
1222 sock->recvbuf.consume_position += copylen;
1223 sock->recvbuf.remaining -= copylen;
1224 isc_buffer_add(buffer, copylen);
1225 dev->n += copylen;
1227 buffer = ISC_LIST_NEXT(buffer, link);
1229 } else { // Single-buffer receive
1230 copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining);
1231 memcpy(dev->region.base + dev->n, sock->recvbuf.consume_position, copylen);
1232 sock->recvbuf.consume_position += copylen;
1233 sock->recvbuf.remaining -= copylen;
1234 dev->n += copylen;
1238 * UDP receives are all-consuming. That is, if we have 4k worth of
1239 * data in our receive buffer, and the caller only gave us
1240 * 1k of space, we will toss the remaining 3k of data. TCP
1241 * will keep the extra data around and use it for later requests.
1243 if (sock->type == isc_sockettype_udp)
1244 sock->recvbuf.remaining = 0;
1248 * Copy out as much data from the internal buffer to done events.
1249 * As each done event is filled, send it along its way.
1251 static void
1252 completeio_recv(isc_socket_t *sock)
1254 isc_socketevent_t *dev;
1257 * If we are in the process of filling our buffer, we cannot
1258 * touch it yet, so don't.
1260 if (sock->pending_recv > 0)
1261 return;
1263 while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) {
1264 dev = ISC_LIST_HEAD(sock->recv_list);
1267 * See if we have sufficient data in our receive buffer
1268 * to handle this. If we do, copy out the data.
1270 fill_recv(sock, dev);
1273 * Did we satisfy it?
1275 if (dev->n >= dev->minimum) {
1276 dev->result = ISC_R_SUCCESS;
1277 send_recvdone_event(sock, &dev);
1283 * Returns:
1284 * DOIO_SUCCESS The operation succeeded. dev->result contains
1285 * ISC_R_SUCCESS.
1287 * DOIO_HARD A hard or unexpected I/O error was encountered.
1288 * dev->result contains the appropriate error.
1290 * DOIO_SOFT A soft I/O error was encountered. No senddone
1291 * event was sent. The operation should be retried.
1293 * No other return values are possible.
1295 static int
1296 completeio_send(isc_socket_t *sock, isc_socketevent_t *dev,
1297 struct msghdr *messagehdr, int cc, int send_errno)
1299 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1300 char strbuf[ISC_STRERRORSIZE];
1302 if (send_errno != 0) {
1303 if (SOFT_ERROR(send_errno))
1304 return (DOIO_SOFT);
1306 return (map_socket_error(sock, send_errno, &dev->result,
1307 strbuf, sizeof(strbuf)));
1310 * The other error types depend on whether or not the
1311 * socket is UDP or TCP. If it is UDP, some errors
1312 * that we expect to be fatal under TCP are merely
1313 * annoying, and are really soft errors.
1315 * However, these soft errors are still returned as
1316 * a status.
1318 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
1319 isc__strerror(send_errno, strbuf, sizeof(strbuf));
1320 UNEXPECTED_ERROR(__FILE__, __LINE__, "completeio_send: %s: %s",
1321 addrbuf, strbuf);
1322 dev->result = isc__errno2result(send_errno);
1323 return (DOIO_HARD);
1327 * If we write less than we expected, update counters, poke.
1329 dev->n += cc;
1330 if (cc != messagehdr->msg_totallen)
1331 return (DOIO_SOFT);
1334 * Exactly what we wanted to write. We're done with this
1335 * entry. Post its completion event.
1337 dev->result = ISC_R_SUCCESS;
1338 return (DOIO_SUCCESS);
1341 static int
1342 startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
1343 int *send_errno)
1345 char *cmsg = NULL;
1346 char strbuf[ISC_STRERRORSIZE];
1347 IoCompletionInfo *lpo;
1348 int status;
1349 struct msghdr *msghdr;
1351 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
1352 HEAP_ZERO_MEMORY,
1353 sizeof(IoCompletionInfo));
1354 RUNTIME_CHECK(lpo != NULL);
1355 lpo->request_type = SOCKET_SEND;
1356 lpo->dev = dev;
1357 msghdr = &lpo->messagehdr;
1358 memset(msghdr, 0, sizeof(struct msghdr));
1359 ISC_LIST_INIT(lpo->bufferlist);
1361 build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo);
1363 *nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno);
1365 if (*nbytes < 0) {
1367 * I/O has been initiated
1368 * completion will be through the completion port
1370 if (PENDING_ERROR(*send_errno)) {
1371 status = DOIO_PENDING;
1372 goto done;
1375 if (SOFT_ERROR(*send_errno)) {
1376 status = DOIO_SOFT;
1377 goto done;
1381 * If we got this far then something is wrong
1383 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1384 isc__strerror(*send_errno, strbuf, sizeof(strbuf));
1385 socket_log(__LINE__, sock, NULL, IOEVENT,
1386 isc_msgcat, ISC_MSGSET_SOCKET,
1387 ISC_MSG_INTERNALSEND,
1388 "startio_send: internal_sendmsg(%d) %d "
1389 "bytes, err %d/%s",
1390 sock->fd, *nbytes, *send_errno, strbuf);
1392 goto done;
1394 dev->result = ISC_R_SUCCESS;
1395 status = DOIO_SOFT;
1396 done:
1397 _set_state(sock, SOCK_DATA);
1398 return (status);
1401 static isc_result_t
1402 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1403 isc_socket_t **socketp) {
1404 isc_socket_t *sock;
1405 isc_result_t result;
1407 sock = isc_mem_get(manager->mctx, sizeof(*sock));
1409 if (sock == NULL)
1410 return (ISC_R_NOMEMORY);
1412 sock->magic = 0;
1413 sock->references = 0;
1415 sock->manager = manager;
1416 sock->type = type;
1417 sock->fd = INVALID_SOCKET;
1419 ISC_LINK_INIT(sock, link);
1422 * set up list of readers and writers to be initially empty
1424 ISC_LIST_INIT(sock->recv_list);
1425 ISC_LIST_INIT(sock->send_list);
1426 ISC_LIST_INIT(sock->accept_list);
1427 sock->connect_ev = NULL;
1428 sock->pending_accept = 0;
1429 sock->pending_recv = 0;
1430 sock->pending_send = 0;
1431 sock->pending_iocp = 0;
1432 sock->listener = 0;
1433 sock->connected = 0;
1434 sock->pending_connect = 0;
1435 sock->bound = 0;
1436 memset(sock->name, 0, sizeof(sock->name)); // zero the name field
1437 _set_state(sock, SOCK_INITIALIZED);
1439 sock->recvbuf.len = 65536;
1440 sock->recvbuf.consume_position = sock->recvbuf.base;
1441 sock->recvbuf.remaining = 0;
1442 sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size
1443 if (sock->recvbuf.base == NULL) {
1444 sock->magic = 0;
1445 goto error;
1449 * initialize the lock
1451 result = isc_mutex_init(&sock->lock);
1452 if (result != ISC_R_SUCCESS) {
1453 sock->magic = 0;
1454 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1455 sock->recvbuf.base = NULL;
1456 goto error;
1459 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1460 "allocated");
1462 sock->magic = SOCKET_MAGIC;
1463 *socketp = sock;
1465 return (ISC_R_SUCCESS);
1467 error:
1468 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1470 return (result);
1474 * Verify that the socket state is consistent.
1476 static void
1477 consistent(isc_socket_t *sock) {
1479 isc_socketevent_t *dev;
1480 isc_socket_newconnev_t *nev;
1481 unsigned int count;
1482 char *crash_reason;
1483 isc_boolean_t crash = ISC_FALSE;
1485 REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send
1486 + sock->pending_accept + sock->pending_connect);
1488 dev = ISC_LIST_HEAD(sock->send_list);
1489 count = 0;
1490 while (dev != NULL) {
1491 count++;
1492 dev = ISC_LIST_NEXT(dev, ev_link);
1494 if (count > sock->pending_send) {
1495 crash = ISC_TRUE;
1496 crash_reason = "send_list > sock->pending_send";
1499 nev = ISC_LIST_HEAD(sock->accept_list);
1500 count = 0;
1501 while (nev != NULL) {
1502 count++;
1503 nev = ISC_LIST_NEXT(nev, ev_link);
1505 if (count > sock->pending_accept) {
1506 crash = ISC_TRUE;
1507 crash_reason = "send_list > sock->pending_send";
1510 if (crash) {
1511 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1512 ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s",
1513 crash_reason);
1514 sock_dump(sock);
1515 INSIST(crash == ISC_FALSE);
1520 * Maybe free the socket.
1522 * This function will verify tht the socket is no longer in use in any way,
1523 * either internally or externally. This is the only place where this
1524 * check is to be made; if some bit of code believes that IT is done with
1525 * the socket (e.g., some reference counter reaches zero), it should call
1526 * this function.
1528 * When calling this function, the socket must be locked, and the manager
1529 * must be unlocked.
1531 * When this function returns, *socketp will be NULL. No tricks to try
1532 * to hold on to this pointer are allowed.
1534 static void
1535 maybe_free_socket(isc_socket_t **socketp, int lineno) {
1536 isc_socket_t *sock = *socketp;
1537 *socketp = NULL;
1539 INSIST(VALID_SOCKET(sock));
1540 CONSISTENT(sock);
1542 if (sock->pending_iocp > 0
1543 || sock->pending_recv > 0
1544 || sock->pending_send > 0
1545 || sock->pending_accept > 0
1546 || sock->references > 0
1547 || sock->pending_connect == 1
1548 || !ISC_LIST_EMPTY(sock->recv_list)
1549 || !ISC_LIST_EMPTY(sock->send_list)
1550 || !ISC_LIST_EMPTY(sock->accept_list)
1551 || sock->fd != INVALID_SOCKET) {
1552 UNLOCK(&sock->lock);
1553 return;
1555 UNLOCK(&sock->lock);
1557 free_socket(&sock, lineno);
1560 void
1561 free_socket(isc_socket_t **sockp, int lineno) {
1562 isc_socketmgr_t *manager;
1563 isc_socket_t *sock = *sockp;
1564 *sockp = NULL;
1566 manager = sock->manager;
1569 * Seems we can free the socket after all.
1571 manager = sock->manager;
1572 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1573 ISC_MSG_DESTROYING, "freeing socket line %d fd %d lock %p semaphore %p",
1574 lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore);
1576 sock->magic = 0;
1577 DESTROYLOCK(&sock->lock);
1579 if (sock->recvbuf.base != NULL)
1580 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1582 LOCK(&manager->lock);
1583 if (ISC_LINK_LINKED(sock, link))
1584 ISC_LIST_UNLINK(manager->socklist, sock, link);
1585 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1587 if (ISC_LIST_EMPTY(manager->socklist))
1588 SIGNAL(&manager->shutdown_ok);
1589 UNLOCK(&manager->lock);
1593 * Create a new 'type' socket managed by 'manager'. Events
1594 * will be posted to 'task' and when dispatched 'action' will be
1595 * called with 'arg' as the arg value. The new socket is returned
1596 * in 'socketp'.
1598 isc_result_t
1599 isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1600 isc_socket_t **socketp) {
1601 isc_socket_t *sock = NULL;
1602 isc_result_t result;
1603 #if defined(USE_CMSG)
1604 int on = 1;
1605 #endif
1606 #if defined(SO_RCVBUF)
1607 ISC_SOCKADDR_LEN_T optlen;
1608 int size;
1609 #endif
1610 int socket_errno;
1611 char strbuf[ISC_STRERRORSIZE];
1613 REQUIRE(VALID_MANAGER(manager));
1614 REQUIRE(socketp != NULL && *socketp == NULL);
1615 REQUIRE(type != isc_sockettype_fdwatch);
1617 result = allocate_socket(manager, type, &sock);
1618 if (result != ISC_R_SUCCESS)
1619 return (result);
1621 sock->pf = pf;
1622 switch (type) {
1623 case isc_sockettype_udp:
1624 sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
1625 if (sock->fd != INVALID_SOCKET) {
1626 result = connection_reset_fix(sock->fd);
1627 if (result != ISC_R_SUCCESS) {
1628 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1629 "closed %d %d %d con_reset_fix_failed",
1630 sock->pending_recv, sock->pending_send,
1631 sock->references);
1632 closesocket(sock->fd);
1633 _set_state(sock, SOCK_CLOSED);
1634 sock->fd = INVALID_SOCKET;
1635 free_socket(&sock, __LINE__);
1636 return (result);
1639 break;
1640 case isc_sockettype_tcp:
1641 sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
1642 break;
1645 if (sock->fd == INVALID_SOCKET) {
1646 socket_errno = WSAGetLastError();
1647 free_socket(&sock, __LINE__);
1649 switch (socket_errno) {
1650 case WSAEMFILE:
1651 case WSAENOBUFS:
1652 return (ISC_R_NORESOURCES);
1654 case WSAEPROTONOSUPPORT:
1655 case WSAEPFNOSUPPORT:
1656 case WSAEAFNOSUPPORT:
1657 return (ISC_R_FAMILYNOSUPPORT);
1659 default:
1660 isc__strerror(socket_errno, strbuf, sizeof(strbuf));
1661 UNEXPECTED_ERROR(__FILE__, __LINE__,
1662 "socket() %s: %s",
1663 isc_msgcat_get(isc_msgcat,
1664 ISC_MSGSET_GENERAL,
1665 ISC_MSG_FAILED,
1666 "failed"),
1667 strbuf);
1668 return (ISC_R_UNEXPECTED);
1672 result = make_nonblock(sock->fd);
1673 if (result != ISC_R_SUCCESS) {
1674 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1675 "closed %d %d %d make_nonblock_failed",
1676 sock->pending_recv, sock->pending_send,
1677 sock->references);
1678 closesocket(sock->fd);
1679 sock->fd = INVALID_SOCKET;
1680 free_socket(&sock, __LINE__);
1681 return (result);
1685 #if defined(USE_CMSG) || defined(SO_RCVBUF)
1686 if (type == isc_sockettype_udp) {
1688 #if defined(USE_CMSG)
1689 #if defined(ISC_PLATFORM_HAVEIPV6)
1690 #ifdef IPV6_RECVPKTINFO
1691 /* 2292bis */
1692 if ((pf == AF_INET6)
1693 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1694 (void *)&on, sizeof(on)) < 0)) {
1695 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1696 UNEXPECTED_ERROR(__FILE__, __LINE__,
1697 "setsockopt(%d, IPV6_RECVPKTINFO) "
1698 "%s: %s", sock->fd,
1699 isc_msgcat_get(isc_msgcat,
1700 ISC_MSGSET_GENERAL,
1701 ISC_MSG_FAILED,
1702 "failed"),
1703 strbuf);
1705 #else
1706 /* 2292 */
1707 if ((pf == AF_INET6)
1708 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
1709 (void *)&on, sizeof(on)) < 0)) {
1710 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1711 UNEXPECTED_ERROR(__FILE__, __LINE__,
1712 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1713 sock->fd,
1714 isc_msgcat_get(isc_msgcat,
1715 ISC_MSGSET_GENERAL,
1716 ISC_MSG_FAILED,
1717 "failed"),
1718 strbuf);
1720 #endif /* IPV6_RECVPKTINFO */
1721 #ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/
1722 /* use minimum MTU */
1723 if (pf == AF_INET6) {
1724 (void)setsockopt(sock->fd, IPPROTO_IPV6,
1725 IPV6_USE_MIN_MTU,
1726 (void *)&on, sizeof(on));
1728 #endif
1729 #endif /* ISC_PLATFORM_HAVEIPV6 */
1730 #endif /* defined(USE_CMSG) */
1732 #if defined(SO_RCVBUF)
1733 optlen = sizeof(size);
1734 if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1735 (void *)&size, &optlen) >= 0 &&
1736 size < RCVBUFSIZE) {
1737 size = RCVBUFSIZE;
1738 (void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1739 (void *)&size, sizeof(size));
1741 #endif
1744 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
1746 _set_state(sock, SOCK_OPEN);
1747 sock->references = 1;
1748 *socketp = sock;
1750 iocompletionport_update(sock);
1753 * Note we don't have to lock the socket like we normally would because
1754 * there are no external references to it yet.
1756 LOCK(&manager->lock);
1757 ISC_LIST_APPEND(manager->socklist, sock, link);
1758 InterlockedIncrement(&manager->totalSockets);
1759 UNLOCK(&manager->lock);
1761 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1762 ISC_MSG_CREATED, "created %u type %u", sock->fd, type);
1764 return (ISC_R_SUCCESS);
1767 isc_result_t
1768 isc_socket_open(isc_socket_t *sock) {
1769 REQUIRE(VALID_SOCKET(sock));
1770 REQUIRE(sock->type != isc_sockettype_fdwatch);
1772 return (ISC_R_NOTIMPLEMENTED);
1776 * Attach to a socket. Caller must explicitly detach when it is done.
1778 void
1779 isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1780 REQUIRE(VALID_SOCKET(sock));
1781 REQUIRE(socketp != NULL && *socketp == NULL);
1783 LOCK(&sock->lock);
1784 CONSISTENT(sock);
1785 sock->references++;
1786 UNLOCK(&sock->lock);
1788 *socketp = sock;
1792 * Dereference a socket. If this is the last reference to it, clean things
1793 * up by destroying the socket.
1795 void
1796 isc_socket_detach(isc_socket_t **socketp) {
1797 isc_socket_t *sock;
1798 isc_boolean_t kill_socket = ISC_FALSE;
1800 REQUIRE(socketp != NULL);
1801 sock = *socketp;
1802 REQUIRE(VALID_SOCKET(sock));
1803 REQUIRE(sock->type != isc_sockettype_fdwatch);
1805 LOCK(&sock->lock);
1806 CONSISTENT(sock);
1807 REQUIRE(sock->references > 0);
1808 sock->references--;
1810 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1811 "detach_socket %d %d %d",
1812 sock->pending_recv, sock->pending_send,
1813 sock->references);
1815 if (sock->references == 0 && sock->fd != INVALID_SOCKET) {
1816 closesocket(sock->fd);
1817 sock->fd = INVALID_SOCKET;
1818 _set_state(sock, SOCK_CLOSED);
1821 maybe_free_socket(&sock, __LINE__);
1823 *socketp = NULL;
1826 isc_result_t
1827 isc_socket_close(isc_socket_t *sock) {
1828 REQUIRE(VALID_SOCKET(sock));
1829 REQUIRE(sock->type != isc_sockettype_fdwatch);
1831 return (ISC_R_NOTIMPLEMENTED);
1835 * Dequeue an item off the given socket's read queue, set the result code
1836 * in the done event to the one provided, and send it to the task it was
1837 * destined for.
1839 * If the event to be sent is on a list, remove it before sending. If
1840 * asked to, send and detach from the task as well.
1842 * Caller must have the socket locked if the event is attached to the socket.
1844 static void
1845 send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1846 isc_task_t *task;
1848 task = (*dev)->ev_sender;
1849 (*dev)->ev_sender = sock;
1851 if (ISC_LINK_LINKED(*dev, ev_link))
1852 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1854 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1855 == ISC_SOCKEVENTATTR_ATTACHED)
1856 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1857 else
1858 isc_task_send(task, (isc_event_t **)dev);
1860 CONSISTENT(sock);
1864 * See comments for send_recvdone_event() above.
1866 static void
1867 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1868 isc_task_t *task;
1870 INSIST(dev != NULL && *dev != NULL);
1872 task = (*dev)->ev_sender;
1873 (*dev)->ev_sender = sock;
1875 if (ISC_LINK_LINKED(*dev, ev_link))
1876 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
1878 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1879 == ISC_SOCKEVENTATTR_ATTACHED)
1880 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1881 else
1882 isc_task_send(task, (isc_event_t **)dev);
1884 CONSISTENT(sock);
1888 * See comments for send_recvdone_event() above.
1890 static void
1891 send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) {
1892 isc_task_t *task;
1894 INSIST(adev != NULL && *adev != NULL);
1896 task = (*adev)->ev_sender;
1897 (*adev)->ev_sender = sock;
1899 if (ISC_LINK_LINKED(*adev, ev_link))
1900 ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link);
1902 isc_task_sendanddetach(&task, (isc_event_t **)adev);
1904 CONSISTENT(sock);
1908 * See comments for send_recvdone_event() above.
1910 static void
1911 send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) {
1912 isc_task_t *task;
1914 INSIST(cdev != NULL && *cdev != NULL);
1916 task = (*cdev)->ev_sender;
1917 (*cdev)->ev_sender = sock;
1919 sock->connect_ev = NULL;
1921 isc_task_sendanddetach(&task, (isc_event_t **)cdev);
1923 CONSISTENT(sock);
1927 * On entry to this function, the event delivered is the internal
1928 * readable event, and the first item on the accept_list should be
1929 * the done event we want to send. If the list is empty, this is a no-op,
1930 * so just close the new connection, unlock, and return.
1932 * Note the socket is locked before entering here
1934 static void
1935 internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) {
1936 isc_socket_newconnev_t *adev;
1937 isc_result_t result = ISC_R_SUCCESS;
1938 isc_socket_t *nsock;
1939 struct sockaddr *localaddr;
1940 int localaddr_len = sizeof(*localaddr);
1941 struct sockaddr *remoteaddr;
1942 int remoteaddr_len = sizeof(*remoteaddr);
1944 INSIST(VALID_SOCKET(sock));
1945 LOCK(&sock->lock);
1946 CONSISTENT(sock);
1948 socket_log(__LINE__, sock, NULL, TRACE,
1949 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1950 "internal_accept called");
1952 INSIST(sock->listener);
1954 INSIST(sock->pending_iocp > 0);
1955 sock->pending_iocp--;
1956 INSIST(sock->pending_accept > 0);
1957 sock->pending_accept--;
1959 adev = lpo->adev;
1962 * If the event is no longer in the list we can just return.
1964 if (!acceptdone_is_active(sock, adev))
1965 goto done;
1967 nsock = adev->newsocket;
1970 * Pull off the done event.
1972 ISC_LIST_UNLINK(sock->accept_list, adev, ev_link);
1975 * Extract the addresses from the socket, copy them into the structure,
1976 * and return the new socket.
1978 ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0,
1979 sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16,
1980 (LPSOCKADDR *)&localaddr, &localaddr_len,
1981 (LPSOCKADDR *)&remoteaddr, &remoteaddr_len);
1982 memcpy(&adev->address.type, remoteaddr, remoteaddr_len);
1983 adev->address.length = remoteaddr_len;
1984 nsock->address = adev->address;
1985 nsock->pf = adev->address.type.sa.sa_family;
1987 socket_log(__LINE__, nsock, &nsock->address, TRACE,
1988 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1989 "internal_accept parent %p", sock);
1991 result = make_nonblock(adev->newsocket->fd);
1992 INSIST(result == ISC_R_SUCCESS);
1994 INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT,
1995 (char *)&sock->fd, sizeof(sock->fd)) == 0);
1998 * Hook it up into the manager.
2000 nsock->bound = 1;
2001 nsock->connected = 1;
2002 _set_state(nsock, SOCK_OPEN);
2004 LOCK(&nsock->manager->lock);
2005 ISC_LIST_APPEND(nsock->manager->socklist, nsock, link);
2006 InterlockedIncrement(&nsock->manager->totalSockets);
2007 UNLOCK(&nsock->manager->lock);
2009 socket_log(__LINE__, sock, &nsock->address, CREATION,
2010 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2011 "accepted_connection new_socket %p fd %d",
2012 nsock, nsock->fd);
2014 adev->result = result;
2015 send_acceptdone_event(sock, &adev);
2017 done:
2018 CONSISTENT(sock);
2019 UNLOCK(&sock->lock);
2021 HeapFree(hHeapHandle, 0, lpo->acceptbuffer);
2022 lpo->acceptbuffer = NULL;
2026 * Called when a socket with a pending connect() finishes.
2027 * Note that the socket is locked before entering.
2029 static void
2030 internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) {
2031 isc_socket_connev_t *cdev;
2032 char strbuf[ISC_STRERRORSIZE];
2034 INSIST(VALID_SOCKET(sock));
2036 LOCK(&sock->lock);
2038 INSIST(sock->pending_iocp > 0);
2039 sock->pending_iocp--;
2040 INSIST(sock->pending_connect == 1);
2041 sock->pending_connect = 0;
2044 * Has this event been canceled?
2046 cdev = lpo->cdev;
2047 if (!connectdone_is_active(sock, cdev)) {
2048 sock->pending_connect = 0;
2049 if (sock->fd != INVALID_SOCKET) {
2050 closesocket(sock->fd);
2051 sock->fd = INVALID_SOCKET;
2052 _set_state(sock, SOCK_CLOSED);
2054 CONSISTENT(sock);
2055 UNLOCK(&sock->lock);
2056 return;
2060 * Check possible Windows network event error status here.
2062 if (connect_errno != 0) {
2064 * If the error is SOFT, just try again on this
2065 * fd and pretend nothing strange happened.
2067 if (SOFT_ERROR(connect_errno) ||
2068 connect_errno == WSAEINPROGRESS) {
2069 sock->pending_connect = 1;
2070 CONSISTENT(sock);
2071 UNLOCK(&sock->lock);
2072 return;
2076 * Translate other errors into ISC_R_* flavors.
2078 switch (connect_errno) {
2079 #define ERROR_MATCH(a, b) case a: cdev->result = b; break;
2080 ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
2081 ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
2082 ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
2083 ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED);
2084 ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
2085 ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN);
2086 ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH);
2087 ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN);
2088 ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES);
2089 ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET);
2090 ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
2091 ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT);
2092 #undef ERROR_MATCH
2093 default:
2094 cdev->result = ISC_R_UNEXPECTED;
2095 isc__strerror(connect_errno, strbuf, sizeof(strbuf));
2096 UNEXPECTED_ERROR(__FILE__, __LINE__,
2097 "internal_connect: connect() %s",
2098 strbuf);
2100 } else {
2101 INSIST(setsockopt(sock->fd, SOL_SOCKET, SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0);
2102 cdev->result = ISC_R_SUCCESS;
2103 sock->connected = 1;
2104 socket_log(__LINE__, sock, &sock->address, IOEVENT,
2105 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2106 "internal_connect: success");
2109 send_connectdone_event(sock, &cdev);
2111 UNLOCK(&sock->lock);
2115 * Loop through the socket, returning ISC_R_EOF for each done event pending.
2117 static void
2118 send_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
2119 isc_socketevent_t *dev;
2121 while (!ISC_LIST_EMPTY(sock->recv_list)) {
2122 dev = ISC_LIST_HEAD(sock->recv_list);
2123 dev->result = result;
2124 send_recvdone_event(sock, &dev);
2129 * Take the data we received in our private buffer, and if any recv() calls on
2130 * our list are satisfied, send the corresponding done event.
2132 * If we need more data (there are still items on the recv_list after we consume all
2133 * our data) then arrange for another system recv() call to fill our buffers.
2135 static void
2136 internal_recv(isc_socket_t *sock, int nbytes)
2138 INSIST(VALID_SOCKET(sock));
2140 LOCK(&sock->lock);
2141 CONSISTENT(sock);
2143 socket_log(__LINE__, sock, NULL, IOEVENT,
2144 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2145 "internal_recv: %d bytes received", nbytes);
2148 * If we got here, the I/O operation succeeded. However, we might still have removed this
2149 * event from our notification list (or never placed it on it due to immediate completion.)
2150 * Handle the reference counting here, and handle the cancellation event just after.
2152 INSIST(sock->pending_iocp > 0);
2153 sock->pending_iocp--;
2154 INSIST(sock->pending_recv > 0);
2155 sock->pending_recv--;
2158 * The only way we could have gotten here is that our I/O has successfully completed.
2159 * Update our pointers, and move on. The only odd case here is that we might not
2160 * have received enough data on a TCP stream to satisfy the minimum requirements. If
2161 * this is the case, we will re-issue the recv() call for what we need.
2163 * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end
2164 * has closed.
2166 if (nbytes == 0 && sock->type == isc_sockettype_tcp) {
2167 send_recvdone_abort(sock, ISC_R_EOF);
2168 maybe_free_socket(&sock, __LINE__);
2169 return;
2171 sock->recvbuf.remaining = nbytes;
2172 sock->recvbuf.consume_position = sock->recvbuf.base;
2173 completeio_recv(sock);
2176 * If there are more receivers waiting for data, queue another receive
2177 * here.
2179 queue_receive_request(sock);
2182 * Unlock and/or destroy if we are the last thing this socket has left to do.
2184 maybe_free_socket(&sock, __LINE__);
2187 static void
2188 internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
2189 struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
2191 buflist_t *buffer;
2194 * Find out what socket this is and lock it.
2196 INSIST(VALID_SOCKET(sock));
2198 LOCK(&sock->lock);
2199 CONSISTENT(sock);
2201 socket_log(__LINE__, sock, NULL, IOEVENT,
2202 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2203 "internal_send: task got socket event %p", dev);
2205 buffer = ISC_LIST_HEAD(lpo->bufferlist);
2206 while (buffer != NULL) {
2207 ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);
2209 socket_log(__LINE__, sock, NULL, TRACE,
2210 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2211 "free_buffer %p %p", buffer, buffer->buf);
2213 HeapFree(hHeapHandle, 0, buffer->buf);
2214 HeapFree(hHeapHandle, 0, buffer);
2215 buffer = ISC_LIST_HEAD(lpo->bufferlist);
2218 INSIST(sock->pending_iocp > 0);
2219 sock->pending_iocp--;
2220 INSIST(sock->pending_send > 0);
2221 sock->pending_send--;
2223 /* If the event is no longer in the list we can just return */
2224 if (!senddone_is_active(sock, dev))
2225 goto done;
2228 * Set the error code and send things on its way.
2230 switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
2231 case DOIO_SOFT:
2232 break;
2233 case DOIO_HARD:
2234 case DOIO_SUCCESS:
2235 send_senddone_event(sock, &dev);
2236 break;
2239 done:
2240 maybe_free_socket(&sock, __LINE__);
2244 * These return if the done event passed in is on the list (or for connect, is
2245 * the one we're waiting for. Using these ensures we will not double-send an
2246 * event.
2248 static isc_boolean_t
2249 senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
2251 isc_socketevent_t *ldev;
2253 ldev = ISC_LIST_HEAD(sock->send_list);
2254 while (ldev != NULL && ldev != dev)
2255 ldev = ISC_LIST_NEXT(ldev, ev_link);
2257 return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2260 static isc_boolean_t
2261 acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
2263 isc_socket_newconnev_t *ldev;
2265 ldev = ISC_LIST_HEAD(sock->accept_list);
2266 while (ldev != NULL && ldev != dev)
2267 ldev = ISC_LIST_NEXT(ldev, ev_link);
2269 return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2272 static isc_boolean_t
2273 connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
2275 return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE);
2279 * This is the I/O Completion Port Worker Function. It loops forever
2280 * waiting for I/O to complete and then forwards them for further
2281 * processing. There are a number of these in separate threads.
2283 static isc_threadresult_t WINAPI
2284 SocketIoThread(LPVOID ThreadContext) {
2285 isc_socketmgr_t *manager = ThreadContext;
2286 BOOL bSuccess = FALSE;
2287 DWORD nbytes;
2288 IoCompletionInfo *lpo = NULL;
2289 isc_socket_t *sock = NULL;
2290 int request;
2291 struct msghdr *messagehdr = NULL;
2292 int errval;
2293 char strbuf[ISC_STRERRORSIZE];
2294 int errstatus;
2296 REQUIRE(VALID_MANAGER(manager));
2299 * Set the thread priority high enough so I/O will
2300 * preempt normal recv packet processing, but not
2301 * higher than the timer sync thread.
2303 if (!SetThreadPriority(GetCurrentThread(),
2304 THREAD_PRIORITY_ABOVE_NORMAL)) {
2305 errval = GetLastError();
2306 isc__strerror(errval, strbuf, sizeof(strbuf));
2307 FATAL_ERROR(__FILE__, __LINE__,
2308 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2309 ISC_MSG_FAILED,
2310 "Can't set thread priority: %s"),
2311 strbuf);
2315 * Loop forever waiting on I/O Completions and then processing them
2317 while (TRUE) {
2318 bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
2319 &nbytes, (LPDWORD)&sock,
2320 (LPWSAOVERLAPPED *)&lpo,
2321 INFINITE);
2322 if (lpo == NULL) /* Received request to exit */
2323 break;
2325 REQUIRE(VALID_SOCKET(sock));
2327 request = lpo->request_type;
2329 errstatus = 0;
2330 if (!bSuccess) {
2331 isc_result_t isc_result;
2334 * Did the I/O operation complete?
2336 errstatus = WSAGetLastError();
2337 isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
2339 LOCK(&sock->lock);
2340 CONSISTENT(sock);
2341 switch (request) {
2342 case SOCKET_RECV:
2343 INSIST(sock->pending_iocp > 0);
2344 sock->pending_iocp--;
2345 INSIST(sock->pending_recv > 0);
2346 sock->pending_recv--;
2347 send_recvdone_abort(sock, isc_result);
2348 if (isc_result == ISC_R_UNEXPECTED) {
2349 UNEXPECTED_ERROR(__FILE__, __LINE__,
2350 "SOCKET_RECV: Windows error code: %d, returning ISC error %d",
2351 errstatus, isc_result);
2353 break;
2355 case SOCKET_SEND:
2356 INSIST(sock->pending_iocp > 0);
2357 sock->pending_iocp--;
2358 INSIST(sock->pending_send > 0);
2359 sock->pending_send--;
2360 if (senddone_is_active(sock, lpo->dev)) {
2361 lpo->dev->result = isc_result;
2362 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2363 "canceled_send");
2364 send_senddone_event(sock, &lpo->dev);
2366 break;
2368 case SOCKET_ACCEPT:
2369 INSIST(sock->pending_iocp > 0);
2370 sock->pending_iocp--;
2371 INSIST(sock->pending_accept > 0);
2372 sock->pending_accept--;
2373 if (acceptdone_is_active(sock, lpo->adev)) {
2374 closesocket(lpo->adev->newsocket->fd);
2375 lpo->adev->newsocket->fd = INVALID_SOCKET;
2376 lpo->adev->newsocket->references--;
2377 free_socket(&lpo->adev->newsocket, __LINE__);
2378 lpo->adev->result = isc_result;
2379 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2380 "canceled_accept");
2381 send_acceptdone_event(sock, &lpo->adev);
2383 break;
2385 case SOCKET_CONNECT:
2386 INSIST(sock->pending_iocp > 0);
2387 sock->pending_iocp--;
2388 INSIST(sock->pending_connect == 1);
2389 sock->pending_connect = 0;
2390 if (connectdone_is_active(sock, lpo->cdev)) {
2391 lpo->cdev->result = isc_result;
2392 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2393 "canceled_connect");
2394 send_connectdone_event(sock, &lpo->cdev);
2396 break;
2398 maybe_free_socket(&sock, __LINE__);
2400 if (lpo != NULL)
2401 HeapFree(hHeapHandle, 0, lpo);
2402 continue;
2405 messagehdr = &lpo->messagehdr;
2407 switch (request) {
2408 case SOCKET_RECV:
2409 internal_recv(sock, nbytes);
2410 break;
2411 case SOCKET_SEND:
2412 internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
2413 break;
2414 case SOCKET_ACCEPT:
2415 internal_accept(sock, lpo, errstatus);
2416 break;
2417 case SOCKET_CONNECT:
2418 internal_connect(sock, lpo, errstatus);
2419 break;
2422 if (lpo != NULL)
2423 HeapFree(hHeapHandle, 0, lpo);
2427 * Exit Completion Port Thread
2429 manager_log(manager, TRACE,
2430 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2431 ISC_MSG_EXITING, "SocketIoThread exiting"));
2432 return ((isc_threadresult_t)0);
2436 * Create a new socket manager.
2438 isc_result_t
2439 isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
2440 return (isc_socketmgr_create2(mctx, managerp, 0));
2443 isc_result_t
2444 isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
2445 unsigned int maxsocks)
2447 isc_socketmgr_t *manager;
2448 isc_result_t result;
2450 REQUIRE(managerp != NULL && *managerp == NULL);
2452 if (maxsocks != 0)
2453 return (ISC_R_NOTIMPLEMENTED);
2455 manager = isc_mem_get(mctx, sizeof(*manager));
2456 if (manager == NULL)
2457 return (ISC_R_NOMEMORY);
2459 InitSockets();
2461 manager->magic = SOCKET_MANAGER_MAGIC;
2462 manager->mctx = NULL;
2463 manager->stats = NULL;
2464 ISC_LIST_INIT(manager->socklist);
2465 result = isc_mutex_init(&manager->lock);
2466 if (result != ISC_R_SUCCESS) {
2467 isc_mem_put(mctx, manager, sizeof(*manager));
2468 return (result);
2470 if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
2471 DESTROYLOCK(&manager->lock);
2472 isc_mem_put(mctx, manager, sizeof(*manager));
2473 UNEXPECTED_ERROR(__FILE__, __LINE__,
2474 "isc_condition_init() %s",
2475 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2476 ISC_MSG_FAILED, "failed"));
2477 return (ISC_R_UNEXPECTED);
2480 isc_mem_attach(mctx, &manager->mctx);
2482 iocompletionport_init(manager); /* Create the Completion Ports */
2484 manager->bShutdown = ISC_FALSE;
2485 manager->totalSockets = 0;
2486 manager->iocp_total = 0;
2488 *managerp = manager;
2490 return (ISC_R_SUCCESS);
2493 isc_result_t
2494 isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) {
2495 REQUIRE(VALID_MANAGER(manager));
2496 REQUIRE(nsockp != NULL);
2498 return (ISC_R_NOTIMPLEMENTED);
2501 void
2502 isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) {
2503 REQUIRE(VALID_MANAGER(manager));
2504 REQUIRE(ISC_LIST_EMPTY(manager->socklist));
2505 REQUIRE(manager->stats == NULL);
2506 REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2508 isc_stats_attach(stats, &manager->stats);
2511 void
2512 isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
2513 isc_socketmgr_t *manager;
2514 int i;
2515 isc_mem_t *mctx;
2518 * Destroy a socket manager.
2521 REQUIRE(managerp != NULL);
2522 manager = *managerp;
2523 REQUIRE(VALID_MANAGER(manager));
2525 LOCK(&manager->lock);
2528 * Wait for all sockets to be destroyed.
2530 while (!ISC_LIST_EMPTY(manager->socklist)) {
2531 manager_log(manager, CREATION,
2532 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2533 ISC_MSG_SOCKETSREMAIN,
2534 "sockets exist"));
2535 WAIT(&manager->shutdown_ok, &manager->lock);
2538 UNLOCK(&manager->lock);
2541 * Here, we need to had some wait code for the completion port
2542 * thread.
2544 signal_iocompletionport_exit(manager);
2545 manager->bShutdown = ISC_TRUE;
2548 * Wait for threads to exit.
2550 for (i = 0; i < manager->maxIOCPThreads; i++) {
2551 if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
2552 NULL) != ISC_R_SUCCESS)
2553 UNEXPECTED_ERROR(__FILE__, __LINE__,
2554 "isc_thread_join() for Completion Port %s",
2555 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2556 ISC_MSG_FAILED, "failed"));
2559 * Clean up.
2562 CloseHandle(manager->hIoCompletionPort);
2564 (void)isc_condition_destroy(&manager->shutdown_ok);
2566 DESTROYLOCK(&manager->lock);
2567 if (manager->stats != NULL)
2568 isc_stats_detach(&manager->stats);
2569 manager->magic = 0;
2570 mctx= manager->mctx;
2571 isc_mem_put(mctx, manager, sizeof(*manager));
2573 isc_mem_detach(&mctx);
2575 *managerp = NULL;
2578 static void
2579 queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
2581 isc_task_t *ntask = NULL;
2583 isc_task_attach(task, &ntask);
2584 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2587 * Enqueue the request.
2589 INSIST(!ISC_LINK_LINKED(dev, ev_link));
2590 ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2592 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2593 "queue_receive_event: event %p -> task %p",
2594 dev, ntask);
2598 * Check the pending receive queue, and if we have data pending, give it to this
2599 * caller. If we have none, queue an I/O request. If this caller is not the first
2600 * on the list, then we will just queue this event and return.
2602 * Caller must have the socket locked.
2604 static isc_result_t
2605 socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2606 unsigned int flags)
2608 int cc = 0;
2609 isc_task_t *ntask = NULL;
2610 isc_result_t result = ISC_R_SUCCESS;
2611 int recv_errno = 0;
2613 dev->ev_sender = task;
2615 if (sock->fd == INVALID_SOCKET)
2616 return (ISC_R_EOF);
2619 * Queue our event on the list of things to do. Call our function to
2620 * attempt to fill buffers as much as possible, and return done events.
2621 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2622 * here and tell our caller that we could not satisfy it immediately.
2624 queue_receive_event(sock, task, dev);
2625 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2626 result = ISC_R_INPROGRESS;
2628 completeio_recv(sock);
2631 * If there are more receivers waiting for data, queue another receive
2632 * here. If the
2634 queue_receive_request(sock);
2636 return (result);
2639 isc_result_t
2640 isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2641 unsigned int minimum, isc_task_t *task,
2642 isc_taskaction_t action, const void *arg)
2644 isc_socketevent_t *dev;
2645 isc_socketmgr_t *manager;
2646 unsigned int iocount;
2647 isc_buffer_t *buffer;
2648 isc_result_t ret;
2650 REQUIRE(VALID_SOCKET(sock));
2651 LOCK(&sock->lock);
2652 CONSISTENT(sock);
2655 * Make sure that the socket is not closed. XXXMLG change error here?
2657 if (sock->fd == INVALID_SOCKET) {
2658 UNLOCK(&sock->lock);
2659 return (ISC_R_CONNREFUSED);
2662 REQUIRE(buflist != NULL);
2663 REQUIRE(!ISC_LIST_EMPTY(*buflist));
2664 REQUIRE(task != NULL);
2665 REQUIRE(action != NULL);
2667 manager = sock->manager;
2668 REQUIRE(VALID_MANAGER(manager));
2670 iocount = isc_bufferlist_availablecount(buflist);
2671 REQUIRE(iocount > 0);
2673 INSIST(sock->bound);
2675 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2676 if (dev == NULL) {
2677 UNLOCK(&sock->lock);
2678 return (ISC_R_NOMEMORY);
2682 * UDP sockets are always partial read
2684 if (sock->type == isc_sockettype_udp)
2685 dev->minimum = 1;
2686 else {
2687 if (minimum == 0)
2688 dev->minimum = iocount;
2689 else
2690 dev->minimum = minimum;
2694 * Move each buffer from the passed in list to our internal one.
2696 buffer = ISC_LIST_HEAD(*buflist);
2697 while (buffer != NULL) {
2698 ISC_LIST_DEQUEUE(*buflist, buffer, link);
2699 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2700 buffer = ISC_LIST_HEAD(*buflist);
2703 ret = socket_recv(sock, dev, task, 0);
2705 UNLOCK(&sock->lock);
2706 return (ret);
2709 isc_result_t
2710 isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum,
2711 isc_task_t *task, isc_taskaction_t action, const void *arg)
2713 isc_socketevent_t *dev;
2714 isc_socketmgr_t *manager;
2715 isc_result_t ret;
2717 REQUIRE(VALID_SOCKET(sock));
2718 LOCK(&sock->lock);
2719 CONSISTENT(sock);
2722 * make sure that the socket's not closed
2724 if (sock->fd == INVALID_SOCKET) {
2725 UNLOCK(&sock->lock);
2726 return (ISC_R_CONNREFUSED);
2728 REQUIRE(action != NULL);
2730 manager = sock->manager;
2731 REQUIRE(VALID_MANAGER(manager));
2733 INSIST(sock->bound);
2735 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2736 if (dev == NULL) {
2737 UNLOCK(&sock->lock);
2738 return (ISC_R_NOMEMORY);
2741 ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
2742 UNLOCK(&sock->lock);
2743 return (ret);
2746 isc_result_t
2747 isc_socket_recv2(isc_socket_t *sock, isc_region_t *region,
2748 unsigned int minimum, isc_task_t *task,
2749 isc_socketevent_t *event, unsigned int flags)
2751 isc_result_t ret;
2753 REQUIRE(VALID_SOCKET(sock));
2754 LOCK(&sock->lock);
2755 CONSISTENT(sock);
2757 event->result = ISC_R_UNEXPECTED;
2758 event->ev_sender = sock;
2760 * make sure that the socket's not closed
2762 if (sock->fd == INVALID_SOCKET) {
2763 UNLOCK(&sock->lock);
2764 return (ISC_R_CONNREFUSED);
2767 ISC_LIST_INIT(event->bufferlist);
2768 event->region = *region;
2769 event->n = 0;
2770 event->offset = 0;
2771 event->attributes = 0;
2774 * UDP sockets are always partial read.
2776 if (sock->type == isc_sockettype_udp)
2777 event->minimum = 1;
2778 else {
2779 if (minimum == 0)
2780 event->minimum = region->length;
2781 else
2782 event->minimum = minimum;
2785 ret = socket_recv(sock, event, task, flags);
2786 UNLOCK(&sock->lock);
2787 return (ret);
2791 * Caller must have the socket locked.
2793 static isc_result_t
2794 socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2795 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2796 unsigned int flags)
2798 int io_state;
2799 int send_errno = 0;
2800 int cc = 0;
2801 isc_task_t *ntask = NULL;
2802 isc_result_t result = ISC_R_SUCCESS;
2804 dev->ev_sender = task;
2806 set_dev_address(address, sock, dev);
2807 if (pktinfo != NULL) {
2808 socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET,
2809 ISC_MSG_PKTINFOPROVIDED,
2810 "pktinfo structure provided, ifindex %u (set to 0)",
2811 pktinfo->ipi6_ifindex);
2813 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
2814 dev->pktinfo = *pktinfo;
2816 * Set the pktinfo index to 0 here, to let the kernel decide
2817 * what interface it should send on.
2819 dev->pktinfo.ipi6_ifindex = 0;
2822 io_state = startio_send(sock, dev, &cc, &send_errno);
2823 switch (io_state) {
2824 case DOIO_PENDING: /* I/O started. Nothing more to do */
2825 case DOIO_SOFT:
2827 * We couldn't send all or part of the request right now, so
2828 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2830 if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
2831 isc_task_attach(task, &ntask);
2832 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2835 * Enqueue the request.
2837 INSIST(!ISC_LINK_LINKED(dev, ev_link));
2838 ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
2840 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2841 "socket_send: event %p -> task %p",
2842 dev, ntask);
2844 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2845 result = ISC_R_INPROGRESS;
2846 break;
2849 case DOIO_SUCCESS:
2850 break;
2853 return (result);
2856 isc_result_t
2857 isc_socket_send(isc_socket_t *sock, isc_region_t *region,
2858 isc_task_t *task, isc_taskaction_t action, const void *arg)
2861 * REQUIRE() checking is performed in isc_socket_sendto().
2863 return (isc_socket_sendto(sock, region, task, action, arg, NULL,
2864 NULL));
2867 isc_result_t
2868 isc_socket_sendto(isc_socket_t *sock, isc_region_t *region,
2869 isc_task_t *task, isc_taskaction_t action, const void *arg,
2870 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
2872 isc_socketevent_t *dev;
2873 isc_socketmgr_t *manager;
2874 isc_result_t ret;
2876 REQUIRE(VALID_SOCKET(sock));
2877 REQUIRE(sock->type != isc_sockettype_fdwatch);
2879 LOCK(&sock->lock);
2880 CONSISTENT(sock);
2883 * make sure that the socket's not closed
2885 if (sock->fd == INVALID_SOCKET) {
2886 UNLOCK(&sock->lock);
2887 return (ISC_R_CONNREFUSED);
2889 REQUIRE(region != NULL);
2890 REQUIRE(task != NULL);
2891 REQUIRE(action != NULL);
2893 manager = sock->manager;
2894 REQUIRE(VALID_MANAGER(manager));
2896 INSIST(sock->bound);
2898 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
2899 if (dev == NULL) {
2900 UNLOCK(&sock->lock);
2901 return (ISC_R_NOMEMORY);
2903 dev->region = *region;
2905 ret = socket_send(sock, dev, task, address, pktinfo, 0);
2906 UNLOCK(&sock->lock);
2907 return (ret);
2910 isc_result_t
2911 isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2912 isc_task_t *task, isc_taskaction_t action, const void *arg)
2914 return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL,
2915 NULL));
2918 isc_result_t
2919 isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
2920 isc_task_t *task, isc_taskaction_t action, const void *arg,
2921 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
2923 isc_socketevent_t *dev;
2924 isc_socketmgr_t *manager;
2925 unsigned int iocount;
2926 isc_buffer_t *buffer;
2927 isc_result_t ret;
2929 REQUIRE(VALID_SOCKET(sock));
2931 LOCK(&sock->lock);
2932 CONSISTENT(sock);
2935 * make sure that the socket's not closed
2937 if (sock->fd == INVALID_SOCKET) {
2938 UNLOCK(&sock->lock);
2939 return (ISC_R_CONNREFUSED);
2941 REQUIRE(buflist != NULL);
2942 REQUIRE(!ISC_LIST_EMPTY(*buflist));
2943 REQUIRE(task != NULL);
2944 REQUIRE(action != NULL);
2946 manager = sock->manager;
2947 REQUIRE(VALID_MANAGER(manager));
2949 iocount = isc_bufferlist_usedcount(buflist);
2950 REQUIRE(iocount > 0);
2952 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
2953 if (dev == NULL) {
2954 UNLOCK(&sock->lock);
2955 return (ISC_R_NOMEMORY);
2959 * Move each buffer from the passed in list to our internal one.
2961 buffer = ISC_LIST_HEAD(*buflist);
2962 while (buffer != NULL) {
2963 ISC_LIST_DEQUEUE(*buflist, buffer, link);
2964 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2965 buffer = ISC_LIST_HEAD(*buflist);
2968 ret = socket_send(sock, dev, task, address, pktinfo, 0);
2969 UNLOCK(&sock->lock);
2970 return (ret);
2973 isc_result_t
2974 isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region,
2975 isc_task_t *task,
2976 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2977 isc_socketevent_t *event, unsigned int flags)
2979 isc_result_t ret;
2981 REQUIRE(VALID_SOCKET(sock));
2982 LOCK(&sock->lock);
2983 CONSISTENT(sock);
2985 REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
2986 if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
2987 REQUIRE(sock->type == isc_sockettype_udp);
2988 event->ev_sender = sock;
2989 event->result = ISC_R_UNEXPECTED;
2991 * make sure that the socket's not closed
2993 if (sock->fd == INVALID_SOCKET) {
2994 UNLOCK(&sock->lock);
2995 return (ISC_R_CONNREFUSED);
2997 ISC_LIST_INIT(event->bufferlist);
2998 event->region = *region;
2999 event->n = 0;
3000 event->offset = 0;
3001 event->attributes = 0;
3003 ret = socket_send(sock, event, task, address, pktinfo, flags);
3004 UNLOCK(&sock->lock);
3005 return (ret);
3008 isc_result_t
3009 isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
3010 unsigned int options) {
3011 int bind_errno;
3012 char strbuf[ISC_STRERRORSIZE];
3013 int on = 1;
3015 REQUIRE(VALID_SOCKET(sock));
3016 LOCK(&sock->lock);
3017 CONSISTENT(sock);
3020 * make sure that the socket's not closed
3022 if (sock->fd == INVALID_SOCKET) {
3023 UNLOCK(&sock->lock);
3024 return (ISC_R_CONNREFUSED);
3027 INSIST(!sock->bound);
3029 if (sock->pf != sockaddr->type.sa.sa_family) {
3030 UNLOCK(&sock->lock);
3031 return (ISC_R_FAMILYMISMATCH);
3034 * Only set SO_REUSEADDR when we want a specific port.
3036 if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
3037 isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
3038 setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
3039 sizeof(on)) < 0) {
3040 UNEXPECTED_ERROR(__FILE__, __LINE__,
3041 "setsockopt(%d) %s", sock->fd,
3042 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
3043 ISC_MSG_FAILED, "failed"));
3044 /* Press on... */
3046 if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
3047 bind_errno = WSAGetLastError();
3048 UNLOCK(&sock->lock);
3049 switch (bind_errno) {
3050 case WSAEACCES:
3051 return (ISC_R_NOPERM);
3052 case WSAEADDRNOTAVAIL:
3053 return (ISC_R_ADDRNOTAVAIL);
3054 case WSAEADDRINUSE:
3055 return (ISC_R_ADDRINUSE);
3056 case WSAEINVAL:
3057 return (ISC_R_BOUND);
3058 default:
3059 isc__strerror(bind_errno, strbuf, sizeof(strbuf));
3060 UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
3061 strbuf);
3062 return (ISC_R_UNEXPECTED);
3066 socket_log(__LINE__, sock, sockaddr, TRACE,
3067 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
3068 sock->bound = 1;
3070 UNLOCK(&sock->lock);
3071 return (ISC_R_SUCCESS);
3074 isc_result_t
3075 isc_socket_filter(isc_socket_t *sock, const char *filter) {
3076 UNUSED(sock);
3077 UNUSED(filter);
3079 REQUIRE(VALID_SOCKET(sock));
3080 return (ISC_R_NOTIMPLEMENTED);
3084 * Set up to listen on a given socket. We do this by creating an internal
3085 * event that will be dispatched when the socket has read activity. The
3086 * watcher will send the internal event to the task when there is a new
3087 * connection.
3089 * Unlike in read, we don't preallocate a done event here. Every time there
3090 * is a new connection we'll have to allocate a new one anyway, so we might
3091 * as well keep things simple rather than having to track them.
3093 isc_result_t
3094 isc_socket_listen(isc_socket_t *sock, unsigned int backlog) {
3095 char strbuf[ISC_STRERRORSIZE];
3097 REQUIRE(VALID_SOCKET(sock));
3099 LOCK(&sock->lock);
3100 CONSISTENT(sock);
3103 * make sure that the socket's not closed
3105 if (sock->fd == INVALID_SOCKET) {
3106 UNLOCK(&sock->lock);
3107 return (ISC_R_CONNREFUSED);
3110 REQUIRE(!sock->listener);
3111 REQUIRE(sock->bound);
3112 REQUIRE(sock->type == isc_sockettype_tcp);
3114 if (backlog == 0)
3115 backlog = SOMAXCONN;
3117 if (listen(sock->fd, (int)backlog) < 0) {
3118 UNLOCK(&sock->lock);
3119 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3121 UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
3123 return (ISC_R_UNEXPECTED);
3126 socket_log(__LINE__, sock, NULL, TRACE,
3127 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening");
3128 sock->listener = 1;
3129 _set_state(sock, SOCK_LISTEN);
3131 UNLOCK(&sock->lock);
3132 return (ISC_R_SUCCESS);
3136 * This should try to do aggressive accept() XXXMLG
3138 isc_result_t
3139 isc_socket_accept(isc_socket_t *sock,
3140 isc_task_t *task, isc_taskaction_t action, const void *arg)
3142 isc_socket_newconnev_t *adev;
3143 isc_socketmgr_t *manager;
3144 isc_task_t *ntask = NULL;
3145 isc_socket_t *nsock;
3146 isc_result_t result;
3147 IoCompletionInfo *lpo;
3149 REQUIRE(VALID_SOCKET(sock));
3151 manager = sock->manager;
3152 REQUIRE(VALID_MANAGER(manager));
3154 LOCK(&sock->lock);
3155 CONSISTENT(sock);
3158 * make sure that the socket's not closed
3160 if (sock->fd == INVALID_SOCKET) {
3161 UNLOCK(&sock->lock);
3162 return (ISC_R_CONNREFUSED);
3165 REQUIRE(sock->listener);
3168 * Sender field is overloaded here with the task we will be sending
3169 * this event to. Just before the actual event is delivered the
3170 * actual ev_sender will be touched up to be the socket.
3172 adev = (isc_socket_newconnev_t *)
3173 isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3174 action, arg, sizeof(*adev));
3175 if (adev == NULL) {
3176 UNLOCK(&sock->lock);
3177 return (ISC_R_NOMEMORY);
3179 ISC_LINK_INIT(adev, ev_link);
3181 result = allocate_socket(manager, sock->type, &nsock);
3182 if (result != ISC_R_SUCCESS) {
3183 isc_event_free((isc_event_t **)&adev);
3184 UNLOCK(&sock->lock);
3185 return (result);
3189 * AcceptEx() requires we pass in a socket.
3191 nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
3192 if (nsock->fd == INVALID_SOCKET) {
3193 free_socket(&nsock, __LINE__);
3194 isc_event_free((isc_event_t **)&adev);
3195 UNLOCK(&sock->lock);
3196 return (ISC_R_FAILURE); // XXXMLG need real error message
3200 * Attach to socket and to task.
3202 isc_task_attach(task, &ntask);
3203 nsock->references++;
3205 adev->ev_sender = ntask;
3206 adev->newsocket = nsock;
3207 _set_state(nsock, SOCK_ACCEPT);
3210 * Queue io completion for an accept().
3212 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3213 HEAP_ZERO_MEMORY,
3214 sizeof(IoCompletionInfo));
3215 RUNTIME_CHECK(lpo != NULL);
3216 lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
3217 (sizeof(SOCKADDR_STORAGE) + 16) * 2);
3218 RUNTIME_CHECK(lpo->acceptbuffer != NULL);
3220 lpo->adev = adev;
3221 lpo->request_type = SOCKET_ACCEPT;
3223 ISCAcceptEx(sock->fd,
3224 nsock->fd, /* Accepted Socket */
3225 lpo->acceptbuffer, /* Buffer for initial Recv */
3226 0, /* Length of Buffer */
3227 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */
3228 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address lengh + 16 */
3229 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */
3230 (LPOVERLAPPED)lpo /* Overlapped structure */
3232 iocompletionport_update(nsock);
3234 socket_log(__LINE__, sock, NULL, TRACE,
3235 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND,
3236 "accepting for nsock %p fd %d", nsock, nsock->fd);
3239 * Enqueue the event
3241 ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link);
3242 sock->pending_accept++;
3243 sock->pending_iocp++;
3245 UNLOCK(&sock->lock);
3246 return (ISC_R_SUCCESS);
3249 isc_result_t
3250 isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3251 isc_task_t *task, isc_taskaction_t action, const void *arg)
3253 char strbuf[ISC_STRERRORSIZE];
3254 isc_socket_connev_t *cdev;
3255 isc_task_t *ntask = NULL;
3256 isc_socketmgr_t *manager;
3257 IoCompletionInfo *lpo;
3258 int bind_errno;
3260 REQUIRE(VALID_SOCKET(sock));
3261 REQUIRE(addr != NULL);
3262 REQUIRE(task != NULL);
3263 REQUIRE(action != NULL);
3265 manager = sock->manager;
3266 REQUIRE(VALID_MANAGER(manager));
3267 REQUIRE(addr != NULL);
3269 if (isc_sockaddr_ismulticast(addr))
3270 return (ISC_R_MULTICAST);
3272 LOCK(&sock->lock);
3273 CONSISTENT(sock);
3276 * make sure that the socket's not closed
3278 if (sock->fd == INVALID_SOCKET) {
3279 UNLOCK(&sock->lock);
3280 return (ISC_R_CONNREFUSED);
3284 * Windows sockets won't connect unless the socket is bound.
3286 if (!sock->bound) {
3287 isc_sockaddr_t any;
3289 isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr));
3290 if (bind(sock->fd, &any.type.sa, any.length) < 0) {
3291 bind_errno = WSAGetLastError();
3292 UNLOCK(&sock->lock);
3293 switch (bind_errno) {
3294 case WSAEACCES:
3295 return (ISC_R_NOPERM);
3296 case WSAEADDRNOTAVAIL:
3297 return (ISC_R_ADDRNOTAVAIL);
3298 case WSAEADDRINUSE:
3299 return (ISC_R_ADDRINUSE);
3300 case WSAEINVAL:
3301 return (ISC_R_BOUND);
3302 default:
3303 isc__strerror(bind_errno, strbuf,
3304 sizeof(strbuf));
3305 UNEXPECTED_ERROR(__FILE__, __LINE__,
3306 "bind: %s", strbuf);
3307 return (ISC_R_UNEXPECTED);
3310 sock->bound = 1;
3313 REQUIRE(!sock->pending_connect);
3315 cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3316 ISC_SOCKEVENT_CONNECT,
3317 action, arg,
3318 sizeof(*cdev));
3319 if (cdev == NULL) {
3320 UNLOCK(&sock->lock);
3321 return (ISC_R_NOMEMORY);
3323 ISC_LINK_INIT(cdev, ev_link);
3325 if (sock->type == isc_sockettype_tcp) {
3327 * Queue io completion for an accept().
3329 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3330 HEAP_ZERO_MEMORY,
3331 sizeof(IoCompletionInfo));
3332 lpo->cdev = cdev;
3333 lpo->request_type = SOCKET_CONNECT;
3335 sock->address = *addr;
3336 ISCConnectEx(sock->fd, &addr->type.sa, addr->length,
3337 NULL, 0, NULL, (LPOVERLAPPED)lpo);
3340 * Attach to task.
3342 isc_task_attach(task, &ntask);
3343 cdev->ev_sender = ntask;
3345 sock->pending_connect = 1;
3346 _set_state(sock, SOCK_CONNECT);
3349 * Enqueue the request.
3351 sock->connect_ev = cdev;
3352 sock->pending_iocp++;
3353 } else {
3354 WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL);
3355 cdev->result = ISC_R_SUCCESS;
3356 isc_task_send(task, (isc_event_t **)&cdev);
3358 CONSISTENT(sock);
3359 UNLOCK(&sock->lock);
3361 return (ISC_R_SUCCESS);
3364 isc_result_t
3365 isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3366 isc_result_t result;
3368 REQUIRE(VALID_SOCKET(sock));
3369 REQUIRE(addressp != NULL);
3371 LOCK(&sock->lock);
3372 CONSISTENT(sock);
3375 * make sure that the socket's not closed
3377 if (sock->fd == INVALID_SOCKET) {
3378 UNLOCK(&sock->lock);
3379 return (ISC_R_CONNREFUSED);
3382 if (sock->connected) {
3383 *addressp = sock->address;
3384 result = ISC_R_SUCCESS;
3385 } else {
3386 result = ISC_R_NOTCONNECTED;
3389 UNLOCK(&sock->lock);
3391 return (result);
3394 isc_result_t
3395 isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3396 ISC_SOCKADDR_LEN_T len;
3397 isc_result_t result;
3398 char strbuf[ISC_STRERRORSIZE];
3400 REQUIRE(VALID_SOCKET(sock));
3401 REQUIRE(addressp != NULL);
3403 LOCK(&sock->lock);
3404 CONSISTENT(sock);
3407 * make sure that the socket's not closed
3409 if (sock->fd == INVALID_SOCKET) {
3410 UNLOCK(&sock->lock);
3411 return (ISC_R_CONNREFUSED);
3414 if (!sock->bound) {
3415 result = ISC_R_NOTBOUND;
3416 goto out;
3419 result = ISC_R_SUCCESS;
3421 len = sizeof(addressp->type);
3422 if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3423 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3424 UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3425 strbuf);
3426 result = ISC_R_UNEXPECTED;
3427 goto out;
3429 addressp->length = (unsigned int)len;
3431 out:
3432 UNLOCK(&sock->lock);
3434 return (result);
3438 * Run through the list of events on this socket, and cancel the ones
3439 * queued for task "task" of type "how". "how" is a bitmask.
3441 void
3442 isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3444 REQUIRE(VALID_SOCKET(sock));
3447 * Quick exit if there is nothing to do. Don't even bother locking
3448 * in this case.
3450 if (how == 0)
3451 return;
3453 LOCK(&sock->lock);
3454 CONSISTENT(sock);
3457 * make sure that the socket's not closed
3459 if (sock->fd == INVALID_SOCKET) {
3460 UNLOCK(&sock->lock);
3461 return;
3465 * All of these do the same thing, more or less.
3466 * Each will:
3467 * o If the internal event is marked as "posted" try to
3468 * remove it from the task's queue. If this fails, mark it
3469 * as canceled instead, and let the task clean it up later.
3470 * o For each I/O request for that task of that type, post
3471 * its done event with status of "ISC_R_CANCELED".
3472 * o Reset any state needed.
3475 if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) {
3476 isc_socketevent_t *dev;
3477 isc_socketevent_t *next;
3478 isc_task_t *current_task;
3480 dev = ISC_LIST_HEAD(sock->recv_list);
3481 while (dev != NULL) {
3482 current_task = dev->ev_sender;
3483 next = ISC_LIST_NEXT(dev, ev_link);
3484 if ((task == NULL) || (task == current_task)) {
3485 dev->result = ISC_R_CANCELED;
3486 send_recvdone_event(sock, &dev);
3488 dev = next;
3491 how &= ~ISC_SOCKCANCEL_RECV;
3493 if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) {
3494 isc_socketevent_t *dev;
3495 isc_socketevent_t *next;
3496 isc_task_t *current_task;
3498 dev = ISC_LIST_HEAD(sock->send_list);
3500 while (dev != NULL) {
3501 current_task = dev->ev_sender;
3502 next = ISC_LIST_NEXT(dev, ev_link);
3503 if ((task == NULL) || (task == current_task)) {
3504 dev->result = ISC_R_CANCELED;
3505 send_senddone_event(sock, &dev);
3507 dev = next;
3510 how &= ~ISC_SOCKCANCEL_SEND;
3512 if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT)
3513 && !ISC_LIST_EMPTY(sock->accept_list)) {
3514 isc_socket_newconnev_t *dev;
3515 isc_socket_newconnev_t *next;
3516 isc_task_t *current_task;
3518 dev = ISC_LIST_HEAD(sock->accept_list);
3519 while (dev != NULL) {
3520 current_task = dev->ev_sender;
3521 next = ISC_LIST_NEXT(dev, ev_link);
3523 if ((task == NULL) || (task == current_task)) {
3525 dev->newsocket->references--;
3526 closesocket(dev->newsocket->fd);
3527 dev->newsocket->fd = INVALID_SOCKET;
3528 free_socket(&dev->newsocket, __LINE__);
3530 dev->result = ISC_R_CANCELED;
3531 send_acceptdone_event(sock, &dev);
3534 dev = next;
3537 how &= ~ISC_SOCKCANCEL_ACCEPT;
3540 * Connecting is not a list.
3542 if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
3543 && sock->connect_ev != NULL) {
3544 isc_socket_connev_t *dev;
3545 isc_task_t *current_task;
3547 INSIST(sock->pending_connect);
3549 dev = sock->connect_ev;
3550 current_task = dev->ev_sender;
3552 if ((task == NULL) || (task == current_task)) {
3553 closesocket(sock->fd);
3554 sock->fd = INVALID_SOCKET;
3555 _set_state(sock, SOCK_CLOSED);
3557 sock->connect_ev = NULL;
3558 dev->result = ISC_R_CANCELED;
3559 send_connectdone_event(sock, &dev);
3562 how &= ~ISC_SOCKCANCEL_CONNECT;
3564 maybe_free_socket(&sock, __LINE__);
3567 isc_sockettype_t
3568 isc_socket_gettype(isc_socket_t *sock) {
3569 isc_sockettype_t type;
3571 REQUIRE(VALID_SOCKET(sock));
3573 LOCK(&sock->lock);
3576 * make sure that the socket's not closed
3578 if (sock->fd == INVALID_SOCKET) {
3579 UNLOCK(&sock->lock);
3580 return (ISC_R_CONNREFUSED);
3583 type = sock->type;
3584 UNLOCK(&sock->lock);
3585 return (type);
3588 isc_boolean_t
3589 isc_socket_isbound(isc_socket_t *sock) {
3590 isc_boolean_t val;
3592 REQUIRE(VALID_SOCKET(sock));
3594 LOCK(&sock->lock);
3595 CONSISTENT(sock);
3598 * make sure that the socket's not closed
3600 if (sock->fd == INVALID_SOCKET) {
3601 UNLOCK(&sock->lock);
3602 return (ISC_FALSE);
3605 val = ((sock->bound) ? ISC_TRUE : ISC_FALSE);
3606 UNLOCK(&sock->lock);
3608 return (val);
3611 void
3612 isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
3613 #if defined(IPV6_V6ONLY)
3614 int onoff = yes ? 1 : 0;
3615 #else
3616 UNUSED(yes);
3617 #endif
3619 REQUIRE(VALID_SOCKET(sock));
3621 #ifdef IPV6_V6ONLY
3622 if (sock->pf == AF_INET6) {
3623 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3624 (void *)&onoff, sizeof(onoff));
3626 #endif
3629 void
3630 isc_socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active) {
3631 UNUSED(addr);
3632 UNUSED(active);
3635 isc_result_t
3636 isc_socket_permunix(isc_sockaddr_t *addr, isc_uint32_t perm,
3637 isc_uint32_t owner, isc_uint32_t group)
3639 UNUSED(addr);
3640 UNUSED(perm);
3641 UNUSED(owner);
3642 UNUSED(group);
3643 return (ISC_R_NOTIMPLEMENTED);
3646 void
3647 isc_socket_setname(isc_socket_t *socket, const char *name, void *tag) {
3650 * Name 'socket'.
3653 REQUIRE(VALID_SOCKET(socket));
3655 LOCK(&socket->lock);
3656 memset(socket->name, 0, sizeof(socket->name));
3657 strncpy(socket->name, name, sizeof(socket->name) - 1);
3658 socket->tag = tag;
3659 UNLOCK(&socket->lock);
3662 const char *
3663 isc_socket_getname(isc_socket_t *socket) {
3664 return (socket->name);
3667 void *
3668 isc_socket_gettag(isc_socket_t *socket) {
3669 return (socket->tag);
3672 void
3673 isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) {
3674 UNUSED(manager);
3675 UNUSED(reserved);