4 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
5 * Copyright (C) 2000-2003 Internet Software Consortium.
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
20 /* Id: socket.c,v 1.70.54.4 2009/01/29 22:40:36 jinmei Exp */
22 /* This code uses functions which are only available on Server 2003 and
23 * higher, and Windows XP and higher.
25 * This code is by nature multithreaded and takes advantage of various
26 * features to pass on information through the completion port for
27 * when I/O is completed. All sends, receives, accepts, and connects are
28 * completed through the completion port.
30 * The number of Completion Port Worker threads used is the total number
31 * of CPUs + 1. This increases the likelihood that a Worker Thread is
32 * available for processing a completed request.
34 * XXXPDM 5 August, 2002
37 #define MAKE_EXTERNAL 1
40 #include <sys/types.h>
43 #define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */
55 #include <isc/buffer.h>
56 #include <isc/bufferlist.h>
57 #include <isc/condition.h>
62 #include <isc/mutex.h>
66 #include <isc/platform.h>
67 #include <isc/print.h>
68 #include <isc/region.h>
69 #include <isc/socket.h>
70 #include <isc/stats.h>
71 #include <isc/strerror.h>
72 #include <isc/syslog.h>
74 #include <isc/thread.h>
76 #include <isc/win32os.h>
80 #include "errno2result.h"
83 * How in the world can Microsoft exist with APIs like this?
84 * We can't actually call this directly, because it turns out
85 * no library exports this function. Instead, we need to
86 * issue a runtime call to get the address.
88 LPFN_CONNECTEX ISCConnectEx
;
89 LPFN_ACCEPTEX ISCAcceptEx
;
90 LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs
;
93 * Run expensive internal consistency checks.
95 #ifdef ISC_SOCKET_CONSISTENCY_CHECKS
96 #define CONSISTENT(sock) consistent(sock)
98 #define CONSISTENT(sock) do {} while (0)
100 static void consistent(isc_socket_t
*sock
);
103 * Define this macro to control the behavior of connection
104 * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823
106 * NOTE: This requires that Windows 2000 systems install Service Pack 2
109 #ifndef SIO_UDP_CONNRESET
110 #define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
114 * Some systems define the socket length argument as an int, some as size_t,
115 * some as socklen_t. This is here so it can be easily changed if needed.
117 #ifndef ISC_SOCKADDR_LEN_T
118 #define ISC_SOCKADDR_LEN_T unsigned int
122 * Define what the possible "soft" errors can be. These are non-fatal returns
123 * of various network related functions, like recv() and so on.
125 #define SOFT_ERROR(e) ((e) == WSAEINTR || \
126 (e) == WSAEWOULDBLOCK || \
127 (e) == EWOULDBLOCK || \
133 * Pending errors are not really errors and should be
136 #define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)
138 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
139 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
140 #define DOIO_HARD 2 /* i/o error, event sent */
141 #define DOIO_EOF 3 /* EOF, no event sent */
142 #define DOIO_PENDING 4 /* status when i/o is in process */
143 #define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */
145 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
148 * DLVL(90) -- Function entry/exit and other tracing.
149 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
150 * DLVL(60) -- Socket data send/receive
151 * DLVL(50) -- Event tracing, including receiving/sending completion events.
152 * DLVL(20) -- Socket creation/destruction.
154 #define TRACE_LEVEL 90
155 #define CORRECTNESS_LEVEL 70
156 #define IOEVENT_LEVEL 60
157 #define EVENT_LEVEL 50
158 #define CREATION_LEVEL 20
160 #define TRACE DLVL(TRACE_LEVEL)
161 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
162 #define IOEVENT DLVL(IOEVENT_LEVEL)
163 #define EVENT DLVL(EVENT_LEVEL)
164 #define CREATION DLVL(CREATION_LEVEL)
166 typedef isc_event_t intev_t
;
172 SOCK_INITIALIZED
, /* Socket Initialized */
173 SOCK_OPEN
, /* Socket opened but nothing yet to do */
174 SOCK_DATA
, /* Socket sending or receiving data */
175 SOCK_LISTEN
, /* TCP Socket listening for connects */
176 SOCK_ACCEPT
, /* TCP socket is waiting to accept */
177 SOCK_CONNECT
, /* TCP Socket connecting */
178 SOCK_CLOSED
, /* Socket has been closed */
181 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
182 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
185 * IPv6 control information. If the socket is an IPv6 socket we want
186 * to collect the destination address and interface so the client can
187 * set them on outgoing packets.
189 #ifdef ISC_PLATFORM_HAVEIPV6
196 * We really don't want to try and use these control messages. Win32
197 * doesn't have this mechanism before XP.
202 * Message header for recvmsg and sendmsg calls.
203 * Used value-result for recvmsg, value only for sendmsg.
206 SOCKADDR_STORAGE to_addr
; /* UDP send/recv address */
207 int to_addr_len
; /* length of the address */
208 WSABUF
*msg_iov
; /* scatter/gather array */
209 u_int msg_iovlen
; /* # elements in msg_iov */
210 void *msg_control
; /* ancillary data, see below */
211 u_int msg_controllen
; /* ancillary data buffer len */
212 int msg_totallen
; /* total length of this message */
216 * The size to raise the receive buffer to.
218 #define RCVBUFSIZE (32*1024)
221 * The number of times a send operation is repeated if the result
229 isc_socketmgr_t
*manager
;
231 isc_sockettype_t type
;
233 /* Pointers to scatter/gather buffers */
234 WSABUF iov
[ISC_SOCKET_MAXSCATTERGATHER
];
236 /* Locked by socket lock. */
237 ISC_LINK(isc_socket_t
) link
;
238 unsigned int references
; /* EXTERNAL references */
239 SOCKET fd
; /* file handle */
240 int pf
; /* protocol family */
245 * Each recv() call uses this buffer. It is a per-socket receive
246 * buffer that allows us to decouple the system recv() from the
247 * recv_list done events. This means the items on the recv_list
248 * can be removed without having to cancel pending system recv()
249 * calls. It also allows us to read-ahead in some cases.
252 SOCKADDR_STORAGE from_addr
; // UDP send/recv address
253 int from_addr_len
; // length of the address
254 char *base
; // the base of the buffer
255 char *consume_position
; // where to start copying data from next
256 unsigned int len
; // the actual size of this buffer
257 unsigned int remaining
; // the number of bytes remaining
260 ISC_LIST(isc_socketevent_t
) send_list
;
261 ISC_LIST(isc_socketevent_t
) recv_list
;
262 ISC_LIST(isc_socket_newconnev_t
) accept_list
;
263 isc_socket_connev_t
*connect_ev
;
265 isc_sockaddr_t address
; /* remote address */
267 unsigned int listener
: 1, /* listener socket */
269 pending_connect
: 1, /* connect pending */
270 bound
: 1; /* bound to local addr */
271 unsigned int pending_iocp
; /* Should equal the counters below. Debug. */
272 unsigned int pending_recv
; /* Number of outstanding recv() calls. */
273 unsigned int pending_send
; /* Number of outstanding send() calls. */
274 unsigned int pending_accept
; /* Number of outstanding accept() calls. */
275 unsigned int state
; /* Socket state. Debugging and consistency checking. */
276 int state_lineno
; /* line which last touched state */
279 #define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0)
284 typedef struct buflist buflist_t
;
289 ISC_LINK(buflist_t
) link
;
293 * I/O Completion ports Info structures
296 static HANDLE hHeapHandle
= NULL
;
297 typedef struct IoCompletionInfo
{
298 OVERLAPPED overlapped
;
299 isc_socketevent_t
*dev
; /* send()/recv() done event */
300 isc_socket_connev_t
*cdev
; /* connect() done event */
301 isc_socket_newconnev_t
*adev
; /* accept() done event */
303 DWORD received_bytes
;
305 struct msghdr messagehdr
;
306 ISC_LIST(buflist_t
) bufferlist
; /*%< list of buffers */
310 * Define a maximum number of I/O Completion Port worker threads
311 * to handle the load on the Completion Port. The actual number
312 * used is the number of CPUs + 1.
314 #define MAX_IOCPTHREADS 20
316 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')
317 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
319 struct isc_socketmgr
{
326 /* Locked by manager lock. */
327 ISC_LIST(isc_socket_t
) socklist
;
328 isc_boolean_t bShutdown
;
329 isc_condition_t shutdown_ok
;
330 HANDLE hIoCompletionPort
;
332 HANDLE hIOCPThreads
[MAX_IOCPTHREADS
];
333 DWORD dwIOCPThreadIds
[MAX_IOCPTHREADS
];
337 * Modified by InterlockedIncrement() and InterlockedDecrement()
351 * send() and recv() iovec counts
353 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
354 #define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
356 static isc_threadresult_t WINAPI
SocketIoThread(LPVOID ThreadContext
);
357 static void maybe_free_socket(isc_socket_t
**, int);
358 static void free_socket(isc_socket_t
**, int);
359 static isc_boolean_t
senddone_is_active(isc_socket_t
*sock
, isc_socketevent_t
*dev
);
360 static isc_boolean_t
acceptdone_is_active(isc_socket_t
*sock
, isc_socket_newconnev_t
*dev
);
361 static isc_boolean_t
connectdone_is_active(isc_socket_t
*sock
, isc_socket_connev_t
*dev
);
362 static void send_recvdone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
);
363 static void send_senddone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
);
364 static void send_acceptdone_event(isc_socket_t
*sock
, isc_socket_newconnev_t
**adev
);
365 static void send_connectdone_event(isc_socket_t
*sock
, isc_socket_connev_t
**cdev
);
366 static void send_recvdone_abort(isc_socket_t
*sock
, isc_result_t result
);
367 static void queue_receive_event(isc_socket_t
*sock
, isc_task_t
*task
, isc_socketevent_t
*dev
);
368 static void queue_receive_request(isc_socket_t
*sock
);
371 * This is used to dump the contents of the sock structure
372 * You should make sure that the sock is locked before
373 * dumping it. Since the code uses simple printf() statements
374 * it should only be used interactively.
/*
 * Debugging aid: print the socket's addresses, a selection of its
 * state fields, and the event pointers queued on its recv, send and
 * accept lists to stdout using printf().
 */
377 sock_dump(isc_socket_t
*sock
) {
378 isc_socketevent_t
*ldev
;
379 isc_socket_newconnev_t
*ndev
;
/* Peer and local addresses, formatted into socktext. */
385 isc_socket_getpeername(sock
, &addr
);
386 isc_sockaddr_format(&addr
, socktext
, sizeof(socktext
));
387 printf("Remote Socket: %s\n", socktext
);
388 isc_socket_getsockname(sock
, &addr
);
389 isc_sockaddr_format(&addr
, socktext
, sizeof(socktext
));
390 printf("This Socket: %s\n", socktext
);
/* Scalar state fields of the socket. */
393 printf("\n\t\tSock Dump\n");
394 printf("\t\tfd: %u\n", sock
->fd
);
395 printf("\t\treferences: %d\n", sock
->references
);
396 printf("\t\tpending_accept: %d\n", sock
->pending_accept
);
397 printf("\t\tconnecting: %d\n", sock
->pending_connect
);
398 printf("\t\tconnected: %d\n", sock
->connected
);
399 printf("\t\tbound: %d\n", sock
->bound
);
400 printf("\t\tpending_iocp: %d\n", sock
->pending_iocp
);
401 printf("\t\tsocket type: %d\n", sock
->type
);
/* Walk recv_list, printing each queued event pointer. */
403 printf("\n\t\tSock Recv List\n");
404 ldev
= ISC_LIST_HEAD(sock
->recv_list
);
405 while (ldev
!= NULL
) {
406 printf("\t\tdev: %p\n", ldev
);
407 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
/* Walk send_list the same way; ldev is NULL once this loop finishes. */
410 printf("\n\t\tSock Send List\n");
411 ldev
= ISC_LIST_HEAD(sock
->send_list
);
412 while (ldev
!= NULL
) {
413 printf("\t\tdev: %p\n", ldev
);
414 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
/* Walk accept_list using ndev as the cursor. */
417 printf("\n\t\tSock Accept List\n");
418 ndev
= ISC_LIST_HEAD(sock
->accept_list
);
419 while (ndev
!= NULL
) {
/*
 * Print the accept-list cursor (ndev).  Printing ldev here would
 * always show a null pointer, because the send-list loop above
 * only exits once ldev is NULL.
 */
420 printf("\t\tdev: %p\n", ndev
);
421 ndev
= ISC_LIST_NEXT(ndev
, ev_link
);
426 socket_log(int lineno
, isc_socket_t
*sock
, isc_sockaddr_t
*address
,
427 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
428 isc_msgcat_t
*msgcat
, int msgset
, int message
,
429 const char *fmt
, ...) ISC_FORMAT_PRINTF(9, 10);
431 /* This function will add an entry to the I/O completion port
432 * that will signal the I/O thread to exit (gracefully)
435 signal_iocompletionport_exit(isc_socketmgr_t
*manager
) {
438 char strbuf
[ISC_STRERRORSIZE
];
440 REQUIRE(VALID_MANAGER(manager
));
441 for (i
= 0; i
< manager
->maxIOCPThreads
; i
++) {
442 if (!PostQueuedCompletionStatus(manager
->hIoCompletionPort
,
444 errval
= GetLastError();
445 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
446 FATAL_ERROR(__FILE__
, __LINE__
,
447 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
449 "Can't request service thread to exit: %s"),
456 * Create the worker threads for the I/O Completion Port
459 iocompletionport_createthreads(int total_threads
, isc_socketmgr_t
*manager
) {
461 char strbuf
[ISC_STRERRORSIZE
];
464 INSIST(total_threads
> 0);
465 REQUIRE(VALID_MANAGER(manager
));
467 * We need at least one
469 for (i
= 0; i
< total_threads
; i
++) {
470 manager
->hIOCPThreads
[i
] = CreateThread(NULL
, 0, SocketIoThread
,
472 &manager
->dwIOCPThreadIds
[i
]);
473 if (manager
->hIOCPThreads
[i
] == NULL
) {
474 errval
= GetLastError();
475 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
476 FATAL_ERROR(__FILE__
, __LINE__
,
477 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
479 "Can't create IOCP thread: %s"),
487 * Create/initialise the I/O completion port
490 iocompletionport_init(isc_socketmgr_t
*manager
) {
492 char strbuf
[ISC_STRERRORSIZE
];
494 REQUIRE(VALID_MANAGER(manager
));
496 * Create a private heap to handle the socket overlapped structure
497 * The minimum number of structures is 10, there is no maximum
499 hHeapHandle
= HeapCreate(0, 10 * sizeof(IoCompletionInfo
), 0);
500 if (hHeapHandle
== NULL
) {
501 errval
= GetLastError();
502 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
503 FATAL_ERROR(__FILE__
, __LINE__
,
504 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
506 "HeapCreate() failed during "
507 "initialization: %s"),
512 manager
->maxIOCPThreads
= min(isc_os_ncpus() + 1, MAX_IOCPTHREADS
);
514 /* Now Create the Completion Port */
515 manager
->hIoCompletionPort
= CreateIoCompletionPort(
516 INVALID_HANDLE_VALUE
, NULL
,
517 0, manager
->maxIOCPThreads
);
518 if (manager
->hIoCompletionPort
== NULL
) {
519 errval
= GetLastError();
520 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
521 FATAL_ERROR(__FILE__
, __LINE__
,
522 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
524 "CreateIoCompletionPort() failed "
525 "during initialization: %s"),
531 * Worker threads for servicing the I/O
533 iocompletionport_createthreads(manager
->maxIOCPThreads
, manager
);
537 * Associate a socket with an IO Completion Port. This allows us to queue events for it
538 * and have our worker pool of threads process them.
541 iocompletionport_update(isc_socket_t
*sock
) {
543 char strbuf
[ISC_STRERRORSIZE
];
545 REQUIRE(VALID_SOCKET(sock
));
547 hiocp
= CreateIoCompletionPort((HANDLE
)sock
->fd
,
548 sock
->manager
->hIoCompletionPort
, (ULONG_PTR
)sock
, 0);
551 DWORD errval
= GetLastError();
552 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
553 isc_log_iwrite(isc_lctx
,
554 ISC_LOGCATEGORY_GENERAL
,
555 ISC_LOGMODULE_SOCKET
, ISC_LOG_ERROR
,
556 isc_msgcat
, ISC_MSGSET_SOCKET
,
557 ISC_MSG_TOOMANYHANDLES
,
558 "iocompletionport_update: failed to open"
559 " io completion port: %s",
562 /* XXXMLG temporary hack so that failures are detected.
563 * This function should return errors to the caller, not
566 FATAL_ERROR(__FILE__
, __LINE__
,
567 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
569 "CreateIoCompletionPort() failed "
570 "during initialization: %s"),
575 InterlockedIncrement(&sock
->manager
->iocp_total
);
579 * Routine to cleanup and then close the socket.
580 * Only close the socket here if it is NOT associated
581 * with an event, otherwise the WSAWaitForMultipleEvents
582 * may fail due to the fact that the Wait should not
583 * be running while closing an event or a socket.
584 * The socket is locked before calling this function
/*
 * Close the socket's descriptor if it is still open and record the
 * closed state on the socket.  sock must not be NULL.
 */
587 socket_close(isc_socket_t
*sock
) {
589 REQUIRE(sock
!= NULL
);
/* Nothing to close when fd has already been marked invalid. */
591 if (sock
->fd
!= INVALID_SOCKET
) {
592 closesocket(sock
->fd
);
/* Mark the descriptor invalid so the guard above skips a second close. */
593 sock
->fd
= INVALID_SOCKET
;
/* _set_state() also stores __LINE__ in state_lineno for debugging. */
594 _set_state(sock
, SOCK_CLOSED
);
/* Interlocked decrement of the manager's totalSockets counter. */
595 InterlockedDecrement(&sock
->manager
->totalSockets
);
599 static isc_once_t initialise_once
= ISC_ONCE_INIT
;
600 static isc_boolean_t initialised
= ISC_FALSE
;
604 WORD wVersionRequested
;
608 GUID GUIDConnectEx
= WSAID_CONNECTEX
;
609 GUID GUIDAcceptEx
= WSAID_ACCEPTEX
;
610 GUID GUIDGetAcceptExSockaddrs
= WSAID_GETACCEPTEXSOCKADDRS
;
613 /* Need Winsock 2.2 or better */
614 wVersionRequested
= MAKEWORD(2, 2);
616 err
= WSAStartup(wVersionRequested
, &wsaData
);
618 char strbuf
[ISC_STRERRORSIZE
];
619 isc__strerror(err
, strbuf
, sizeof(strbuf
));
620 FATAL_ERROR(__FILE__
, __LINE__
, "WSAStartup() %s: %s",
621 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
622 ISC_MSG_FAILED
, "failed"),
627 * The following APIs do not exist as functions in a library, but we must
628 * ask winsock for them. They are "extensions" -- but why they cannot be
629 * actual functions is beyond me. So, ask winsock for the pointers to the
632 sock
= socket(AF_INET
, SOCK_STREAM
, IPPROTO_TCP
);
633 INSIST(sock
!= INVALID_SOCKET
);
634 err
= WSAIoctl(sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
635 &GUIDConnectEx
, sizeof(GUIDConnectEx
),
636 &ISCConnectEx
, sizeof(ISCConnectEx
),
637 &dwBytes
, NULL
, NULL
);
640 err
= WSAIoctl(sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
641 &GUIDAcceptEx
, sizeof(GUIDAcceptEx
),
642 &ISCAcceptEx
, sizeof(ISCAcceptEx
),
643 &dwBytes
, NULL
, NULL
);
646 err
= WSAIoctl(sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
647 &GUIDGetAcceptExSockaddrs
, sizeof(GUIDGetAcceptExSockaddrs
),
648 &ISCGetAcceptExSockaddrs
, sizeof(ISCGetAcceptExSockaddrs
),
649 &dwBytes
, NULL
, NULL
);
654 initialised
= ISC_TRUE
;
658 * Initialize socket services
662 RUNTIME_CHECK(isc_once_do(&initialise_once
,
663 initialise
) == ISC_R_SUCCESS
);
669 internal_sendmsg(isc_socket_t
*sock
, IoCompletionInfo
*lpo
,
670 struct msghdr
*messagehdr
, int flags
, int *Error
)
678 Result
= WSASendTo(sock
->fd
, messagehdr
->msg_iov
,
679 messagehdr
->msg_iovlen
, &BytesSent
,
680 Flags
, (SOCKADDR
*)&messagehdr
->to_addr
,
681 messagehdr
->to_addr_len
, (LPWSAOVERLAPPED
)lpo
,
684 total_sent
= (int)BytesSent
;
686 /* Check for errors.*/
687 if (Result
== SOCKET_ERROR
) {
688 *Error
= WSAGetLastError();
691 case WSA_IO_INCOMPLETE
:
692 case WSA_WAIT_IO_COMPLETION
:
694 case NO_ERROR
: /* Strange, but okay */
695 sock
->pending_iocp
++;
696 sock
->pending_send
++;
704 sock
->pending_iocp
++;
705 sock
->pending_send
++;
715 queue_receive_request(isc_socket_t
*sock
) {
722 IoCompletionInfo
*lpo
;
723 isc_result_t isc_result
;
726 * If we already have a receive pending, do nothing.
728 if (sock
->pending_recv
> 0)
732 * If no one is waiting, do nothing.
734 if (ISC_LIST_EMPTY(sock
->recv_list
))
737 INSIST(sock
->recvbuf
.remaining
== 0);
738 INSIST(sock
->fd
!= INVALID_SOCKET
);
740 iov
[0].len
= sock
->recvbuf
.len
;
741 iov
[0].buf
= sock
->recvbuf
.base
;
743 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
745 sizeof(IoCompletionInfo
));
746 RUNTIME_CHECK(lpo
!= NULL
);
747 lpo
->request_type
= SOCKET_RECV
;
749 sock
->recvbuf
.from_addr_len
= sizeof(sock
->recvbuf
.from_addr
);
752 Result
= WSARecvFrom((SOCKET
)sock
->fd
, iov
, 1,
754 (SOCKADDR
*)&sock
->recvbuf
.from_addr
,
755 &sock
->recvbuf
.from_addr_len
,
756 (LPWSAOVERLAPPED
)lpo
, NULL
);
758 /* Check for errors. */
759 if (Result
== SOCKET_ERROR
) {
760 Error
= WSAGetLastError();
764 sock
->pending_iocp
++;
765 sock
->pending_recv
++;
769 isc_result
= isc__errno2result(Error
);
770 if (isc_result
== ISC_R_UNEXPECTED
)
771 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
772 "WSARecvFrom: Windows error code: %d, isc result %d",
774 send_recvdone_abort(sock
, isc_result
);
779 * The recv() finished immediately, but we will still get
780 * a completion event. Rather than duplicate code, let
781 * that thread handle sending the data along its way.
783 sock
->pending_iocp
++;
784 sock
->pending_recv
++;
787 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
788 isc_msgcat
, ISC_MSGSET_SOCKET
,
790 "queue_io_request: fd %d result %d error %d",
791 sock
->fd
, Result
, Error
);
797 manager_log(isc_socketmgr_t
*sockmgr
, isc_logcategory_t
*category
,
798 isc_logmodule_t
*module
, int level
, const char *fmt
, ...)
803 if (!isc_log_wouldlog(isc_lctx
, level
))
807 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
810 isc_log_write(isc_lctx
, category
, module
, level
,
811 "sockmgr %p: %s", sockmgr
, msgbuf
);
815 socket_log(int lineno
, isc_socket_t
*sock
, isc_sockaddr_t
*address
,
816 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
817 isc_msgcat_t
*msgcat
, int msgset
, int message
,
818 const char *fmt
, ...)
825 if (!isc_log_wouldlog(isc_lctx
, level
))
829 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
832 if (address
== NULL
) {
833 isc_log_iwrite(isc_lctx
, category
, module
, level
,
834 msgcat
, msgset
, message
,
835 "socket %p line %d: %s", sock
, lineno
, msgbuf
);
837 isc_sockaddr_format(address
, peerbuf
, sizeof(peerbuf
));
838 isc_log_iwrite(isc_lctx
, category
, module
, level
,
839 msgcat
, msgset
, message
,
840 "socket %p line %d peer %s: %s", sock
, lineno
,
847 * Make an fd SOCKET non-blocking.
/*
 * Switch the socket fd to non-blocking mode with ioctlsocket(FIONBIO).
 * The visible return paths yield ISC_R_SUCCESS, or ISC_R_UNEXPECTED
 * after the failure has been logged through UNEXPECTED_ERROR().
 */
850 make_nonblock(SOCKET fd
) {
/* Non-zero argument to FIONBIO enables non-blocking mode. */
852 unsigned long flags
= 1;
853 char strbuf
[ISC_STRERRORSIZE
];
855 /* Set the socket to non-blocking */
856 ret
= ioctlsocket(fd
, FIONBIO
, &flags
);
/*
 * Winsock calls report their failure code through WSAGetLastError(),
 * not errno, so fetch the code from there before formatting it.
 */
859 isc__strerror(WSAGetLastError()
, strbuf
, sizeof(strbuf
));
860 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
861 "ioctlsocket(%d, FIONBIO, %d): %s",
864 return (ISC_R_UNEXPECTED
);
867 return (ISC_R_SUCCESS
);
871 * Windows 2000 systems incorrectly cause UDP sockets using WSARecvFrom
872 * to not work correctly, returning a WSAECONNRESET error when a WSASendTo
873 * fails with an "ICMP port unreachable" response and preventing the
874 * socket from using the WSARecvFrom in subsequent operations.
875 * The function below fixes this, but requires that Windows 2000
876 * Service Pack 2 or later be installed on the system. NT 4.0
877 * systems are not affected by this and work correctly.
878 * See Microsoft Knowledge Base Article Q263823 for details of this.
/*
 * Issue the SIO_UDP_CONNRESET ioctl on fd with a FALSE argument.
 * Returns ISC_R_SUCCESS when the OS major version is below 5 (the
 * ioctl is skipped) or when WSAIoctl() does not report SOCKET_ERROR;
 * otherwise logs the failure and returns ISC_R_UNEXPECTED.
 */
881 connection_reset_fix(SOCKET fd
) {
882 DWORD dwBytesReturned
= 0;
/* FALSE is the input value handed to the SIO_UDP_CONNRESET ioctl. */
883 BOOL bNewBehavior
= FALSE
;
/* Skip the ioctl entirely on OS major versions before 5. */
886 if (isc_win32os_majorversion() < 5)
887 return (ISC_R_SUCCESS
); /* NT 4.0 has no problem */
889 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
/* No output buffer and no overlapped/completion routine are supplied. */
890 status
= WSAIoctl(fd
, SIO_UDP_CONNRESET
, &bNewBehavior
,
891 sizeof(bNewBehavior
), NULL
, 0,
892 &dwBytesReturned
, NULL
, NULL
);
893 if (status
!= SOCKET_ERROR
)
894 return (ISC_R_SUCCESS
);
/* The ioctl failed: log it and report an unexpected error. */
896 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
897 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
898 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
899 ISC_MSG_FAILED
, "failed"));
900 return (ISC_R_UNEXPECTED
);
905 * Construct an iov array and attach it to the msghdr passed in. This is
906 * the SEND constructor, which will use the used region of the buffer
907 * (if using a buffer list) or will use the internal region (if a single
908 * buffer I/O is requested).
910 * Nothing can be NULL, and the done event must list at least one buffer
911 * on the buffer linked list for this function to be meaningful.
914 build_msghdr_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
915 struct msghdr
*msg
, char *cmsg
, WSABUF
*iov
,
916 IoCompletionInfo
*lpo
)
918 unsigned int iovcount
;
919 isc_buffer_t
*buffer
;
925 memset(msg
, 0, sizeof(*msg
));
927 memcpy(&msg
->to_addr
, &dev
->address
.type
, dev
->address
.length
);
928 msg
->to_addr_len
= dev
->address
.length
;
930 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
935 * Single buffer I/O? Skip what we've done so far in this region.
937 if (buffer
== NULL
) {
938 write_count
= dev
->region
.length
- dev
->n
;
939 cpbuffer
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, sizeof(buflist_t
));
940 RUNTIME_CHECK(cpbuffer
!= NULL
);
941 cpbuffer
->buf
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, write_count
);
942 RUNTIME_CHECK(cpbuffer
->buf
!= NULL
);
944 socket_log(__LINE__
, sock
, NULL
, TRACE
,
945 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
946 "alloc_buffer %p %d %p %d", cpbuffer
, sizeof(buflist_t
),
947 cpbuffer
->buf
, write_count
);
949 memcpy(cpbuffer
->buf
,(dev
->region
.base
+ dev
->n
), write_count
);
950 cpbuffer
->buflen
= write_count
;
951 ISC_LIST_ENQUEUE(lpo
->bufferlist
, cpbuffer
, link
);
952 iov
[0].buf
= cpbuffer
->buf
;
953 iov
[0].len
= write_count
;
961 * Skip the data in the buffer list that we have already written.
964 while (buffer
!= NULL
) {
965 REQUIRE(ISC_BUFFER_VALID(buffer
));
966 if (skip_count
< isc_buffer_usedlength(buffer
))
968 skip_count
-= isc_buffer_usedlength(buffer
);
969 buffer
= ISC_LIST_NEXT(buffer
, link
);
972 while (buffer
!= NULL
) {
973 INSIST(iovcount
< MAXSCATTERGATHER_SEND
);
975 isc_buffer_usedregion(buffer
, &used
);
977 if (used
.length
> 0) {
978 int uselen
= used
.length
- skip_count
;
979 cpbuffer
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, sizeof(buflist_t
));
980 RUNTIME_CHECK(cpbuffer
!= NULL
);
981 cpbuffer
->buf
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, uselen
);
982 RUNTIME_CHECK(cpbuffer
->buf
!= NULL
);
984 socket_log(__LINE__
, sock
, NULL
, TRACE
,
985 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
986 "alloc_buffer %p %d %p %d", cpbuffer
, sizeof(buflist_t
),
987 cpbuffer
->buf
, write_count
);
989 memcpy(cpbuffer
->buf
,(used
.base
+ skip_count
), uselen
);
990 cpbuffer
->buflen
= uselen
;
991 iov
[iovcount
].buf
= cpbuffer
->buf
;
992 iov
[iovcount
].len
= used
.length
- skip_count
;
993 write_count
+= uselen
;
997 buffer
= ISC_LIST_NEXT(buffer
, link
);
1000 INSIST(skip_count
== 0);
1004 msg
->msg_iovlen
= iovcount
;
1005 msg
->msg_totallen
= write_count
;
/*
 * Fill in dev->address for an I/O event on sock, choosing between the
 * caller-supplied address and the address stored on the socket
 * according to the socket type.
 */
1009 set_dev_address(isc_sockaddr_t
*address
, isc_socket_t
*sock
,
1010 isc_socketevent_t
*dev
)
1012 if (sock
->type
== isc_sockettype_udp
) {
/* UDP: an explicit address from the caller is copied into the event. */
1013 if (address
!= NULL
)
1014 dev
->address
= *address
;
/* NOTE(review): presumably the else branch (no address given) - confirm. */
1016 dev
->address
= sock
->address
;
1017 } else if (sock
->type
== isc_sockettype_tcp
) {
/* TCP: callers must not pass an address; the socket's own one is used. */
1018 INSIST(address
== NULL
);
1019 dev
->address
= sock
->address
;
/*
 * Destructor for socket events: checks that the event no longer holds
 * any buffers, then calls the destructor stored in ev->destroy.
 */
1024 destroy_socketevent(isc_event_t
*event
) {
1025 isc_socketevent_t
*ev
= (isc_socketevent_t
*)event
;
/* The event must not still have buffers on its bufferlist. */
1027 INSIST(ISC_LIST_EMPTY(ev
->bufferlist
));
/* Hand the event to the destructor stored in ev->destroy. */
1029 (ev
->destroy
)(event
);
1032 static isc_socketevent_t
*
1033 allocate_socketevent(isc_socket_t
*sock
, isc_eventtype_t eventtype
,
1034 isc_taskaction_t action
, const void *arg
)
1036 isc_socketevent_t
*ev
;
1038 ev
= (isc_socketevent_t
*)isc_event_allocate(sock
->manager
->mctx
,
1045 ev
->result
= ISC_R_IOERROR
; // XXXMLG temporary change to detect failure to set
1046 ISC_LINK_INIT(ev
, ev_link
);
1047 ISC_LIST_INIT(ev
->bufferlist
);
1048 ev
->region
.base
= NULL
;
1052 ev
->destroy
= ev
->ev_destroy
;
1053 ev
->ev_destroy
= destroy_socketevent
;
1058 #if defined(ISC_SOCKET_DEBUG)
1060 dump_msg(struct msghdr
*msg
, isc_socket_t
*sock
) {
1063 printf("MSGHDR %p, Socket #: %u\n", msg
, sock
->fd
);
1064 printf("\tname %p, namelen %d\n", msg
->msg_name
, msg
->msg_namelen
);
1065 printf("\tiov %p, iovlen %d\n", msg
->msg_iov
, msg
->msg_iovlen
);
1066 for (i
= 0; i
< (unsigned int)msg
->msg_iovlen
; i
++)
1067 printf("\t\t%d\tbase %p, len %d\n", i
,
1068 msg
->msg_iov
[i
].buf
,
1069 msg
->msg_iov
[i
].len
);
1074 * map the error code
1077 map_socket_error(isc_socket_t
*sock
, int windows_errno
, int *isc_errno
,
1078 char *errorstring
, size_t bufsize
) {
1081 switch (windows_errno
) {
1082 case WSAECONNREFUSED
:
1083 *isc_errno
= ISC_R_CONNREFUSED
;
1084 if (sock
->connected
)
1085 doreturn
= DOIO_HARD
;
1087 doreturn
= DOIO_SOFT
;
1089 case WSAENETUNREACH
:
1090 case ERROR_NETWORK_UNREACHABLE
:
1091 *isc_errno
= ISC_R_NETUNREACH
;
1092 if (sock
->connected
)
1093 doreturn
= DOIO_HARD
;
1095 doreturn
= DOIO_SOFT
;
1097 case ERROR_PORT_UNREACHABLE
:
1098 case ERROR_HOST_UNREACHABLE
:
1099 case WSAEHOSTUNREACH
:
1100 *isc_errno
= ISC_R_HOSTUNREACH
;
1101 if (sock
->connected
)
1102 doreturn
= DOIO_HARD
;
1104 doreturn
= DOIO_SOFT
;
1107 *isc_errno
= ISC_R_NETDOWN
;
1108 if (sock
->connected
)
1109 doreturn
= DOIO_HARD
;
1111 doreturn
= DOIO_SOFT
;
1114 *isc_errno
= ISC_R_HOSTDOWN
;
1115 if (sock
->connected
)
1116 doreturn
= DOIO_HARD
;
1118 doreturn
= DOIO_SOFT
;
1121 *isc_errno
= ISC_R_NOPERM
;
1122 if (sock
->connected
)
1123 doreturn
= DOIO_HARD
;
1125 doreturn
= DOIO_SOFT
;
1129 case WSAECONNABORTED
:
1131 *isc_errno
= ISC_R_CONNECTIONRESET
;
1132 if (sock
->connected
)
1133 doreturn
= DOIO_HARD
;
1135 doreturn
= DOIO_SOFT
;
1138 *isc_errno
= ISC_R_NOTCONNECTED
;
1139 if (sock
->connected
)
1140 doreturn
= DOIO_HARD
;
1142 doreturn
= DOIO_SOFT
;
1144 case ERROR_OPERATION_ABORTED
:
1145 case ERROR_CONNECTION_ABORTED
:
1146 case ERROR_REQUEST_ABORTED
:
1147 *isc_errno
= ISC_R_CONNECTIONRESET
;
1148 doreturn
= DOIO_HARD
;
1151 *isc_errno
= ISC_R_NORESOURCES
;
1152 doreturn
= DOIO_HARD
;
1154 case WSAEAFNOSUPPORT
:
1155 *isc_errno
= ISC_R_FAMILYNOSUPPORT
;
1156 doreturn
= DOIO_HARD
;
1158 case WSAEADDRNOTAVAIL
:
1159 *isc_errno
= ISC_R_ADDRNOTAVAIL
;
1160 doreturn
= DOIO_HARD
;
1162 case WSAEDESTADDRREQ
:
1163 *isc_errno
= ISC_R_BADADDRESSFORM
;
1164 doreturn
= DOIO_HARD
;
1166 case ERROR_NETNAME_DELETED
:
1167 *isc_errno
= ISC_R_NETDOWN
;
1168 doreturn
= DOIO_HARD
;
1171 *isc_errno
= ISC_R_IOERROR
;
1172 doreturn
= DOIO_HARD
;
1175 if (doreturn
== DOIO_HARD
) {
1176 isc__strerror(windows_errno
, errorstring
, bufsize
);
1182 fill_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
) {
1185 isc_buffer_t
*buffer
;
1187 INSIST(dev
->n
< dev
->minimum
);
1188 INSIST(sock
->recvbuf
.remaining
> 0);
1189 INSIST(sock
->pending_recv
== 0);
1191 if (sock
->type
== isc_sockettype_udp
) {
1192 dev
->address
.length
= sock
->recvbuf
.from_addr_len
;
1193 memcpy(&dev
->address
.type
, &sock
->recvbuf
.from_addr
,
1194 sock
->recvbuf
.from_addr_len
);
1195 if (isc_sockaddr_getport(&dev
->address
) == 0) {
1196 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
1197 socket_log(__LINE__
, sock
, &dev
->address
, IOEVENT
,
1198 isc_msgcat
, ISC_MSGSET_SOCKET
,
1200 "dropping source port zero packet");
1202 sock
->recvbuf
.remaining
= 0;
1205 } else if (sock
->type
== isc_sockettype_tcp
) {
1206 dev
->address
= sock
->address
;
1210 * Run through the list of buffers we were given, and find the
1211 * first one with space. Once it is found, loop through, filling
1212 * the buffers as much as possible.
1214 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
1215 if (buffer
!= NULL
) { // Multi-buffer receive
1216 while (buffer
!= NULL
&& sock
->recvbuf
.remaining
> 0) {
1217 REQUIRE(ISC_BUFFER_VALID(buffer
));
1218 if (isc_buffer_availablelength(buffer
) > 0) {
1219 isc_buffer_availableregion(buffer
, &r
);
1220 copylen
= min(r
.length
, sock
->recvbuf
.remaining
);
1221 memcpy(r
.base
, sock
->recvbuf
.consume_position
, copylen
);
1222 sock
->recvbuf
.consume_position
+= copylen
;
1223 sock
->recvbuf
.remaining
-= copylen
;
1224 isc_buffer_add(buffer
, copylen
);
1227 buffer
= ISC_LIST_NEXT(buffer
, link
);
1229 } else { // Single-buffer receive
1230 copylen
= min(dev
->region
.length
- dev
->n
, sock
->recvbuf
.remaining
);
1231 memcpy(dev
->region
.base
+ dev
->n
, sock
->recvbuf
.consume_position
, copylen
);
1232 sock
->recvbuf
.consume_position
+= copylen
;
1233 sock
->recvbuf
.remaining
-= copylen
;
1238 * UDP receives are all-consuming. That is, if we have 4k worth of
1239 * data in our receive buffer, and the caller only gave us
1240 * 1k of space, we will toss the remaining 3k of data. TCP
1241 * will keep the extra data around and use it for later requests.
1243 if (sock
->type
== isc_sockettype_udp
)
1244 sock
->recvbuf
.remaining
= 0;
1248 * Copy out as much data from the internal buffer to done events.
1249 * As each done event is filled, send it along its way.
1252 completeio_recv(isc_socket_t
*sock
)
1254 isc_socketevent_t
*dev
;
1257 * If we are in the process of filling our buffer, we cannot
1258 * touch it yet, so don't.
1260 if (sock
->pending_recv
> 0)
1263 while (sock
->recvbuf
.remaining
> 0 && !ISC_LIST_EMPTY(sock
->recv_list
)) {
1264 dev
= ISC_LIST_HEAD(sock
->recv_list
);
1267 * See if we have sufficient data in our receive buffer
1268 * to handle this. If we do, copy out the data.
1270 fill_recv(sock
, dev
);
1273 * Did we satisfy it?
1275 if (dev
->n
>= dev
->minimum
) {
1276 dev
->result
= ISC_R_SUCCESS
;
1277 send_recvdone_event(sock
, &dev
);
1284 * DOIO_SUCCESS The operation succeeded. dev->result contains
1287 * DOIO_HARD A hard or unexpected I/O error was encountered.
1288 * dev->result contains the appropriate error.
1290 * DOIO_SOFT A soft I/O error was encountered. No senddone
1291 * event was sent. The operation should be retried.
1293 * No other return values are possible.
1296 completeio_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
1297 struct msghdr
*messagehdr
, int cc
, int send_errno
)
1299 char addrbuf
[ISC_SOCKADDR_FORMATSIZE
];
1300 char strbuf
[ISC_STRERRORSIZE
];
1302 if (send_errno
!= 0) {
1303 if (SOFT_ERROR(send_errno
))
1306 return (map_socket_error(sock
, send_errno
, &dev
->result
,
1307 strbuf
, sizeof(strbuf
)));
1310 * The other error types depend on whether or not the
1311 * socket is UDP or TCP. If it is UDP, some errors
1312 * that we expect to be fatal under TCP are merely
1313 * annoying, and are really soft errors.
1315 * However, these soft errors are still returned as
1318 isc_sockaddr_format(&dev
->address
, addrbuf
, sizeof(addrbuf
));
1319 isc__strerror(send_errno
, strbuf
, sizeof(strbuf
));
1320 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "completeio_send: %s: %s",
1322 dev
->result
= isc__errno2result(send_errno
);
1327 * If we write less than we expected, update counters, poke.
1330 if (cc
!= messagehdr
->msg_totallen
)
1334 * Exactly what we wanted to write. We're done with this
1335 * entry. Post its completion event.
1337 dev
->result
= ISC_R_SUCCESS
;
1338 return (DOIO_SUCCESS
);
/*
 * startio_send: start an overlapped send for 'dev' on 'sock'.
 *
 * An IoCompletionInfo is taken from hHeapHandle and tagged SOCKET_SEND, its
 * embedded msghdr is cleared and filled by build_msghdr_send(), and the
 * send is started with internal_sendmsg().  The value returned by
 * internal_sendmsg() is stored in *nbytes; the error code is reported
 * through 'send_errno'.
 *
 * A PENDING_ERROR() code yields DOIO_PENDING.  The socket state is set to
 * SOCK_DATA before returning.
 *
 * NOTE(review): several short lines of this function are not visible here
 * (remaining declarations, the other status assignments, the return), so
 * the full set of return values should be confirmed against the complete
 * file.
 */
1342 startio_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
, int *nbytes
,
1346 char strbuf
[ISC_STRERRORSIZE
];
1347 IoCompletionInfo
*lpo
;
1349 struct msghdr
*msghdr
;
/* The completion record is handed to internal_sendmsg() below. */
1351 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
1353 sizeof(IoCompletionInfo
));
1354 RUNTIME_CHECK(lpo
!= NULL
);
1355 lpo
->request_type
= SOCKET_SEND
;
1357 msghdr
= &lpo
->messagehdr
;
1358 memset(msghdr
, 0, sizeof(struct msghdr
));
1359 ISC_LIST_INIT(lpo
->bufferlist
);
1361 build_msghdr_send(sock
, dev
, msghdr
, cmsg
, sock
->iov
, lpo
);
1363 *nbytes
= internal_sendmsg(sock
, lpo
, msghdr
, 0, send_errno
);
1367 * I/O has been initiated
1368 * completion will be through the completion port
1370 if (PENDING_ERROR(*send_errno
)) {
1371 status
= DOIO_PENDING
;
1375 if (SOFT_ERROR(*send_errno
)) {
1381 * If we got this far then something is wrong
/* The failure is only logged when IOEVENT_LEVEL logging is enabled. */
1383 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
1384 isc__strerror(*send_errno
, strbuf
, sizeof(strbuf
));
1385 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
1386 isc_msgcat
, ISC_MSGSET_SOCKET
,
1387 ISC_MSG_INTERNALSEND
,
1388 "startio_send: internal_sendmsg(%d) %d "
1390 sock
->fd
, *nbytes
, *send_errno
, strbuf
);
1394 dev
->result
= ISC_R_SUCCESS
;
1397 _set_state(sock
, SOCK_DATA
);
1402 allocate_socket(isc_socketmgr_t
*manager
, isc_sockettype_t type
,
1403 isc_socket_t
**socketp
) {
1405 isc_result_t result
;
1407 sock
= isc_mem_get(manager
->mctx
, sizeof(*sock
));
1410 return (ISC_R_NOMEMORY
);
1413 sock
->references
= 0;
1415 sock
->manager
= manager
;
1417 sock
->fd
= INVALID_SOCKET
;
1419 ISC_LINK_INIT(sock
, link
);
1422 * set up list of readers and writers to be initially empty
1424 ISC_LIST_INIT(sock
->recv_list
);
1425 ISC_LIST_INIT(sock
->send_list
);
1426 ISC_LIST_INIT(sock
->accept_list
);
1427 sock
->connect_ev
= NULL
;
1428 sock
->pending_accept
= 0;
1429 sock
->pending_recv
= 0;
1430 sock
->pending_send
= 0;
1431 sock
->pending_iocp
= 0;
1433 sock
->connected
= 0;
1434 sock
->pending_connect
= 0;
1436 memset(sock
->name
, 0, sizeof(sock
->name
)); // zero the name field
1437 _set_state(sock
, SOCK_INITIALIZED
);
1439 sock
->recvbuf
.len
= 65536;
1440 sock
->recvbuf
.consume_position
= sock
->recvbuf
.base
;
1441 sock
->recvbuf
.remaining
= 0;
1442 sock
->recvbuf
.base
= isc_mem_get(manager
->mctx
, sock
->recvbuf
.len
); // max buffer size
1443 if (sock
->recvbuf
.base
== NULL
) {
1449 * initialize the lock
1451 result
= isc_mutex_init(&sock
->lock
);
1452 if (result
!= ISC_R_SUCCESS
) {
1454 isc_mem_put(manager
->mctx
, sock
->recvbuf
.base
, sock
->recvbuf
.len
);
1455 sock
->recvbuf
.base
= NULL
;
1459 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1462 sock
->magic
= SOCKET_MAGIC
;
1465 return (ISC_R_SUCCESS
);
1468 isc_mem_put(manager
->mctx
, sock
, sizeof(*sock
));
1474 * Verify that the socket state is consistent.
1477 consistent(isc_socket_t
*sock
) {
1479 isc_socketevent_t
*dev
;
1480 isc_socket_newconnev_t
*nev
;
1483 isc_boolean_t crash
= ISC_FALSE
;
1485 REQUIRE(sock
->pending_iocp
== sock
->pending_recv
+ sock
->pending_send
1486 + sock
->pending_accept
+ sock
->pending_connect
);
1488 dev
= ISC_LIST_HEAD(sock
->send_list
);
1490 while (dev
!= NULL
) {
1492 dev
= ISC_LIST_NEXT(dev
, ev_link
);
1494 if (count
> sock
->pending_send
) {
1496 crash_reason
= "send_list > sock->pending_send";
1499 nev
= ISC_LIST_HEAD(sock
->accept_list
);
1501 while (nev
!= NULL
) {
1503 nev
= ISC_LIST_NEXT(nev
, ev_link
);
1505 if (count
> sock
->pending_accept
) {
1507 crash_reason
= "send_list > sock->pending_send";
1511 socket_log(__LINE__
, sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1512 ISC_MSG_DESTROYING
, "SOCKET INCONSISTENT: %s",
1515 INSIST(crash
== ISC_FALSE
);
1520 * Maybe free the socket.
1522 * This function will verify tht the socket is no longer in use in any way,
1523 * either internally or externally. This is the only place where this
1524 * check is to be made; if some bit of code believes that IT is done with
1525 * the socket (e.g., some reference counter reaches zero), it should call
1528 * When calling this function, the socket must be locked, and the manager
1531 * When this function returns, *socketp will be NULL. No tricks to try
1532 * to hold on to this pointer are allowed.
1535 maybe_free_socket(isc_socket_t
**socketp
, int lineno
) {
1536 isc_socket_t
*sock
= *socketp
;
1539 INSIST(VALID_SOCKET(sock
));
1542 if (sock
->pending_iocp
> 0
1543 || sock
->pending_recv
> 0
1544 || sock
->pending_send
> 0
1545 || sock
->pending_accept
> 0
1546 || sock
->references
> 0
1547 || sock
->pending_connect
== 1
1548 || !ISC_LIST_EMPTY(sock
->recv_list
)
1549 || !ISC_LIST_EMPTY(sock
->send_list
)
1550 || !ISC_LIST_EMPTY(sock
->accept_list
)
1551 || sock
->fd
!= INVALID_SOCKET
) {
1552 UNLOCK(&sock
->lock
);
1555 UNLOCK(&sock
->lock
);
1557 free_socket(&sock
, lineno
);
1561 free_socket(isc_socket_t
**sockp
, int lineno
) {
1562 isc_socketmgr_t
*manager
;
1563 isc_socket_t
*sock
= *sockp
;
1566 manager
= sock
->manager
;
1569 * Seems we can free the socket after all.
1571 manager
= sock
->manager
;
1572 socket_log(__LINE__
, sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1573 ISC_MSG_DESTROYING
, "freeing socket line %d fd %d lock %p semaphore %p",
1574 lineno
, sock
->fd
, &sock
->lock
, sock
->lock
.LockSemaphore
);
1577 DESTROYLOCK(&sock
->lock
);
1579 if (sock
->recvbuf
.base
!= NULL
)
1580 isc_mem_put(manager
->mctx
, sock
->recvbuf
.base
, sock
->recvbuf
.len
);
1582 LOCK(&manager
->lock
);
1583 if (ISC_LINK_LINKED(sock
, link
))
1584 ISC_LIST_UNLINK(manager
->socklist
, sock
, link
);
1585 isc_mem_put(manager
->mctx
, sock
, sizeof(*sock
));
1587 if (ISC_LIST_EMPTY(manager
->socklist
))
1588 SIGNAL(&manager
->shutdown_ok
);
1589 UNLOCK(&manager
->lock
);
1593 * Create a new 'type' socket managed by 'manager'. Events
1594 * will be posted to 'task' and when dispatched 'action' will be
1595 * called with 'arg' as the arg value. The new socket is returned
/*
 * isc_socket_create: create a socket of 'type' in protocol family 'pf',
 * managed by 'manager', and return it through 'socketp'.
 *
 * Requires a valid manager, socketp != NULL with *socketp == NULL, and a
 * type other than isc_sockettype_fdwatch.
 *
 * The structure comes from allocate_socket().  A UDP socket is opened with
 * SOCK_DGRAM/IPPROTO_UDP and passed through connection_reset_fix(); a TCP
 * socket is opened with SOCK_STREAM/IPPROTO_TCP.  The descriptor is then
 * made non-blocking.  When a step fails the descriptor is closed (where
 * one exists) and the structure is released with free_socket().
 *
 * socket() failures are mapped by Winsock error code: the visible cases
 * return ISC_R_NORESOURCES, ISC_R_FAMILYNOSUPPORT (protocol or address
 * family not supported) or ISC_R_UNEXPECTED.
 *
 * For UDP sockets, and depending on USE_CMSG / SO_RCVBUF being defined,
 * IPv6 packet-info options are requested and the receive buffer size is
 * examined against RCVBUFSIZE.
 *
 * On success the socket is put in SOCK_OPEN state with one reference, is
 * passed to iocompletionport_update(), and is appended to the manager's
 * socket list under the manager lock.
 *
 * NOTE(review): the #else/#endif lines and several short statements of
 * this function are not visible here; confirm the conditional structure
 * against the complete file before changing it.
 */
1599 isc_socket_create(isc_socketmgr_t
*manager
, int pf
, isc_sockettype_t type
,
1600 isc_socket_t
**socketp
) {
1601 isc_socket_t
*sock
= NULL
;
1602 isc_result_t result
;
1603 #if defined(USE_CMSG)
1606 #if defined(SO_RCVBUF)
1607 ISC_SOCKADDR_LEN_T optlen
;
1611 char strbuf
[ISC_STRERRORSIZE
];
1613 REQUIRE(VALID_MANAGER(manager
));
1614 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1615 REQUIRE(type
!= isc_sockettype_fdwatch
);
1617 result
= allocate_socket(manager
, type
, &sock
);
1618 if (result
!= ISC_R_SUCCESS
)
1623 case isc_sockettype_udp
:
1624 sock
->fd
= socket(pf
, SOCK_DGRAM
, IPPROTO_UDP
);
1625 if (sock
->fd
!= INVALID_SOCKET
) {
1626 result
= connection_reset_fix(sock
->fd
);
1627 if (result
!= ISC_R_SUCCESS
) {
1628 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1629 "closed %d %d %d con_reset_fix_failed",
1630 sock
->pending_recv
, sock
->pending_send
,
1632 closesocket(sock
->fd
);
1633 _set_state(sock
, SOCK_CLOSED
);
1634 sock
->fd
= INVALID_SOCKET
;
1635 free_socket(&sock
, __LINE__
);
1640 case isc_sockettype_tcp
:
1641 sock
->fd
= socket(pf
, SOCK_STREAM
, IPPROTO_TCP
);
/* socket() failed: release the structure and map the Winsock error. */
1645 if (sock
->fd
== INVALID_SOCKET
) {
1646 socket_errno
= WSAGetLastError();
1647 free_socket(&sock
, __LINE__
);
1649 switch (socket_errno
) {
1652 return (ISC_R_NORESOURCES
);
1654 case WSAEPROTONOSUPPORT
:
1655 case WSAEPFNOSUPPORT
:
1656 case WSAEAFNOSUPPORT
:
1657 return (ISC_R_FAMILYNOSUPPORT
);
1660 isc__strerror(socket_errno
, strbuf
, sizeof(strbuf
));
1661 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1663 isc_msgcat_get(isc_msgcat
,
1668 return (ISC_R_UNEXPECTED
);
1672 result
= make_nonblock(sock
->fd
);
1673 if (result
!= ISC_R_SUCCESS
) {
1674 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1675 "closed %d %d %d make_nonblock_failed",
1676 sock
->pending_recv
, sock
->pending_send
,
1678 closesocket(sock
->fd
);
1679 sock
->fd
= INVALID_SOCKET
;
1680 free_socket(&sock
, __LINE__
);
1685 #if defined(USE_CMSG) || defined(SO_RCVBUF)
1686 if (type
== isc_sockettype_udp
) {
1688 #if defined(USE_CMSG)
1689 #if defined(ISC_PLATFORM_HAVEIPV6)
1690 #ifdef IPV6_RECVPKTINFO
1692 if ((pf
== AF_INET6
)
1693 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
,
1694 (void *)&on
, sizeof(on
)) < 0)) {
1695 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
1696 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1697 "setsockopt(%d, IPV6_RECVPKTINFO) "
1699 isc_msgcat_get(isc_msgcat
,
1707 if ((pf
== AF_INET6
)
1708 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_PKTINFO
,
1709 (void *)&on
, sizeof(on
)) < 0)) {
1710 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
1711 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1712 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1714 isc_msgcat_get(isc_msgcat
,
1720 #endif /* IPV6_RECVPKTINFO */
1721 #ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/
1722 /* use minimum MTU */
1723 if (pf
== AF_INET6
) {
1724 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
,
1726 (void *)&on
, sizeof(on
));
1729 #endif /* ISC_PLATFORM_HAVEIPV6 */
1730 #endif /* defined(USE_CMSG) */
1732 #if defined(SO_RCVBUF)
1733 optlen
= sizeof(size
);
1734 if (getsockopt(sock
->fd
, SOL_SOCKET
, SO_RCVBUF
,
1735 (void *)&size
, &optlen
) >= 0 &&
1736 size
< RCVBUFSIZE
) {
1738 (void)setsockopt(sock
->fd
, SOL_SOCKET
, SO_RCVBUF
,
1739 (void *)&size
, sizeof(size
));
1744 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
/* The socket is usable from here on; the creator holds the reference. */
1746 _set_state(sock
, SOCK_OPEN
);
1747 sock
->references
= 1;
1750 iocompletionport_update(sock
);
1753 * Note we don't have to lock the socket like we normally would because
1754 * there are no external references to it yet.
1756 LOCK(&manager
->lock
);
1757 ISC_LIST_APPEND(manager
->socklist
, sock
, link
);
1758 InterlockedIncrement(&manager
->totalSockets
);
1759 UNLOCK(&manager
->lock
);
1761 socket_log(__LINE__
, sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1762 ISC_MSG_CREATED
, "created %u type %u", sock
->fd
, type
);
1764 return (ISC_R_SUCCESS
);
1768 isc_socket_open(isc_socket_t
*sock
) {
1769 REQUIRE(VALID_SOCKET(sock
));
1770 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
1772 return (ISC_R_NOTIMPLEMENTED
);
1776 * Attach to a socket. Caller must explicitly detach when it is done.
1779 isc_socket_attach(isc_socket_t
*sock
, isc_socket_t
**socketp
) {
1780 REQUIRE(VALID_SOCKET(sock
));
1781 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1786 UNLOCK(&sock
->lock
);
1792 * Dereference a socket. If this is the last reference to it, clean things
1793 * up by destroying the socket.
1796 isc_socket_detach(isc_socket_t
**socketp
) {
1798 isc_boolean_t kill_socket
= ISC_FALSE
;
1800 REQUIRE(socketp
!= NULL
);
1802 REQUIRE(VALID_SOCKET(sock
));
1803 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
1807 REQUIRE(sock
->references
> 0);
1810 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1811 "detach_socket %d %d %d",
1812 sock
->pending_recv
, sock
->pending_send
,
1815 if (sock
->references
== 0 && sock
->fd
!= INVALID_SOCKET
) {
1816 closesocket(sock
->fd
);
1817 sock
->fd
= INVALID_SOCKET
;
1818 _set_state(sock
, SOCK_CLOSED
);
1821 maybe_free_socket(&sock
, __LINE__
);
1827 isc_socket_close(isc_socket_t
*sock
) {
1828 REQUIRE(VALID_SOCKET(sock
));
1829 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
1831 return (ISC_R_NOTIMPLEMENTED
);
1835 * Dequeue an item off the given socket's read queue, set the result code
1836 * in the done event to the one provided, and send it to the task it was
1839 * If the event to be sent is on a list, remove it before sending. If
1840 * asked to, send and detach from the task as well.
1842 * Caller must have the socket locked if the event is attached to the socket.
1845 send_recvdone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1848 task
= (*dev
)->ev_sender
;
1849 (*dev
)->ev_sender
= sock
;
1851 if (ISC_LINK_LINKED(*dev
, ev_link
))
1852 ISC_LIST_DEQUEUE(sock
->recv_list
, *dev
, ev_link
);
1854 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1855 == ISC_SOCKEVENTATTR_ATTACHED
)
1856 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1858 isc_task_send(task
, (isc_event_t
**)dev
);
1864 * See comments for send_recvdone_event() above.
1867 send_senddone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1870 INSIST(dev
!= NULL
&& *dev
!= NULL
);
1872 task
= (*dev
)->ev_sender
;
1873 (*dev
)->ev_sender
= sock
;
1875 if (ISC_LINK_LINKED(*dev
, ev_link
))
1876 ISC_LIST_DEQUEUE(sock
->send_list
, *dev
, ev_link
);
1878 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1879 == ISC_SOCKEVENTATTR_ATTACHED
)
1880 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1882 isc_task_send(task
, (isc_event_t
**)dev
);
1888 * See comments for send_recvdone_event() above.
1891 send_acceptdone_event(isc_socket_t
*sock
, isc_socket_newconnev_t
**adev
) {
1894 INSIST(adev
!= NULL
&& *adev
!= NULL
);
1896 task
= (*adev
)->ev_sender
;
1897 (*adev
)->ev_sender
= sock
;
1899 if (ISC_LINK_LINKED(*adev
, ev_link
))
1900 ISC_LIST_DEQUEUE(sock
->accept_list
, *adev
, ev_link
);
1902 isc_task_sendanddetach(&task
, (isc_event_t
**)adev
);
1908 * See comments for send_recvdone_event() above.
1911 send_connectdone_event(isc_socket_t
*sock
, isc_socket_connev_t
**cdev
) {
1914 INSIST(cdev
!= NULL
&& *cdev
!= NULL
);
1916 task
= (*cdev
)->ev_sender
;
1917 (*cdev
)->ev_sender
= sock
;
1919 sock
->connect_ev
= NULL
;
1921 isc_task_sendanddetach(&task
, (isc_event_t
**)cdev
);
1927 * On entry to this function, the event delivered is the internal
1928 * readable event, and the first item on the accept_list should be
1929 * the done event we want to send. If the list is empty, this is a no-op,
1930 * so just close the new connection, unlock, and return.
1932 * Note the socket is locked before entering here
1935 internal_accept(isc_socket_t
*sock
, IoCompletionInfo
*lpo
, int accept_errno
) {
1936 isc_socket_newconnev_t
*adev
;
1937 isc_result_t result
= ISC_R_SUCCESS
;
1938 isc_socket_t
*nsock
;
1939 struct sockaddr
*localaddr
;
1940 int localaddr_len
= sizeof(*localaddr
);
1941 struct sockaddr
*remoteaddr
;
1942 int remoteaddr_len
= sizeof(*remoteaddr
);
1944 INSIST(VALID_SOCKET(sock
));
1948 socket_log(__LINE__
, sock
, NULL
, TRACE
,
1949 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
1950 "internal_accept called");
1952 INSIST(sock
->listener
);
1954 INSIST(sock
->pending_iocp
> 0);
1955 sock
->pending_iocp
--;
1956 INSIST(sock
->pending_accept
> 0);
1957 sock
->pending_accept
--;
1962 * If the event is no longer in the list we can just return.
1964 if (!acceptdone_is_active(sock
, adev
))
1967 nsock
= adev
->newsocket
;
1970 * Pull off the done event.
1972 ISC_LIST_UNLINK(sock
->accept_list
, adev
, ev_link
);
1975 * Extract the addresses from the socket, copy them into the structure,
1976 * and return the new socket.
1978 ISCGetAcceptExSockaddrs(lpo
->acceptbuffer
, 0,
1979 sizeof(SOCKADDR_STORAGE
) + 16, sizeof(SOCKADDR_STORAGE
) + 16,
1980 (LPSOCKADDR
*)&localaddr
, &localaddr_len
,
1981 (LPSOCKADDR
*)&remoteaddr
, &remoteaddr_len
);
1982 memcpy(&adev
->address
.type
, remoteaddr
, remoteaddr_len
);
1983 adev
->address
.length
= remoteaddr_len
;
1984 nsock
->address
= adev
->address
;
1985 nsock
->pf
= adev
->address
.type
.sa
.sa_family
;
1987 socket_log(__LINE__
, nsock
, &nsock
->address
, TRACE
,
1988 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
1989 "internal_accept parent %p", sock
);
1991 result
= make_nonblock(adev
->newsocket
->fd
);
1992 INSIST(result
== ISC_R_SUCCESS
);
1994 INSIST(setsockopt(nsock
->fd
, SOL_SOCKET
, SO_UPDATE_ACCEPT_CONTEXT
,
1995 (char *)&sock
->fd
, sizeof(sock
->fd
)) == 0);
1998 * Hook it up into the manager.
2001 nsock
->connected
= 1;
2002 _set_state(nsock
, SOCK_OPEN
);
2004 LOCK(&nsock
->manager
->lock
);
2005 ISC_LIST_APPEND(nsock
->manager
->socklist
, nsock
, link
);
2006 InterlockedIncrement(&nsock
->manager
->totalSockets
);
2007 UNLOCK(&nsock
->manager
->lock
);
2009 socket_log(__LINE__
, sock
, &nsock
->address
, CREATION
,
2010 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTEDCXN
,
2011 "accepted_connection new_socket %p fd %d",
2014 adev
->result
= result
;
2015 send_acceptdone_event(sock
, &adev
);
2019 UNLOCK(&sock
->lock
);
2021 HeapFree(hHeapHandle
, 0, lpo
->acceptbuffer
);
2022 lpo
->acceptbuffer
= NULL
;
2026 * Called when a socket with a pending connect() finishes.
2027 * Note that the socket is locked before entering.
2030 internal_connect(isc_socket_t
*sock
, IoCompletionInfo
*lpo
, int connect_errno
) {
2031 isc_socket_connev_t
*cdev
;
2032 char strbuf
[ISC_STRERRORSIZE
];
2034 INSIST(VALID_SOCKET(sock
));
2038 INSIST(sock
->pending_iocp
> 0);
2039 sock
->pending_iocp
--;
2040 INSIST(sock
->pending_connect
== 1);
2041 sock
->pending_connect
= 0;
2044 * Has this event been canceled?
2047 if (!connectdone_is_active(sock
, cdev
)) {
2048 sock
->pending_connect
= 0;
2049 if (sock
->fd
!= INVALID_SOCKET
) {
2050 closesocket(sock
->fd
);
2051 sock
->fd
= INVALID_SOCKET
;
2052 _set_state(sock
, SOCK_CLOSED
);
2055 UNLOCK(&sock
->lock
);
2060 * Check possible Windows network event error status here.
2062 if (connect_errno
!= 0) {
2064 * If the error is SOFT, just try again on this
2065 * fd and pretend nothing strange happened.
2067 if (SOFT_ERROR(connect_errno
) ||
2068 connect_errno
== WSAEINPROGRESS
) {
2069 sock
->pending_connect
= 1;
2071 UNLOCK(&sock
->lock
);
2076 * Translate other errors into ISC_R_* flavors.
2078 switch (connect_errno
) {
2079 #define ERROR_MATCH(a, b) case a: cdev->result = b; break;
2080 ERROR_MATCH(WSAEACCES
, ISC_R_NOPERM
);
2081 ERROR_MATCH(WSAEADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
2082 ERROR_MATCH(WSAEAFNOSUPPORT
, ISC_R_ADDRNOTAVAIL
);
2083 ERROR_MATCH(WSAECONNREFUSED
, ISC_R_CONNREFUSED
);
2084 ERROR_MATCH(WSAEHOSTUNREACH
, ISC_R_HOSTUNREACH
);
2085 ERROR_MATCH(WSAEHOSTDOWN
, ISC_R_HOSTDOWN
);
2086 ERROR_MATCH(WSAENETUNREACH
, ISC_R_NETUNREACH
);
2087 ERROR_MATCH(WSAENETDOWN
, ISC_R_NETDOWN
);
2088 ERROR_MATCH(WSAENOBUFS
, ISC_R_NORESOURCES
);
2089 ERROR_MATCH(WSAECONNRESET
, ISC_R_CONNECTIONRESET
);
2090 ERROR_MATCH(WSAECONNABORTED
, ISC_R_CONNECTIONRESET
);
2091 ERROR_MATCH(WSAETIMEDOUT
, ISC_R_TIMEDOUT
);
2094 cdev
->result
= ISC_R_UNEXPECTED
;
2095 isc__strerror(connect_errno
, strbuf
, sizeof(strbuf
));
2096 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2097 "internal_connect: connect() %s",
2101 INSIST(setsockopt(sock
->fd
, SOL_SOCKET
, SO_UPDATE_CONNECT_CONTEXT
, NULL
, 0) == 0);
2102 cdev
->result
= ISC_R_SUCCESS
;
2103 sock
->connected
= 1;
2104 socket_log(__LINE__
, sock
, &sock
->address
, IOEVENT
,
2105 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTEDCXN
,
2106 "internal_connect: success");
2109 send_connectdone_event(sock
, &cdev
);
2111 UNLOCK(&sock
->lock
);
2115 * Loop through the socket, returning ISC_R_EOF for each done event pending.
2118 send_recvdone_abort(isc_socket_t
*sock
, isc_result_t result
) {
2119 isc_socketevent_t
*dev
;
2121 while (!ISC_LIST_EMPTY(sock
->recv_list
)) {
2122 dev
= ISC_LIST_HEAD(sock
->recv_list
);
2123 dev
->result
= result
;
2124 send_recvdone_event(sock
, &dev
);
2129 * Take the data we received in our private buffer, and if any recv() calls on
2130 * our list are satisfied, send the corresponding done event.
2132 * If we need more data (there are still items on the recv_list after we consume all
2133 * our data) then arrange for another system recv() call to fill our buffers.
2136 internal_recv(isc_socket_t
*sock
, int nbytes
)
2138 INSIST(VALID_SOCKET(sock
));
2143 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
2144 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALRECV
,
2145 "internal_recv: %d bytes received", nbytes
);
2148 * If we got here, the I/O operation succeeded. However, we might still have removed this
2149 * event from our notification list (or never placed it on it due to immediate completion.)
2150 * Handle the reference counting here, and handle the cancellation event just after.
2152 INSIST(sock
->pending_iocp
> 0);
2153 sock
->pending_iocp
--;
2154 INSIST(sock
->pending_recv
> 0);
2155 sock
->pending_recv
--;
2158 * The only way we could have gotten here is that our I/O has successfully completed.
2159 * Update our pointers, and move on. The only odd case here is that we might not
2160 * have received enough data on a TCP stream to satisfy the minimum requirements. If
2161 * this is the case, we will re-issue the recv() call for what we need.
2163 * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end
2166 if (nbytes
== 0 && sock
->type
== isc_sockettype_tcp
) {
2167 send_recvdone_abort(sock
, ISC_R_EOF
);
2168 maybe_free_socket(&sock
, __LINE__
);
2171 sock
->recvbuf
.remaining
= nbytes
;
2172 sock
->recvbuf
.consume_position
= sock
->recvbuf
.base
;
2173 completeio_recv(sock
);
2176 * If there are more receivers waiting for data, queue another receive
2179 queue_receive_request(sock
);
2182 * Unlock and/or destroy if we are the last thing this socket has left to do.
2184 maybe_free_socket(&sock
, __LINE__
);
2188 internal_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
2189 struct msghdr
*messagehdr
, int nbytes
, int send_errno
, IoCompletionInfo
*lpo
)
2194 * Find out what socket this is and lock it.
2196 INSIST(VALID_SOCKET(sock
));
2201 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
2202 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALSEND
,
2203 "internal_send: task got socket event %p", dev
);
2205 buffer
= ISC_LIST_HEAD(lpo
->bufferlist
);
2206 while (buffer
!= NULL
) {
2207 ISC_LIST_DEQUEUE(lpo
->bufferlist
, buffer
, link
);
2209 socket_log(__LINE__
, sock
, NULL
, TRACE
,
2210 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
2211 "free_buffer %p %p", buffer
, buffer
->buf
);
2213 HeapFree(hHeapHandle
, 0, buffer
->buf
);
2214 HeapFree(hHeapHandle
, 0, buffer
);
2215 buffer
= ISC_LIST_HEAD(lpo
->bufferlist
);
2218 INSIST(sock
->pending_iocp
> 0);
2219 sock
->pending_iocp
--;
2220 INSIST(sock
->pending_send
> 0);
2221 sock
->pending_send
--;
2223 /* If the event is no longer in the list we can just return */
2224 if (!senddone_is_active(sock
, dev
))
2228 * Set the error code and send things on its way.
2230 switch (completeio_send(sock
, dev
, messagehdr
, nbytes
, send_errno
)) {
2235 send_senddone_event(sock
, &dev
);
2240 maybe_free_socket(&sock
, __LINE__
);
2244 * These return if the done event passed in is on the list (or for connect, is
2245 * the one we're waiting for. Using these ensures we will not double-send an
2248 static isc_boolean_t
2249 senddone_is_active(isc_socket_t
*sock
, isc_socketevent_t
*dev
)
2251 isc_socketevent_t
*ldev
;
2253 ldev
= ISC_LIST_HEAD(sock
->send_list
);
2254 while (ldev
!= NULL
&& ldev
!= dev
)
2255 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
2257 return (ldev
== NULL
? ISC_FALSE
: ISC_TRUE
);
2260 static isc_boolean_t
2261 acceptdone_is_active(isc_socket_t
*sock
, isc_socket_newconnev_t
*dev
)
2263 isc_socket_newconnev_t
*ldev
;
2265 ldev
= ISC_LIST_HEAD(sock
->accept_list
);
2266 while (ldev
!= NULL
&& ldev
!= dev
)
2267 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
2269 return (ldev
== NULL
? ISC_FALSE
: ISC_TRUE
);
2272 static isc_boolean_t
2273 connectdone_is_active(isc_socket_t
*sock
, isc_socket_connev_t
*dev
)
2275 return (sock
->connect_ev
== dev
? ISC_TRUE
: ISC_FALSE
);
2279 * This is the I/O Completion Port Worker Function. It loops forever
2280 * waiting for I/O to complete and then forwards them for further
2281 * processing. There are a number of these in separate threads.
/*
 * SocketIoThread: I/O completion port worker.
 *
 * 'ThreadContext' is the socket manager.  The thread raises its own
 * priority to THREAD_PRIORITY_ABOVE_NORMAL (failure is reported through
 * FATAL_ERROR) and then repeatedly takes completions from the manager's
 * completion port.  Each completion yields a byte count, the socket (the
 * completion key) and the IoCompletionInfo of the request; a NULL
 * IoCompletionInfo is the request to exit.
 *
 * Two groups of per-request handling are visible:
 *  - an error path that reads WSAGetLastError(), converts it to an ISC
 *    result, adjusts the pending counters for the request type and then
 *    aborts queued receives, or posts the send/accept/connect done event
 *    with that result if the event is still active; it ends with
 *    maybe_free_socket() and frees the IoCompletionInfo;
 *  - a dispatch to internal_recv(), internal_send(), internal_accept() or
 *    internal_connect(), after which the IoCompletionInfo is freed.
 *
 * Returns 0 as the thread result after logging that it is exiting.
 *
 * NOTE(review): the loop header, the condition selecting the error path,
 * the locking calls and most case labels are not visible here; confirm
 * them against the complete file.
 * NOTE(review): the completion key is received through an (LPDWORD) cast
 * of &sock; confirm this is adequate for 64-bit builds.
 * NOTE(review): the failed-accept path frees 'lpo' but no release of
 * lpo->acceptbuffer is visible there; internal_accept() frees that buffer
 * on the normal path.  Verify whether this leaks.
 */
2283 static isc_threadresult_t WINAPI
2284 SocketIoThread(LPVOID ThreadContext
) {
2285 isc_socketmgr_t
*manager
= ThreadContext
;
2286 BOOL bSuccess
= FALSE
;
2288 IoCompletionInfo
*lpo
= NULL
;
2289 isc_socket_t
*sock
= NULL
;
2291 struct msghdr
*messagehdr
= NULL
;
2293 char strbuf
[ISC_STRERRORSIZE
];
2296 REQUIRE(VALID_MANAGER(manager
));
2299 * Set the thread priority high enough so I/O will
2300 * preempt normal recv packet processing, but not
2301 * higher than the timer sync thread.
2303 if (!SetThreadPriority(GetCurrentThread(),
2304 THREAD_PRIORITY_ABOVE_NORMAL
)) {
2305 errval
= GetLastError();
2306 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
2307 FATAL_ERROR(__FILE__
, __LINE__
,
2308 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2310 "Can't set thread priority: %s"),
2315 * Loop forever waiting on I/O Completions and then processing them
2318 bSuccess
= GetQueuedCompletionStatus(manager
->hIoCompletionPort
,
2319 &nbytes
, (LPDWORD
)&sock
,
2320 (LPWSAOVERLAPPED
*)&lpo
,
2322 if (lpo
== NULL
) /* Received request to exit */
2325 REQUIRE(VALID_SOCKET(sock
));
2327 request
= lpo
->request_type
;
2331 isc_result_t isc_result
;
2334 * Did the I/O operation complete?
2336 errstatus
= WSAGetLastError();
2337 isc_result
= isc__errno2resultx(errstatus
, __FILE__
, __LINE__
);
/* Failed receive: every queued receive is completed with isc_result. */
2343 INSIST(sock
->pending_iocp
> 0);
2344 sock
->pending_iocp
--;
2345 INSIST(sock
->pending_recv
> 0);
2346 sock
->pending_recv
--;
2347 send_recvdone_abort(sock
, isc_result
);
2348 if (isc_result
== ISC_R_UNEXPECTED
) {
2349 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2350 "SOCKET_RECV: Windows error code: %d, returning ISC error %d",
2351 errstatus
, isc_result
);
/* Failed send: the done event is posted only if it is still queued. */
2356 INSIST(sock
->pending_iocp
> 0);
2357 sock
->pending_iocp
--;
2358 INSIST(sock
->pending_send
> 0);
2359 sock
->pending_send
--;
2360 if (senddone_is_active(sock
, lpo
->dev
)) {
2361 lpo
->dev
->result
= isc_result
;
2362 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2364 send_senddone_event(sock
, &lpo
->dev
);
/* Failed accept: the prepared new socket is closed and freed first. */
2369 INSIST(sock
->pending_iocp
> 0);
2370 sock
->pending_iocp
--;
2371 INSIST(sock
->pending_accept
> 0);
2372 sock
->pending_accept
--;
2373 if (acceptdone_is_active(sock
, lpo
->adev
)) {
2374 closesocket(lpo
->adev
->newsocket
->fd
);
2375 lpo
->adev
->newsocket
->fd
= INVALID_SOCKET
;
2376 lpo
->adev
->newsocket
->references
--;
2377 free_socket(&lpo
->adev
->newsocket
, __LINE__
);
2378 lpo
->adev
->result
= isc_result
;
2379 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2381 send_acceptdone_event(sock
, &lpo
->adev
);
2385 case SOCKET_CONNECT
:
2386 INSIST(sock
->pending_iocp
> 0);
2387 sock
->pending_iocp
--;
2388 INSIST(sock
->pending_connect
== 1);
2389 sock
->pending_connect
= 0;
2390 if (connectdone_is_active(sock
, lpo
->cdev
)) {
2391 lpo
->cdev
->result
= isc_result
;
2392 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2393 "canceled_connect");
2394 send_connectdone_event(sock
, &lpo
->cdev
);
2398 maybe_free_socket(&sock
, __LINE__
);
2401 HeapFree(hHeapHandle
, 0, lpo
);
/* Dispatch of a completion to the handler for its request type. */
2405 messagehdr
= &lpo
->messagehdr
;
2409 internal_recv(sock
, nbytes
);
2412 internal_send(sock
, lpo
->dev
, messagehdr
, nbytes
, errstatus
, lpo
);
2415 internal_accept(sock
, lpo
, errstatus
);
2417 case SOCKET_CONNECT
:
2418 internal_connect(sock
, lpo
, errstatus
);
2423 HeapFree(hHeapHandle
, 0, lpo
);
2427 * Exit Completion Port Thread
2429 manager_log(manager
, TRACE
,
2430 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2431 ISC_MSG_EXITING
, "SocketIoThread exiting"));
2432 return ((isc_threadresult_t
)0);
2436 * Create a new socket manager.
2439 isc_socketmgr_create(isc_mem_t
*mctx
, isc_socketmgr_t
**managerp
) {
2440 return (isc_socketmgr_create2(mctx
, managerp
, 0));
2444 isc_socketmgr_create2(isc_mem_t
*mctx
, isc_socketmgr_t
**managerp
,
2445 unsigned int maxsocks
)
2447 isc_socketmgr_t
*manager
;
2448 isc_result_t result
;
2450 REQUIRE(managerp
!= NULL
&& *managerp
== NULL
);
2453 return (ISC_R_NOTIMPLEMENTED
);
2455 manager
= isc_mem_get(mctx
, sizeof(*manager
));
2456 if (manager
== NULL
)
2457 return (ISC_R_NOMEMORY
);
2461 manager
->magic
= SOCKET_MANAGER_MAGIC
;
2462 manager
->mctx
= NULL
;
2463 manager
->stats
= NULL
;
2464 ISC_LIST_INIT(manager
->socklist
);
2465 result
= isc_mutex_init(&manager
->lock
);
2466 if (result
!= ISC_R_SUCCESS
) {
2467 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2470 if (isc_condition_init(&manager
->shutdown_ok
) != ISC_R_SUCCESS
) {
2471 DESTROYLOCK(&manager
->lock
);
2472 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2473 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2474 "isc_condition_init() %s",
2475 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2476 ISC_MSG_FAILED
, "failed"));
2477 return (ISC_R_UNEXPECTED
);
2480 isc_mem_attach(mctx
, &manager
->mctx
);
2482 iocompletionport_init(manager
); /* Create the Completion Ports */
2484 manager
->bShutdown
= ISC_FALSE
;
2485 manager
->totalSockets
= 0;
2486 manager
->iocp_total
= 0;
2488 *managerp
= manager
;
2490 return (ISC_R_SUCCESS
);
2494 isc_socketmgr_getmaxsockets(isc_socketmgr_t
*manager
, unsigned int *nsockp
) {
2495 REQUIRE(VALID_MANAGER(manager
));
2496 REQUIRE(nsockp
!= NULL
);
2498 return (ISC_R_NOTIMPLEMENTED
);
2502 isc_socketmgr_setstats(isc_socketmgr_t
*manager
, isc_stats_t
*stats
) {
2503 REQUIRE(VALID_MANAGER(manager
));
2504 REQUIRE(ISC_LIST_EMPTY(manager
->socklist
));
2505 REQUIRE(manager
->stats
== NULL
);
2506 REQUIRE(isc_stats_ncounters(stats
) == isc_sockstatscounter_max
);
2508 isc_stats_attach(stats
, &manager
->stats
);
2512 isc_socketmgr_destroy(isc_socketmgr_t
**managerp
) {
2513 isc_socketmgr_t
*manager
;
2518 * Destroy a socket manager.
2521 REQUIRE(managerp
!= NULL
);
2522 manager
= *managerp
;
2523 REQUIRE(VALID_MANAGER(manager
));
2525 LOCK(&manager
->lock
);
2528 * Wait for all sockets to be destroyed.
2530 while (!ISC_LIST_EMPTY(manager
->socklist
)) {
2531 manager_log(manager
, CREATION
,
2532 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2533 ISC_MSG_SOCKETSREMAIN
,
2535 WAIT(&manager
->shutdown_ok
, &manager
->lock
);
2538 UNLOCK(&manager
->lock
);
2541 * Here, we need to had some wait code for the completion port
2544 signal_iocompletionport_exit(manager
);
2545 manager
->bShutdown
= ISC_TRUE
;
2548 * Wait for threads to exit.
2550 for (i
= 0; i
< manager
->maxIOCPThreads
; i
++) {
2551 if (isc_thread_join((isc_thread_t
) manager
->hIOCPThreads
[i
],
2552 NULL
) != ISC_R_SUCCESS
)
2553 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2554 "isc_thread_join() for Completion Port %s",
2555 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2556 ISC_MSG_FAILED
, "failed"));
2562 CloseHandle(manager
->hIoCompletionPort
);
2564 (void)isc_condition_destroy(&manager
->shutdown_ok
);
2566 DESTROYLOCK(&manager
->lock
);
2567 if (manager
->stats
!= NULL
)
2568 isc_stats_detach(&manager
->stats
);
2570 mctx
= manager
->mctx
;
2571 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2573 isc_mem_detach(&mctx
);
2579 queue_receive_event(isc_socket_t
*sock
, isc_task_t
*task
, isc_socketevent_t
*dev
)
2581 isc_task_t
*ntask
= NULL
;
2583 isc_task_attach(task
, &ntask
);
2584 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
2587 * Enqueue the request.
2589 INSIST(!ISC_LINK_LINKED(dev
, ev_link
));
2590 ISC_LIST_ENQUEUE(sock
->recv_list
, dev
, ev_link
);
2592 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2593 "queue_receive_event: event %p -> task %p",
2598 * Check the pending receive queue, and if we have data pending, give it to this
2599 * caller. If we have none, queue an I/O request. If this caller is not the first
2600 * on the list, then we will just queue this event and return.
2602 * Caller must have the socket locked.
2605 socket_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
2609 isc_task_t
*ntask
= NULL
;
2610 isc_result_t result
= ISC_R_SUCCESS
;
2613 dev
->ev_sender
= task
;
2615 if (sock
->fd
== INVALID_SOCKET
)
2619 * Queue our event on the list of things to do. Call our function to
2620 * attempt to fill buffers as much as possible, and return done events.
2621 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2622 * here and tell our caller that we could not satisfy it immediately.
2624 queue_receive_event(sock
, task
, dev
);
2625 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
2626 result
= ISC_R_INPROGRESS
;
2628 completeio_recv(sock
);
2631 * If there are more receivers waiting for data, queue another receive
2634 queue_receive_request(sock
);
2640 isc_socket_recvv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2641 unsigned int minimum
, isc_task_t
*task
,
2642 isc_taskaction_t action
, const void *arg
)
2644 isc_socketevent_t
*dev
;
2645 isc_socketmgr_t
*manager
;
2646 unsigned int iocount
;
2647 isc_buffer_t
*buffer
;
2650 REQUIRE(VALID_SOCKET(sock
));
2655 * Make sure that the socket is not closed. XXXMLG change error here?
2657 if (sock
->fd
== INVALID_SOCKET
) {
2658 UNLOCK(&sock
->lock
);
2659 return (ISC_R_CONNREFUSED
);
2662 REQUIRE(buflist
!= NULL
);
2663 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
2664 REQUIRE(task
!= NULL
);
2665 REQUIRE(action
!= NULL
);
2667 manager
= sock
->manager
;
2668 REQUIRE(VALID_MANAGER(manager
));
2670 iocount
= isc_bufferlist_availablecount(buflist
);
2671 REQUIRE(iocount
> 0);
2673 INSIST(sock
->bound
);
2675 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2677 UNLOCK(&sock
->lock
);
2678 return (ISC_R_NOMEMORY
);
2682 * UDP sockets are always partial read
2684 if (sock
->type
== isc_sockettype_udp
)
2688 dev
->minimum
= iocount
;
2690 dev
->minimum
= minimum
;
2694 * Move each buffer from the passed in list to our internal one.
2696 buffer
= ISC_LIST_HEAD(*buflist
);
2697 while (buffer
!= NULL
) {
2698 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
2699 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
2700 buffer
= ISC_LIST_HEAD(*buflist
);
2703 ret
= socket_recv(sock
, dev
, task
, 0);
2705 UNLOCK(&sock
->lock
);
2710 isc_socket_recv(isc_socket_t
*sock
, isc_region_t
*region
, unsigned int minimum
,
2711 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2713 isc_socketevent_t
*dev
;
2714 isc_socketmgr_t
*manager
;
2717 REQUIRE(VALID_SOCKET(sock
));
2722 * make sure that the socket's not closed
2724 if (sock
->fd
== INVALID_SOCKET
) {
2725 UNLOCK(&sock
->lock
);
2726 return (ISC_R_CONNREFUSED
);
2728 REQUIRE(action
!= NULL
);
2730 manager
= sock
->manager
;
2731 REQUIRE(VALID_MANAGER(manager
));
2733 INSIST(sock
->bound
);
2735 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2737 UNLOCK(&sock
->lock
);
2738 return (ISC_R_NOMEMORY
);
2741 ret
= isc_socket_recv2(sock
, region
, minimum
, task
, dev
, 0);
2742 UNLOCK(&sock
->lock
);
2747 isc_socket_recv2(isc_socket_t
*sock
, isc_region_t
*region
,
2748 unsigned int minimum
, isc_task_t
*task
,
2749 isc_socketevent_t
*event
, unsigned int flags
)
2753 REQUIRE(VALID_SOCKET(sock
));
2757 event
->result
= ISC_R_UNEXPECTED
;
2758 event
->ev_sender
= sock
;
2760 * make sure that the socket's not closed
2762 if (sock
->fd
== INVALID_SOCKET
) {
2763 UNLOCK(&sock
->lock
);
2764 return (ISC_R_CONNREFUSED
);
2767 ISC_LIST_INIT(event
->bufferlist
);
2768 event
->region
= *region
;
2771 event
->attributes
= 0;
2774 * UDP sockets are always partial read.
2776 if (sock
->type
== isc_sockettype_udp
)
2780 event
->minimum
= region
->length
;
2782 event
->minimum
= minimum
;
2785 ret
= socket_recv(sock
, event
, task
, flags
);
2786 UNLOCK(&sock
->lock
);
2791 * Caller must have the socket locked.
2794 socket_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
2795 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
2801 isc_task_t
*ntask
= NULL
;
2802 isc_result_t result
= ISC_R_SUCCESS
;
2804 dev
->ev_sender
= task
;
2806 set_dev_address(address
, sock
, dev
);
2807 if (pktinfo
!= NULL
) {
2808 socket_log(__LINE__
, sock
, NULL
, TRACE
, isc_msgcat
, ISC_MSGSET_SOCKET
,
2809 ISC_MSG_PKTINFOPROVIDED
,
2810 "pktinfo structure provided, ifindex %u (set to 0)",
2811 pktinfo
->ipi6_ifindex
);
2813 dev
->attributes
|= ISC_SOCKEVENTATTR_PKTINFO
;
2814 dev
->pktinfo
= *pktinfo
;
2816 * Set the pktinfo index to 0 here, to let the kernel decide
2817 * what interface it should send on.
2819 dev
->pktinfo
.ipi6_ifindex
= 0;
2822 io_state
= startio_send(sock
, dev
, &cc
, &send_errno
);
2824 case DOIO_PENDING
: /* I/O started. Nothing more to do */
2827 * We couldn't send all or part of the request right now, so
2828 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2830 if ((flags
& ISC_SOCKFLAG_NORETRY
) == 0) {
2831 isc_task_attach(task
, &ntask
);
2832 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
2835 * Enqueue the request.
2837 INSIST(!ISC_LINK_LINKED(dev
, ev_link
));
2838 ISC_LIST_ENQUEUE(sock
->send_list
, dev
, ev_link
);
2840 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2841 "socket_send: event %p -> task %p",
2844 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
2845 result
= ISC_R_INPROGRESS
;
2857 isc_socket_send(isc_socket_t
*sock
, isc_region_t
*region
,
2858 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2861 * REQUIRE() checking is performed in isc_socket_sendto().
2863 return (isc_socket_sendto(sock
, region
, task
, action
, arg
, NULL
,
2868 isc_socket_sendto(isc_socket_t
*sock
, isc_region_t
*region
,
2869 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
,
2870 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
2872 isc_socketevent_t
*dev
;
2873 isc_socketmgr_t
*manager
;
2876 REQUIRE(VALID_SOCKET(sock
));
2877 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
2883 * make sure that the socket's not closed
2885 if (sock
->fd
== INVALID_SOCKET
) {
2886 UNLOCK(&sock
->lock
);
2887 return (ISC_R_CONNREFUSED
);
2889 REQUIRE(region
!= NULL
);
2890 REQUIRE(task
!= NULL
);
2891 REQUIRE(action
!= NULL
);
2893 manager
= sock
->manager
;
2894 REQUIRE(VALID_MANAGER(manager
));
2896 INSIST(sock
->bound
);
2898 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_SENDDONE
, action
, arg
);
2900 UNLOCK(&sock
->lock
);
2901 return (ISC_R_NOMEMORY
);
2903 dev
->region
= *region
;
2905 ret
= socket_send(sock
, dev
, task
, address
, pktinfo
, 0);
2906 UNLOCK(&sock
->lock
);
2911 isc_socket_sendv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2912 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2914 return (isc_socket_sendtov(sock
, buflist
, task
, action
, arg
, NULL
,
2919 isc_socket_sendtov(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2920 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
,
2921 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
2923 isc_socketevent_t
*dev
;
2924 isc_socketmgr_t
*manager
;
2925 unsigned int iocount
;
2926 isc_buffer_t
*buffer
;
2929 REQUIRE(VALID_SOCKET(sock
));
2935 * make sure that the socket's not closed
2937 if (sock
->fd
== INVALID_SOCKET
) {
2938 UNLOCK(&sock
->lock
);
2939 return (ISC_R_CONNREFUSED
);
2941 REQUIRE(buflist
!= NULL
);
2942 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
2943 REQUIRE(task
!= NULL
);
2944 REQUIRE(action
!= NULL
);
2946 manager
= sock
->manager
;
2947 REQUIRE(VALID_MANAGER(manager
));
2949 iocount
= isc_bufferlist_usedcount(buflist
);
2950 REQUIRE(iocount
> 0);
2952 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_SENDDONE
, action
, arg
);
2954 UNLOCK(&sock
->lock
);
2955 return (ISC_R_NOMEMORY
);
2959 * Move each buffer from the passed in list to our internal one.
2961 buffer
= ISC_LIST_HEAD(*buflist
);
2962 while (buffer
!= NULL
) {
2963 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
2964 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
2965 buffer
= ISC_LIST_HEAD(*buflist
);
2968 ret
= socket_send(sock
, dev
, task
, address
, pktinfo
, 0);
2969 UNLOCK(&sock
->lock
);
2974 isc_socket_sendto2(isc_socket_t
*sock
, isc_region_t
*region
,
2976 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
2977 isc_socketevent_t
*event
, unsigned int flags
)
2981 REQUIRE(VALID_SOCKET(sock
));
2985 REQUIRE((flags
& ~(ISC_SOCKFLAG_IMMEDIATE
|ISC_SOCKFLAG_NORETRY
)) == 0);
2986 if ((flags
& ISC_SOCKFLAG_NORETRY
) != 0)
2987 REQUIRE(sock
->type
== isc_sockettype_udp
);
2988 event
->ev_sender
= sock
;
2989 event
->result
= ISC_R_UNEXPECTED
;
2991 * make sure that the socket's not closed
2993 if (sock
->fd
== INVALID_SOCKET
) {
2994 UNLOCK(&sock
->lock
);
2995 return (ISC_R_CONNREFUSED
);
2997 ISC_LIST_INIT(event
->bufferlist
);
2998 event
->region
= *region
;
3001 event
->attributes
= 0;
3003 ret
= socket_send(sock
, event
, task
, address
, pktinfo
, flags
);
3004 UNLOCK(&sock
->lock
);
3009 isc_socket_bind(isc_socket_t
*sock
, isc_sockaddr_t
*sockaddr
,
3010 unsigned int options
) {
3012 char strbuf
[ISC_STRERRORSIZE
];
3015 REQUIRE(VALID_SOCKET(sock
));
3020 * make sure that the socket's not closed
3022 if (sock
->fd
== INVALID_SOCKET
) {
3023 UNLOCK(&sock
->lock
);
3024 return (ISC_R_CONNREFUSED
);
3027 INSIST(!sock
->bound
);
3029 if (sock
->pf
!= sockaddr
->type
.sa
.sa_family
) {
3030 UNLOCK(&sock
->lock
);
3031 return (ISC_R_FAMILYMISMATCH
);
3034 * Only set SO_REUSEADDR when we want a specific port.
3036 if ((options
& ISC_SOCKET_REUSEADDRESS
) != 0 &&
3037 isc_sockaddr_getport(sockaddr
) != (in_port_t
)0 &&
3038 setsockopt(sock
->fd
, SOL_SOCKET
, SO_REUSEADDR
, (void *)&on
,
3040 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
3041 "setsockopt(%d) %s", sock
->fd
,
3042 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
3043 ISC_MSG_FAILED
, "failed"));
3046 if (bind(sock
->fd
, &sockaddr
->type
.sa
, sockaddr
->length
) < 0) {
3047 bind_errno
= WSAGetLastError();
3048 UNLOCK(&sock
->lock
);
3049 switch (bind_errno
) {
3051 return (ISC_R_NOPERM
);
3052 case WSAEADDRNOTAVAIL
:
3053 return (ISC_R_ADDRNOTAVAIL
);
3055 return (ISC_R_ADDRINUSE
);
3057 return (ISC_R_BOUND
);
3059 isc__strerror(bind_errno
, strbuf
, sizeof(strbuf
));
3060 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "bind: %s",
3062 return (ISC_R_UNEXPECTED
);
3066 socket_log(__LINE__
, sock
, sockaddr
, TRACE
,
3067 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
, "bound");
3070 UNLOCK(&sock
->lock
);
3071 return (ISC_R_SUCCESS
);
3075 isc_socket_filter(isc_socket_t
*sock
, const char *filter
) {
3079 REQUIRE(VALID_SOCKET(sock
));
3080 return (ISC_R_NOTIMPLEMENTED
);
3084 * Set up to listen on a given socket. We do this by creating an internal
3085 * event that will be dispatched when the socket has read activity. The
3086 * watcher will send the internal event to the task when there is a new
3089 * Unlike in read, we don't preallocate a done event here. Every time there
3090 * is a new connection we'll have to allocate a new one anyway, so we might
3091 * as well keep things simple rather than having to track them.
3094 isc_socket_listen(isc_socket_t
*sock
, unsigned int backlog
) {
3095 char strbuf
[ISC_STRERRORSIZE
];
3097 REQUIRE(VALID_SOCKET(sock
));
3103 * make sure that the socket's not closed
3105 if (sock
->fd
== INVALID_SOCKET
) {
3106 UNLOCK(&sock
->lock
);
3107 return (ISC_R_CONNREFUSED
);
3110 REQUIRE(!sock
->listener
);
3111 REQUIRE(sock
->bound
);
3112 REQUIRE(sock
->type
== isc_sockettype_tcp
);
3115 backlog
= SOMAXCONN
;
3117 if (listen(sock
->fd
, (int)backlog
) < 0) {
3118 UNLOCK(&sock
->lock
);
3119 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
3121 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "listen: %s", strbuf
);
3123 return (ISC_R_UNEXPECTED
);
3126 socket_log(__LINE__
, sock
, NULL
, TRACE
,
3127 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
, "listening");
3129 _set_state(sock
, SOCK_LISTEN
);
3131 UNLOCK(&sock
->lock
);
3132 return (ISC_R_SUCCESS
);
3136 * This should try to do aggressive accept() XXXMLG
3139 isc_socket_accept(isc_socket_t
*sock
,
3140 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
3142 isc_socket_newconnev_t
*adev
;
3143 isc_socketmgr_t
*manager
;
3144 isc_task_t
*ntask
= NULL
;
3145 isc_socket_t
*nsock
;
3146 isc_result_t result
;
3147 IoCompletionInfo
*lpo
;
3149 REQUIRE(VALID_SOCKET(sock
));
3151 manager
= sock
->manager
;
3152 REQUIRE(VALID_MANAGER(manager
));
3158 * make sure that the socket's not closed
3160 if (sock
->fd
== INVALID_SOCKET
) {
3161 UNLOCK(&sock
->lock
);
3162 return (ISC_R_CONNREFUSED
);
3165 REQUIRE(sock
->listener
);
3168 * Sender field is overloaded here with the task we will be sending
3169 * this event to. Just before the actual event is delivered the
3170 * actual ev_sender will be touched up to be the socket.
3172 adev
= (isc_socket_newconnev_t
*)
3173 isc_event_allocate(manager
->mctx
, task
, ISC_SOCKEVENT_NEWCONN
,
3174 action
, arg
, sizeof(*adev
));
3176 UNLOCK(&sock
->lock
);
3177 return (ISC_R_NOMEMORY
);
3179 ISC_LINK_INIT(adev
, ev_link
);
3181 result
= allocate_socket(manager
, sock
->type
, &nsock
);
3182 if (result
!= ISC_R_SUCCESS
) {
3183 isc_event_free((isc_event_t
**)&adev
);
3184 UNLOCK(&sock
->lock
);
3189 * AcceptEx() requires we pass in a socket.
3191 nsock
->fd
= socket(sock
->pf
, SOCK_STREAM
, IPPROTO_TCP
);
3192 if (nsock
->fd
== INVALID_SOCKET
) {
3193 free_socket(&nsock
, __LINE__
);
3194 isc_event_free((isc_event_t
**)&adev
);
3195 UNLOCK(&sock
->lock
);
3196 return (ISC_R_FAILURE
); // XXXMLG need real error message
3200 * Attach to socket and to task.
3202 isc_task_attach(task
, &ntask
);
3203 nsock
->references
++;
3205 adev
->ev_sender
= ntask
;
3206 adev
->newsocket
= nsock
;
3207 _set_state(nsock
, SOCK_ACCEPT
);
3210 * Queue io completion for an accept().
3212 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
3214 sizeof(IoCompletionInfo
));
3215 RUNTIME_CHECK(lpo
!= NULL
);
3216 lpo
->acceptbuffer
= (void *)HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
,
3217 (sizeof(SOCKADDR_STORAGE
) + 16) * 2);
3218 RUNTIME_CHECK(lpo
->acceptbuffer
!= NULL
);
3221 lpo
->request_type
= SOCKET_ACCEPT
;
3223 ISCAcceptEx(sock
->fd
,
3224 nsock
->fd
, /* Accepted Socket */
3225 lpo
->acceptbuffer
, /* Buffer for initial Recv */
3226 0, /* Length of Buffer */
3227 sizeof(SOCKADDR_STORAGE
) + 16, /* Local address length + 16 */
3228 sizeof(SOCKADDR_STORAGE
) + 16, /* Remote address lengh + 16 */
3229 (LPDWORD
)&lpo
->received_bytes
, /* Bytes Recved */
3230 (LPOVERLAPPED
)lpo
/* Overlapped structure */
3232 iocompletionport_update(nsock
);
3234 socket_log(__LINE__
, sock
, NULL
, TRACE
,
3235 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
,
3236 "accepting for nsock %p fd %d", nsock
, nsock
->fd
);
3241 ISC_LIST_ENQUEUE(sock
->accept_list
, adev
, ev_link
);
3242 sock
->pending_accept
++;
3243 sock
->pending_iocp
++;
3245 UNLOCK(&sock
->lock
);
3246 return (ISC_R_SUCCESS
);
3250 isc_socket_connect(isc_socket_t
*sock
, isc_sockaddr_t
*addr
,
3251 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
3253 char strbuf
[ISC_STRERRORSIZE
];
3254 isc_socket_connev_t
*cdev
;
3255 isc_task_t
*ntask
= NULL
;
3256 isc_socketmgr_t
*manager
;
3257 IoCompletionInfo
*lpo
;
3260 REQUIRE(VALID_SOCKET(sock
));
3261 REQUIRE(addr
!= NULL
);
3262 REQUIRE(task
!= NULL
);
3263 REQUIRE(action
!= NULL
);
3265 manager
= sock
->manager
;
3266 REQUIRE(VALID_MANAGER(manager
));
3267 REQUIRE(addr
!= NULL
);
3269 if (isc_sockaddr_ismulticast(addr
))
3270 return (ISC_R_MULTICAST
);
3276 * make sure that the socket's not closed
3278 if (sock
->fd
== INVALID_SOCKET
) {
3279 UNLOCK(&sock
->lock
);
3280 return (ISC_R_CONNREFUSED
);
3284 * Windows sockets won't connect unless the socket is bound.
3289 isc_sockaddr_anyofpf(&any
, isc_sockaddr_pf(addr
));
3290 if (bind(sock
->fd
, &any
.type
.sa
, any
.length
) < 0) {
3291 bind_errno
= WSAGetLastError();
3292 UNLOCK(&sock
->lock
);
3293 switch (bind_errno
) {
3295 return (ISC_R_NOPERM
);
3296 case WSAEADDRNOTAVAIL
:
3297 return (ISC_R_ADDRNOTAVAIL
);
3299 return (ISC_R_ADDRINUSE
);
3301 return (ISC_R_BOUND
);
3303 isc__strerror(bind_errno
, strbuf
,
3305 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
3306 "bind: %s", strbuf
);
3307 return (ISC_R_UNEXPECTED
);
3313 REQUIRE(!sock
->pending_connect
);
3315 cdev
= (isc_socket_connev_t
*)isc_event_allocate(manager
->mctx
, sock
,
3316 ISC_SOCKEVENT_CONNECT
,
3320 UNLOCK(&sock
->lock
);
3321 return (ISC_R_NOMEMORY
);
3323 ISC_LINK_INIT(cdev
, ev_link
);
3325 if (sock
->type
== isc_sockettype_tcp
) {
3327 * Queue io completion for an accept().
3329 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
3331 sizeof(IoCompletionInfo
));
3333 lpo
->request_type
= SOCKET_CONNECT
;
3335 sock
->address
= *addr
;
3336 ISCConnectEx(sock
->fd
, &addr
->type
.sa
, addr
->length
,
3337 NULL
, 0, NULL
, (LPOVERLAPPED
)lpo
);
3342 isc_task_attach(task
, &ntask
);
3343 cdev
->ev_sender
= ntask
;
3345 sock
->pending_connect
= 1;
3346 _set_state(sock
, SOCK_CONNECT
);
3349 * Enqueue the request.
3351 sock
->connect_ev
= cdev
;
3352 sock
->pending_iocp
++;
3354 WSAConnect(sock
->fd
, &addr
->type
.sa
, addr
->length
, NULL
, NULL
, NULL
, NULL
);
3355 cdev
->result
= ISC_R_SUCCESS
;
3356 isc_task_send(task
, (isc_event_t
**)&cdev
);
3359 UNLOCK(&sock
->lock
);
3361 return (ISC_R_SUCCESS
);
3365 isc_socket_getpeername(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3366 isc_result_t result
;
3368 REQUIRE(VALID_SOCKET(sock
));
3369 REQUIRE(addressp
!= NULL
);
3375 * make sure that the socket's not closed
3377 if (sock
->fd
== INVALID_SOCKET
) {
3378 UNLOCK(&sock
->lock
);
3379 return (ISC_R_CONNREFUSED
);
3382 if (sock
->connected
) {
3383 *addressp
= sock
->address
;
3384 result
= ISC_R_SUCCESS
;
3386 result
= ISC_R_NOTCONNECTED
;
3389 UNLOCK(&sock
->lock
);
3395 isc_socket_getsockname(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3396 ISC_SOCKADDR_LEN_T len
;
3397 isc_result_t result
;
3398 char strbuf
[ISC_STRERRORSIZE
];
3400 REQUIRE(VALID_SOCKET(sock
));
3401 REQUIRE(addressp
!= NULL
);
3407 * make sure that the socket's not closed
3409 if (sock
->fd
== INVALID_SOCKET
) {
3410 UNLOCK(&sock
->lock
);
3411 return (ISC_R_CONNREFUSED
);
3415 result
= ISC_R_NOTBOUND
;
3419 result
= ISC_R_SUCCESS
;
3421 len
= sizeof(addressp
->type
);
3422 if (getsockname(sock
->fd
, &addressp
->type
.sa
, (void *)&len
) < 0) {
3423 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
3424 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "getsockname: %s",
3426 result
= ISC_R_UNEXPECTED
;
3429 addressp
->length
= (unsigned int)len
;
3432 UNLOCK(&sock
->lock
);
3438 * Run through the list of events on this socket, and cancel the ones
3439 * queued for task "task" of type "how". "how" is a bitmask.
3442 isc_socket_cancel(isc_socket_t
*sock
, isc_task_t
*task
, unsigned int how
) {
3444 REQUIRE(VALID_SOCKET(sock
));
3447 * Quick exit if there is nothing to do. Don't even bother locking
3457 * make sure that the socket's not closed
3459 if (sock
->fd
== INVALID_SOCKET
) {
3460 UNLOCK(&sock
->lock
);
3465 * All of these do the same thing, more or less.
3467 * o If the internal event is marked as "posted" try to
3468 * remove it from the task's queue. If this fails, mark it
3469 * as canceled instead, and let the task clean it up later.
3470 * o For each I/O request for that task of that type, post
3471 * its done event with status of "ISC_R_CANCELED".
3472 * o Reset any state needed.
3475 if ((how
& ISC_SOCKCANCEL_RECV
) == ISC_SOCKCANCEL_RECV
) {
3476 isc_socketevent_t
*dev
;
3477 isc_socketevent_t
*next
;
3478 isc_task_t
*current_task
;
3480 dev
= ISC_LIST_HEAD(sock
->recv_list
);
3481 while (dev
!= NULL
) {
3482 current_task
= dev
->ev_sender
;
3483 next
= ISC_LIST_NEXT(dev
, ev_link
);
3484 if ((task
== NULL
) || (task
== current_task
)) {
3485 dev
->result
= ISC_R_CANCELED
;
3486 send_recvdone_event(sock
, &dev
);
3491 how
&= ~ISC_SOCKCANCEL_RECV
;
3493 if ((how
& ISC_SOCKCANCEL_SEND
) == ISC_SOCKCANCEL_SEND
) {
3494 isc_socketevent_t
*dev
;
3495 isc_socketevent_t
*next
;
3496 isc_task_t
*current_task
;
3498 dev
= ISC_LIST_HEAD(sock
->send_list
);
3500 while (dev
!= NULL
) {
3501 current_task
= dev
->ev_sender
;
3502 next
= ISC_LIST_NEXT(dev
, ev_link
);
3503 if ((task
== NULL
) || (task
== current_task
)) {
3504 dev
->result
= ISC_R_CANCELED
;
3505 send_senddone_event(sock
, &dev
);
3510 how
&= ~ISC_SOCKCANCEL_SEND
;
3512 if (((how
& ISC_SOCKCANCEL_ACCEPT
) == ISC_SOCKCANCEL_ACCEPT
)
3513 && !ISC_LIST_EMPTY(sock
->accept_list
)) {
3514 isc_socket_newconnev_t
*dev
;
3515 isc_socket_newconnev_t
*next
;
3516 isc_task_t
*current_task
;
3518 dev
= ISC_LIST_HEAD(sock
->accept_list
);
3519 while (dev
!= NULL
) {
3520 current_task
= dev
->ev_sender
;
3521 next
= ISC_LIST_NEXT(dev
, ev_link
);
3523 if ((task
== NULL
) || (task
== current_task
)) {
3525 dev
->newsocket
->references
--;
3526 closesocket(dev
->newsocket
->fd
);
3527 dev
->newsocket
->fd
= INVALID_SOCKET
;
3528 free_socket(&dev
->newsocket
, __LINE__
);
3530 dev
->result
= ISC_R_CANCELED
;
3531 send_acceptdone_event(sock
, &dev
);
3537 how
&= ~ISC_SOCKCANCEL_ACCEPT
;
3540 * Connecting is not a list.
3542 if (((how
& ISC_SOCKCANCEL_CONNECT
) == ISC_SOCKCANCEL_CONNECT
)
3543 && sock
->connect_ev
!= NULL
) {
3544 isc_socket_connev_t
*dev
;
3545 isc_task_t
*current_task
;
3547 INSIST(sock
->pending_connect
);
3549 dev
= sock
->connect_ev
;
3550 current_task
= dev
->ev_sender
;
3552 if ((task
== NULL
) || (task
== current_task
)) {
3553 closesocket(sock
->fd
);
3554 sock
->fd
= INVALID_SOCKET
;
3555 _set_state(sock
, SOCK_CLOSED
);
3557 sock
->connect_ev
= NULL
;
3558 dev
->result
= ISC_R_CANCELED
;
3559 send_connectdone_event(sock
, &dev
);
3562 how
&= ~ISC_SOCKCANCEL_CONNECT
;
3564 maybe_free_socket(&sock
, __LINE__
);
3568 isc_socket_gettype(isc_socket_t
*sock
) {
3569 isc_sockettype_t type
;
3571 REQUIRE(VALID_SOCKET(sock
));
3576 * make sure that the socket's not closed
3578 if (sock
->fd
== INVALID_SOCKET
) {
3579 UNLOCK(&sock
->lock
);
3580 return (ISC_R_CONNREFUSED
);
3584 UNLOCK(&sock
->lock
);
3589 isc_socket_isbound(isc_socket_t
*sock
) {
3592 REQUIRE(VALID_SOCKET(sock
));
3598 * make sure that the socket's not closed
3600 if (sock
->fd
== INVALID_SOCKET
) {
3601 UNLOCK(&sock
->lock
);
3605 val
= ((sock
->bound
) ? ISC_TRUE
: ISC_FALSE
);
3606 UNLOCK(&sock
->lock
);
3612 isc_socket_ipv6only(isc_socket_t
*sock
, isc_boolean_t yes
) {
3613 #if defined(IPV6_V6ONLY)
3614 int onoff
= yes
? 1 : 0;
3619 REQUIRE(VALID_SOCKET(sock
));
3622 if (sock
->pf
== AF_INET6
) {
3623 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_V6ONLY
,
3624 (void *)&onoff
, sizeof(onoff
));
3630 isc_socket_cleanunix(isc_sockaddr_t
*addr
, isc_boolean_t active
) {
3636 isc_socket_permunix(isc_sockaddr_t
*addr
, isc_uint32_t perm
,
3637 isc_uint32_t owner
, isc_uint32_t group
)
3643 return (ISC_R_NOTIMPLEMENTED
);
3647 isc_socket_setname(isc_socket_t
*socket
, const char *name
, void *tag
) {
3653 REQUIRE(VALID_SOCKET(socket
));
3655 LOCK(&socket
->lock
);
3656 memset(socket
->name
, 0, sizeof(socket
->name
));
3657 strncpy(socket
->name
, name
, sizeof(socket
->name
) - 1);
3659 UNLOCK(&socket
->lock
);
3663 isc_socket_getname(isc_socket_t
*socket
) {
3664 return (socket
->name
);
3668 isc_socket_gettag(isc_socket_t
*socket
) {
3669 return (socket
->tag
);
3673 isc__socketmgr_setreserved(isc_socketmgr_t
*manager
, isc_uint32_t reserved
) {