1 /* $NetBSD: socket.c,v 1.10 2015/07/08 17:29:00 christos Exp $ */
4 * Copyright (C) 2004-2015 Internet Systems Consortium, Inc. ("ISC")
5 * Copyright (C) 2000-2003 Internet Software Consortium.
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
22 /* This code uses functions which are only available on Server 2003 and
23 * higher, and Windows XP and higher.
25 * This code is by nature multithreaded and takes advantage of various
26 * features to pass on information through the completion port for
27 * when I/O is completed. All sends, receives, accepts, and connects are
28 * completed through the completion port.
30 * The number of Completion Port Worker threads used is the total number
31 * of CPUs + 1. This increases the likelihood that a Worker Thread is
32 * available for processing a completed request.
34 * XXXPDM 5 August, 2002
37 #define MAKE_EXTERNAL 1
40 #include <sys/types.h>
43 #define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */
56 #include <isc/buffer.h>
57 #include <isc/bufferlist.h>
58 #include <isc/condition.h>
63 #include <isc/mutex.h>
67 #include <isc/platform.h>
68 #include <isc/print.h>
69 #include <isc/region.h>
70 #include <isc/socket.h>
71 #include <isc/stats.h>
72 #include <isc/strerror.h>
73 #include <isc/syslog.h>
75 #include <isc/thread.h>
77 #include <isc/win32os.h>
81 #include "errno2result.h"
84 * Set by the -T dscp option on the command line. If set to a value
85 * other than -1, we check to make sure DSCP values match it, and
88 int isc_dscp_check_value
= -1;
91 * How in the world can Microsoft exist with APIs like this?
92 * We can't actually call this directly, because it turns out
93 * no library exports this function. Instead, we need to
94 * issue a runtime call to get the address.
96 LPFN_CONNECTEX ISCConnectEx
;
97 LPFN_ACCEPTEX ISCAcceptEx
;
98 LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs
;
101 * Run expensive internal consistency checks.
103 #ifdef ISC_SOCKET_CONSISTENCY_CHECKS
104 #define CONSISTENT(sock) consistent(sock)
106 #define CONSISTENT(sock) do {} while (/*CONSTCOND*/0)
108 static void consistent(isc_socket_t
*sock
);
111 * Define this macro to control the behavior of connection
112 * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823
114 * NOTE: This requires that Windows 2000 systems install Service Pack 2
117 #ifndef SIO_UDP_CONNRESET
118 #define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
122 * Some systems define the socket length argument as an int, some as size_t,
123 * some as socklen_t. This is here so it can be easily changed if needed.
125 #ifndef ISC_SOCKADDR_LEN_T
126 #define ISC_SOCKADDR_LEN_T unsigned int
130 * Define what the possible "soft" errors can be. These are non-fatal returns
131 * of various network related functions, like recv() and so on.
133 #define SOFT_ERROR(e) ((e) == WSAEINTR || \
134 (e) == WSAEWOULDBLOCK || \
135 (e) == EWOULDBLOCK || \
141 * Pending errors are not really errors and should be
144 #define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)
146 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
147 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
148 #define DOIO_HARD 2 /* i/o error, event sent */
149 #define DOIO_EOF 3 /* EOF, no event sent */
150 #define DOIO_PENDING 4 /* status when i/o is in process */
151 #define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */
153 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
156 * DLVL(90) -- Function entry/exit and other tracing.
157 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
158 * DLVL(60) -- Socket data send/receive
159 * DLVL(50) -- Event tracing, including receiving/sending completion events.
160 * DLVL(20) -- Socket creation/destruction.
162 #define TRACE_LEVEL 90
163 #define CORRECTNESS_LEVEL 70
164 #define IOEVENT_LEVEL 60
165 #define EVENT_LEVEL 50
166 #define CREATION_LEVEL 20
168 #define TRACE DLVL(TRACE_LEVEL)
169 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
170 #define IOEVENT DLVL(IOEVENT_LEVEL)
171 #define EVENT DLVL(EVENT_LEVEL)
172 #define CREATION DLVL(CREATION_LEVEL)
174 typedef isc_event_t intev_t
;
180 SOCK_INITIALIZED
, /* Socket Initialized */
181 SOCK_OPEN
, /* Socket opened but nothing yet to do */
182 SOCK_DATA
, /* Socket sending or receiving data */
183 SOCK_LISTEN
, /* TCP Socket listening for connects */
184 SOCK_ACCEPT
, /* TCP socket is waiting to accept */
185 SOCK_CONNECT
, /* TCP Socket connecting */
186 SOCK_CLOSED
, /* Socket has been closed */
189 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
190 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
193 * IPv6 control information. If the socket is an IPv6 socket we want
194 * to collect the destination address and interface so the client can
195 * set them on outgoing packets.
197 #ifdef ISC_PLATFORM_HAVEIPV6
204 * We really don't want to try and use these control messages. Win32
205 * doesn't have this mechanism before XP.
210 * Message header for recvmsg and sendmsg calls.
211 * Used value-result for recvmsg, value only for sendmsg.
214 SOCKADDR_STORAGE to_addr
; /* UDP send/recv address */
215 int to_addr_len
; /* length of the address */
216 WSABUF
*msg_iov
; /* scatter/gather array */
217 u_int msg_iovlen
; /* # elements in msg_iov */
218 void *msg_control
; /* ancillary data, see below */
219 u_int msg_controllen
; /* ancillary data buffer len */
220 u_int msg_totallen
; /* total length of this message */
224 * The size to raise the receive buffer to.
226 #define RCVBUFSIZE (32*1024)
229 * The number of times a send operation is repeated if the result
237 isc_socketmgr_t
*manager
;
239 isc_sockettype_t type
;
241 /* Pointers to scatter/gather buffers */
242 WSABUF iov
[ISC_SOCKET_MAXSCATTERGATHER
];
244 /* Locked by socket lock. */
245 ISC_LINK(isc_socket_t
) link
;
246 unsigned int references
; /* EXTERNAL references */
247 SOCKET fd
; /* file handle */
248 int pf
; /* protocol family */
253 * Each recv() call uses this buffer. It is a per-socket receive
254 * buffer that allows us to decouple the system recv() from the
255 * recv_list done events. This means the items on the recv_list
256 * can be removed without having to cancel pending system recv()
257 * calls. It also allows us to read-ahead in some cases.
260 SOCKADDR_STORAGE from_addr
; // UDP send/recv address
261 int from_addr_len
; // length of the address
262 char *base
; // the base of the buffer
263 char *consume_position
; // where to start copying data from next
264 unsigned int len
; // the actual size of this buffer
265 unsigned int remaining
; // the number of bytes remaining
268 ISC_LIST(isc_socketevent_t
) send_list
;
269 ISC_LIST(isc_socketevent_t
) recv_list
;
270 ISC_LIST(isc_socket_newconnev_t
) accept_list
;
271 isc_socket_connev_t
*connect_ev
;
273 isc_sockaddr_t address
; /* remote address */
275 unsigned int listener
: 1, /* listener socket */
277 pending_connect
: 1, /* connect pending */
278 bound
: 1, /* bound to local addr */
279 dupped
: 1; /* created by isc_socket_dup() */
280 unsigned int pending_iocp
; /* Should equal the counters below. Debug. */
281 unsigned int pending_recv
; /* Number of outstanding recv() calls. */
282 unsigned int pending_send
; /* Number of outstanding send() calls. */
283 unsigned int pending_accept
; /* Number of outstanding accept() calls. */
284 unsigned int state
; /* Socket state. Debugging and consistency checking. */
285 int state_lineno
; /* line which last touched state */
288 #define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (/*CONSTCOND*/0)
293 typedef struct buflist buflist_t
;
298 ISC_LINK(buflist_t
) link
;
302 * I/O Completion ports Info structures
305 static HANDLE hHeapHandle
= NULL
;
306 typedef struct IoCompletionInfo
{
307 OVERLAPPED overlapped
;
308 isc_socketevent_t
*dev
; /* send()/recv() done event */
309 isc_socket_connev_t
*cdev
; /* connect() done event */
310 isc_socket_newconnev_t
*adev
; /* accept() done event */
312 DWORD received_bytes
;
314 struct msghdr messagehdr
;
315 ISC_LIST(buflist_t
) bufferlist
; /*%< list of buffers */
319 * Define a maximum number of I/O Completion Port worker threads
320 * to handle the load on the Completion Port. The actual number
321 * used is the number of CPUs + 1.
323 #define MAX_IOCPTHREADS 20
325 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')
326 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
328 struct isc_socketmgr
{
335 /* Locked by manager lock. */
336 ISC_LIST(isc_socket_t
) socklist
;
337 isc_boolean_t bShutdown
;
338 isc_condition_t shutdown_ok
;
339 HANDLE hIoCompletionPort
;
341 HANDLE hIOCPThreads
[MAX_IOCPTHREADS
];
342 DWORD dwIOCPThreadIds
[MAX_IOCPTHREADS
];
346 * Modified by InterlockedIncrement() and InterlockedDecrement()
360 * send() and recv() iovec counts
362 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
363 #define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
365 static isc_result_t
socket_create(isc_socketmgr_t
*manager0
, int pf
,
366 isc_sockettype_t type
,
367 isc_socket_t
**socketp
,
368 isc_socket_t
*dup_socket
);
369 static isc_threadresult_t WINAPI
SocketIoThread(LPVOID ThreadContext
);
370 static void maybe_free_socket(isc_socket_t
**, int);
371 static void free_socket(isc_socket_t
**, int);
372 static isc_boolean_t
senddone_is_active(isc_socket_t
*sock
, isc_socketevent_t
*dev
);
373 static isc_boolean_t
acceptdone_is_active(isc_socket_t
*sock
, isc_socket_newconnev_t
*dev
);
374 static isc_boolean_t
connectdone_is_active(isc_socket_t
*sock
, isc_socket_connev_t
*dev
);
375 static void send_recvdone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
);
376 static void send_senddone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
);
377 static void send_acceptdone_event(isc_socket_t
*sock
, isc_socket_newconnev_t
**adev
);
378 static void send_connectdone_event(isc_socket_t
*sock
, isc_socket_connev_t
**cdev
);
379 static void send_recvdone_abort(isc_socket_t
*sock
, isc_result_t result
);
380 static void queue_receive_event(isc_socket_t
*sock
, isc_task_t
*task
, isc_socketevent_t
*dev
);
381 static void queue_receive_request(isc_socket_t
*sock
);
384 * This is used to dump the contents of the sock structure
385 * You should make sure that the sock is locked before
386 * dumping it. Since the code uses simple printf() statements
387 * it should only be used interactively.
390 sock_dump(isc_socket_t
*sock
) {
391 isc_socketevent_t
*ldev
;
392 isc_socket_newconnev_t
*ndev
;
396 char socktext
[ISC_SOCKADDR_FORMATSIZE
];
399 result
= isc_socket_getpeername(sock
, &addr
);
400 if (result
== ISC_R_SUCCESS
) {
401 isc_sockaddr_format(&addr
, socktext
, sizeof(socktext
));
402 printf("Remote Socket: %s\n", socktext
);
404 result
= isc_socket_getsockname(sock
, &addr
);
405 if (result
== ISC_R_SUCCESS
) {
406 isc_sockaddr_format(&addr
, socktext
, sizeof(socktext
));
407 printf("This Socket: %s\n", socktext
);
411 printf("\n\t\tSock Dump\n");
412 printf("\t\tfd: %u\n", sock
->fd
);
413 printf("\t\treferences: %d\n", sock
->references
);
414 printf("\t\tpending_accept: %d\n", sock
->pending_accept
);
415 printf("\t\tconnecting: %d\n", sock
->pending_connect
);
416 printf("\t\tconnected: %d\n", sock
->connected
);
417 printf("\t\tbound: %d\n", sock
->bound
);
418 printf("\t\tpending_iocp: %d\n", sock
->pending_iocp
);
419 printf("\t\tsocket type: %d\n", sock
->type
);
421 printf("\n\t\tSock Recv List\n");
422 ldev
= ISC_LIST_HEAD(sock
->recv_list
);
423 while (ldev
!= NULL
) {
424 printf("\t\tdev: %p\n", ldev
);
425 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
428 printf("\n\t\tSock Send List\n");
429 ldev
= ISC_LIST_HEAD(sock
->send_list
);
430 while (ldev
!= NULL
) {
431 printf("\t\tdev: %p\n", ldev
);
432 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
435 printf("\n\t\tSock Accept List\n");
436 ndev
= ISC_LIST_HEAD(sock
->accept_list
);
437 while (ndev
!= NULL
) {
438 printf("\t\tdev: %p\n", ldev
);
439 ndev
= ISC_LIST_NEXT(ndev
, ev_link
);
444 socket_log(int lineno
, isc_socket_t
*sock
, isc_sockaddr_t
*address
,
445 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
446 isc_msgcat_t
*msgcat
, int msgset
, int message
,
447 const char *fmt
, ...) ISC_FORMAT_PRINTF(9, 10);
449 /* This function will add an entry to the I/O completion port
450 * that will signal the I/O thread to exit (gracefully)
453 signal_iocompletionport_exit(isc_socketmgr_t
*manager
) {
456 char strbuf
[ISC_STRERRORSIZE
];
458 REQUIRE(VALID_MANAGER(manager
));
459 for (i
= 0; i
< manager
->maxIOCPThreads
; i
++) {
460 if (!PostQueuedCompletionStatus(manager
->hIoCompletionPort
,
462 errval
= GetLastError();
463 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
464 FATAL_ERROR(__FILE__
, __LINE__
,
465 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
467 "Can't request service thread to exit: %s"),
474 * Create the worker threads for the I/O Completion Port
477 iocompletionport_createthreads(int total_threads
, isc_socketmgr_t
*manager
) {
479 char strbuf
[ISC_STRERRORSIZE
];
482 INSIST(total_threads
> 0);
483 REQUIRE(VALID_MANAGER(manager
));
485 * We need at least one
487 for (i
= 0; i
< total_threads
; i
++) {
488 manager
->hIOCPThreads
[i
] = CreateThread(NULL
, 0, SocketIoThread
,
490 &manager
->dwIOCPThreadIds
[i
]);
491 if (manager
->hIOCPThreads
[i
] == NULL
) {
492 errval
= GetLastError();
493 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
494 FATAL_ERROR(__FILE__
, __LINE__
,
495 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
497 "Can't create IOCP thread: %s"),
505 * Create/initialise the I/O completion port
508 iocompletionport_init(isc_socketmgr_t
*manager
) {
510 char strbuf
[ISC_STRERRORSIZE
];
512 REQUIRE(VALID_MANAGER(manager
));
514 * Create a private heap to handle the socket overlapped structure
515 * The minimum number of structures is 10, there is no maximum
517 hHeapHandle
= HeapCreate(0, 10 * sizeof(IoCompletionInfo
), 0);
518 if (hHeapHandle
== NULL
) {
519 errval
= GetLastError();
520 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
521 FATAL_ERROR(__FILE__
, __LINE__
,
522 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
524 "HeapCreate() failed during "
525 "initialization: %s"),
530 manager
->maxIOCPThreads
= min(isc_os_ncpus() + 1, MAX_IOCPTHREADS
);
532 /* Now Create the Completion Port */
533 manager
->hIoCompletionPort
= CreateIoCompletionPort(
534 INVALID_HANDLE_VALUE
, NULL
,
535 0, manager
->maxIOCPThreads
);
536 if (manager
->hIoCompletionPort
== NULL
) {
537 errval
= GetLastError();
538 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
539 FATAL_ERROR(__FILE__
, __LINE__
,
540 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
542 "CreateIoCompletionPort() failed "
543 "during initialization: %s"),
549 * Worker threads for servicing the I/O
551 iocompletionport_createthreads(manager
->maxIOCPThreads
, manager
);
555 * Associate a socket with an IO Completion Port. This allows us to queue events for it
556 * and have our worker pool of threads process them.
559 iocompletionport_update(isc_socket_t
*sock
) {
561 char strbuf
[ISC_STRERRORSIZE
];
563 REQUIRE(VALID_SOCKET(sock
));
565 hiocp
= CreateIoCompletionPort((HANDLE
)sock
->fd
,
566 sock
->manager
->hIoCompletionPort
, (ULONG_PTR
)sock
, 0);
569 DWORD errval
= GetLastError();
570 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
571 isc_log_iwrite(isc_lctx
,
572 ISC_LOGCATEGORY_GENERAL
,
573 ISC_LOGMODULE_SOCKET
, ISC_LOG_ERROR
,
574 isc_msgcat
, ISC_MSGSET_SOCKET
,
575 ISC_MSG_TOOMANYHANDLES
,
576 "iocompletionport_update: failed to open"
577 " io completion port: %s",
580 /* XXXMLG temporary hack to make failures detected.
581 * This function should return errors to the caller, not
584 FATAL_ERROR(__FILE__
, __LINE__
,
585 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
587 "CreateIoCompletionPort() failed "
588 "during initialization: %s"),
593 InterlockedIncrement(&sock
->manager
->iocp_total
);
597 * Routine to cleanup and then close the socket.
598 * Only close the socket here if it is NOT associated
599 * with an event, otherwise the WSAWaitForMultipleEvents
600 * may fail due to the fact that the Wait should not
601 * be running while closing an event or a socket.
602 * The socket is locked before calling this function
605 socket_close(isc_socket_t
*sock
) {
607 REQUIRE(sock
!= NULL
);
609 if (sock
->fd
!= INVALID_SOCKET
) {
610 closesocket(sock
->fd
);
611 sock
->fd
= INVALID_SOCKET
;
612 _set_state(sock
, SOCK_CLOSED
);
613 InterlockedDecrement(&sock
->manager
->totalSockets
);
617 static isc_once_t initialise_once
= ISC_ONCE_INIT
;
618 static isc_boolean_t initialised
= ISC_FALSE
;
622 WORD wVersionRequested
;
626 GUID GUIDConnectEx
= WSAID_CONNECTEX
;
627 GUID GUIDAcceptEx
= WSAID_ACCEPTEX
;
628 GUID GUIDGetAcceptExSockaddrs
= WSAID_GETACCEPTEXSOCKADDRS
;
631 /* Need Winsock 2.2 or better */
632 wVersionRequested
= MAKEWORD(2, 2);
634 err
= WSAStartup(wVersionRequested
, &wsaData
);
636 char strbuf
[ISC_STRERRORSIZE
];
637 isc__strerror(err
, strbuf
, sizeof(strbuf
));
638 FATAL_ERROR(__FILE__
, __LINE__
, "WSAStartup() %s: %s",
639 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
640 ISC_MSG_FAILED
, "failed"),
645 * The following APIs do not exist as functions in a library, but we must
646 * ask winsock for them. They are "extensions" -- but why they cannot be
647 * actual functions is beyond me. So, ask winsock for the pointers to the
650 sock
= socket(AF_INET
, SOCK_STREAM
, IPPROTO_TCP
);
651 INSIST(sock
!= INVALID_SOCKET
);
652 err
= WSAIoctl(sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
653 &GUIDConnectEx
, sizeof(GUIDConnectEx
),
654 &ISCConnectEx
, sizeof(ISCConnectEx
),
655 &dwBytes
, NULL
, NULL
);
658 err
= WSAIoctl(sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
659 &GUIDAcceptEx
, sizeof(GUIDAcceptEx
),
660 &ISCAcceptEx
, sizeof(ISCAcceptEx
),
661 &dwBytes
, NULL
, NULL
);
664 err
= WSAIoctl(sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
665 &GUIDGetAcceptExSockaddrs
, sizeof(GUIDGetAcceptExSockaddrs
),
666 &ISCGetAcceptExSockaddrs
, sizeof(ISCGetAcceptExSockaddrs
),
667 &dwBytes
, NULL
, NULL
);
672 initialised
= ISC_TRUE
;
676 * Initialize socket services
680 RUNTIME_CHECK(isc_once_do(&initialise_once
,
681 initialise
) == ISC_R_SUCCESS
);
687 internal_sendmsg(isc_socket_t
*sock
, IoCompletionInfo
*lpo
,
688 struct msghdr
*messagehdr
, int flags
, int *Error
)
696 Result
= WSASendTo(sock
->fd
, messagehdr
->msg_iov
,
697 messagehdr
->msg_iovlen
, &BytesSent
,
698 Flags
, (SOCKADDR
*)&messagehdr
->to_addr
,
699 messagehdr
->to_addr_len
, (LPWSAOVERLAPPED
)lpo
,
702 total_sent
= (int)BytesSent
;
704 /* Check for errors.*/
705 if (Result
== SOCKET_ERROR
) {
706 *Error
= WSAGetLastError();
709 case WSA_IO_INCOMPLETE
:
710 case WSA_WAIT_IO_COMPLETION
:
712 case NO_ERROR
: /* Strange, but okay */
713 sock
->pending_iocp
++;
714 sock
->pending_send
++;
722 sock
->pending_iocp
++;
723 sock
->pending_send
++;
733 queue_receive_request(isc_socket_t
*sock
) {
740 IoCompletionInfo
*lpo
= NULL
;
741 isc_result_t isc_result
;
744 need_retry
= ISC_FALSE
;
747 * If we already have a receive pending, do nothing.
749 if (sock
->pending_recv
> 0) {
751 HeapFree(hHeapHandle
, 0, lpo
);
756 * If no one is waiting, do nothing.
758 if (ISC_LIST_EMPTY(sock
->recv_list
)) {
760 HeapFree(hHeapHandle
, 0, lpo
);
764 INSIST(sock
->recvbuf
.remaining
== 0);
765 INSIST(sock
->fd
!= INVALID_SOCKET
);
767 iov
[0].len
= sock
->recvbuf
.len
;
768 iov
[0].buf
= sock
->recvbuf
.base
;
771 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
773 sizeof(IoCompletionInfo
));
774 RUNTIME_CHECK(lpo
!= NULL
);
776 ZeroMemory(lpo
, sizeof(IoCompletionInfo
));
777 lpo
->request_type
= SOCKET_RECV
;
779 sock
->recvbuf
.from_addr_len
= sizeof(sock
->recvbuf
.from_addr
);
782 Result
= WSARecvFrom((SOCKET
)sock
->fd
, iov
, 1,
784 (SOCKADDR
*)&sock
->recvbuf
.from_addr
,
785 &sock
->recvbuf
.from_addr_len
,
786 (LPWSAOVERLAPPED
)lpo
, NULL
);
788 /* Check for errors. */
789 if (Result
== SOCKET_ERROR
) {
790 Error
= WSAGetLastError();
794 sock
->pending_iocp
++;
795 sock
->pending_recv
++;
798 /* direct error: no completion event */
799 case ERROR_HOST_UNREACHABLE
:
802 if (!sock
->connected
) {
804 need_retry
= ISC_TRUE
;
810 isc_result
= isc__errno2result(Error
);
811 if (isc_result
== ISC_R_UNEXPECTED
)
812 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
813 "WSARecvFrom: Windows error code: %d, isc result %d",
815 send_recvdone_abort(sock
, isc_result
);
816 HeapFree(hHeapHandle
, 0, lpo
);
822 * The recv() finished immediately, but we will still get
823 * a completion event. Rather than duplicate code, let
824 * that thread handle sending the data along its way.
826 sock
->pending_iocp
++;
827 sock
->pending_recv
++;
830 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
831 isc_msgcat
, ISC_MSGSET_SOCKET
,
833 "queue_io_request: fd %d result %d error %d",
834 sock
->fd
, Result
, Error
);
843 manager_log(isc_socketmgr_t
*sockmgr
, isc_logcategory_t
*category
,
844 isc_logmodule_t
*module
, int level
, const char *fmt
, ...)
849 if (!isc_log_wouldlog(isc_lctx
, level
))
853 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
856 isc_log_write(isc_lctx
, category
, module
, level
,
857 "sockmgr %p: %s", sockmgr
, msgbuf
);
861 socket_log(int lineno
, isc_socket_t
*sock
, isc_sockaddr_t
*address
,
862 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
863 isc_msgcat_t
*msgcat
, int msgset
, int message
,
864 const char *fmt
, ...)
871 if (!isc_log_wouldlog(isc_lctx
, level
))
875 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
878 if (address
== NULL
) {
879 isc_log_iwrite(isc_lctx
, category
, module
, level
,
880 msgcat
, msgset
, message
,
881 "socket %p line %d: %s", sock
, lineno
, msgbuf
);
883 isc_sockaddr_format(address
, peerbuf
, sizeof(peerbuf
));
884 isc_log_iwrite(isc_lctx
, category
, module
, level
,
885 msgcat
, msgset
, message
,
886 "socket %p line %d peer %s: %s", sock
, lineno
,
893 * Make an fd SOCKET non-blocking.
896 make_nonblock(SOCKET fd
) {
898 unsigned long flags
= 1;
899 char strbuf
[ISC_STRERRORSIZE
];
901 /* Set the socket to non-blocking */
902 ret
= ioctlsocket(fd
, FIONBIO
, &flags
);
905 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
906 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
907 "ioctlsocket(%d, FIOBIO, %d): %s",
910 return (ISC_R_UNEXPECTED
);
913 return (ISC_R_SUCCESS
);
917 * Windows 2000 systems incorrectly cause UDP sockets using WSARecvFrom
918 * to not work correctly, returning a WSACONNRESET error when a WSASendTo
919 * fails with an "ICMP port unreachable" response and preventing the
920 * socket from using the WSARecvFrom in subsequent operations.
921 * The function below fixes this, but requires that Windows 2000
922 * Service Pack 2 or later be installed on the system. NT 4.0
923 * systems are not affected by this and work correctly.
924 * See Microsoft Knowledge Base Article Q263823 for details of this.
927 connection_reset_fix(SOCKET fd
) {
928 DWORD dwBytesReturned
= 0;
929 BOOL bNewBehavior
= FALSE
;
932 if (isc_win32os_versioncheck(5, 0, 0, 0) < 0)
933 return (ISC_R_SUCCESS
); /* NT 4.0 has no problem */
935 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
936 status
= WSAIoctl(fd
, SIO_UDP_CONNRESET
, &bNewBehavior
,
937 sizeof(bNewBehavior
), NULL
, 0,
938 &dwBytesReturned
, NULL
, NULL
);
939 if (status
!= SOCKET_ERROR
)
940 return (ISC_R_SUCCESS
);
942 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
943 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
944 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
945 ISC_MSG_FAILED
, "failed"));
946 return (ISC_R_UNEXPECTED
);
951 * Construct an iov array and attach it to the msghdr passed in. This is
952 * the SEND constructor, which will use the used region of the buffer
953 * (if using a buffer list) or will use the internal region (if a single
954 * buffer I/O is requested).
956 * Nothing can be NULL, and the done event must list at least one buffer
957 * on the buffer linked list for this function to be meaningful.
960 build_msghdr_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
961 struct msghdr
*msg
, char *cmsg
, WSABUF
*iov
,
962 IoCompletionInfo
*lpo
)
964 unsigned int iovcount
;
965 isc_buffer_t
*buffer
;
971 memset(msg
, 0, sizeof(*msg
));
973 memmove(&msg
->to_addr
, &dev
->address
.type
, dev
->address
.length
);
974 msg
->to_addr_len
= dev
->address
.length
;
976 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
981 * Single buffer I/O? Skip what we've done so far in this region.
983 if (buffer
== NULL
) {
984 write_count
= dev
->region
.length
- dev
->n
;
985 cpbuffer
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, sizeof(buflist_t
));
986 RUNTIME_CHECK(cpbuffer
!= NULL
);
987 cpbuffer
->buf
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, write_count
);
988 RUNTIME_CHECK(cpbuffer
->buf
!= NULL
);
990 socket_log(__LINE__
, sock
, NULL
, TRACE
,
991 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
992 "alloc_buffer %p %d %p %d", cpbuffer
, sizeof(buflist_t
),
993 cpbuffer
->buf
, write_count
);
995 memmove(cpbuffer
->buf
,(dev
->region
.base
+ dev
->n
), write_count
);
996 cpbuffer
->buflen
= (unsigned int)write_count
;
997 ISC_LIST_ENQUEUE(lpo
->bufferlist
, cpbuffer
, link
);
998 iov
[0].buf
= cpbuffer
->buf
;
999 iov
[0].len
= (u_long
)write_count
;
1007 * Skip the data in the buffer list that we have already written.
1009 skip_count
= dev
->n
;
1010 while (buffer
!= NULL
) {
1011 REQUIRE(ISC_BUFFER_VALID(buffer
));
1012 if (skip_count
< isc_buffer_usedlength(buffer
))
1014 skip_count
-= isc_buffer_usedlength(buffer
);
1015 buffer
= ISC_LIST_NEXT(buffer
, link
);
1018 while (buffer
!= NULL
) {
1019 INSIST(iovcount
< MAXSCATTERGATHER_SEND
);
1021 isc_buffer_usedregion(buffer
, &used
);
1023 if (used
.length
> 0) {
1024 int uselen
= (int)(used
.length
- skip_count
);
1025 cpbuffer
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, sizeof(buflist_t
));
1026 RUNTIME_CHECK(cpbuffer
!= NULL
);
1027 cpbuffer
->buf
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, uselen
);
1028 RUNTIME_CHECK(cpbuffer
->buf
!= NULL
);
1030 socket_log(__LINE__
, sock
, NULL
, TRACE
,
1031 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
1032 "alloc_buffer %p %d %p %d", cpbuffer
, sizeof(buflist_t
),
1033 cpbuffer
->buf
, write_count
);
1035 memmove(cpbuffer
->buf
,(used
.base
+ skip_count
), uselen
);
1036 cpbuffer
->buflen
= uselen
;
1037 iov
[iovcount
].buf
= cpbuffer
->buf
;
1038 iov
[iovcount
].len
= (u_long
)(used
.length
- skip_count
);
1039 write_count
+= uselen
;
1043 buffer
= ISC_LIST_NEXT(buffer
, link
);
1046 INSIST(skip_count
== 0);
1050 msg
->msg_iovlen
= iovcount
;
1051 msg
->msg_totallen
= (u_int
)write_count
;
1055 set_dev_address(isc_sockaddr_t
*address
, isc_socket_t
*sock
,
1056 isc_socketevent_t
*dev
)
1058 if (sock
->type
== isc_sockettype_udp
) {
1059 if (address
!= NULL
)
1060 dev
->address
= *address
;
1062 dev
->address
= sock
->address
;
1063 } else if (sock
->type
== isc_sockettype_tcp
) {
1064 INSIST(address
== NULL
);
1065 dev
->address
= sock
->address
;
1070 destroy_socketevent(isc_event_t
*event
) {
1071 isc_socketevent_t
*ev
= (isc_socketevent_t
*)event
;
1073 INSIST(ISC_LIST_EMPTY(ev
->bufferlist
));
1075 (ev
->destroy
)(event
);
1078 static isc_socketevent_t
*
1079 allocate_socketevent(isc_mem_t
*mctx
, isc_socket_t
*sock
,
1080 isc_eventtype_t eventtype
, isc_taskaction_t action
,
1083 isc_socketevent_t
*ev
;
1085 ev
= (isc_socketevent_t
*)isc_event_allocate(mctx
, sock
, eventtype
,
1091 ev
->result
= ISC_R_IOERROR
; // XXXMLG temporary change to detect failure to set
1092 ISC_LINK_INIT(ev
, ev_link
);
1093 ISC_LIST_INIT(ev
->bufferlist
);
1094 ev
->region
.base
= NULL
;
1098 ev
->destroy
= ev
->ev_destroy
;
1099 ev
->ev_destroy
= destroy_socketevent
;
1105 #if defined(ISC_SOCKET_DEBUG)
1107 dump_msg(struct msghdr
*msg
, isc_socket_t
*sock
) {
1110 printf("MSGHDR %p, Socket #: %u\n", msg
, sock
->fd
);
1111 printf("\tname %p, namelen %d\n", msg
->msg_name
, msg
->msg_namelen
);
1112 printf("\tiov %p, iovlen %d\n", msg
->msg_iov
, msg
->msg_iovlen
);
1113 for (i
= 0; i
< (unsigned int)msg
->msg_iovlen
; i
++)
1114 printf("\t\t%u\tbase %p, len %u\n", i
,
1115 msg
->msg_iov
[i
].buf
, msg
->msg_iov
[i
].len
);
1120 * map the error code
1123 map_socket_error(isc_socket_t
*sock
, int windows_errno
, int *isc_errno
,
1124 char *errorstring
, size_t bufsize
) {
1127 switch (windows_errno
) {
1128 case WSAECONNREFUSED
:
1129 *isc_errno
= ISC_R_CONNREFUSED
;
1130 if (sock
->connected
)
1131 doreturn
= DOIO_HARD
;
1133 doreturn
= DOIO_SOFT
;
1135 case WSAENETUNREACH
:
1136 case ERROR_NETWORK_UNREACHABLE
:
1137 *isc_errno
= ISC_R_NETUNREACH
;
1138 if (sock
->connected
)
1139 doreturn
= DOIO_HARD
;
1141 doreturn
= DOIO_SOFT
;
1143 case ERROR_PORT_UNREACHABLE
:
1144 case ERROR_HOST_UNREACHABLE
:
1145 case WSAEHOSTUNREACH
:
1146 *isc_errno
= ISC_R_HOSTUNREACH
;
1147 if (sock
->connected
)
1148 doreturn
= DOIO_HARD
;
1150 doreturn
= DOIO_SOFT
;
1153 *isc_errno
= ISC_R_NETDOWN
;
1154 if (sock
->connected
)
1155 doreturn
= DOIO_HARD
;
1157 doreturn
= DOIO_SOFT
;
1160 *isc_errno
= ISC_R_HOSTDOWN
;
1161 if (sock
->connected
)
1162 doreturn
= DOIO_HARD
;
1164 doreturn
= DOIO_SOFT
;
1167 *isc_errno
= ISC_R_NOPERM
;
1168 if (sock
->connected
)
1169 doreturn
= DOIO_HARD
;
1171 doreturn
= DOIO_SOFT
;
1175 case WSAECONNABORTED
:
1177 *isc_errno
= ISC_R_CONNECTIONRESET
;
1178 if (sock
->connected
)
1179 doreturn
= DOIO_HARD
;
1181 doreturn
= DOIO_SOFT
;
1184 *isc_errno
= ISC_R_NOTCONNECTED
;
1185 if (sock
->connected
)
1186 doreturn
= DOIO_HARD
;
1188 doreturn
= DOIO_SOFT
;
1190 case ERROR_OPERATION_ABORTED
:
1191 case ERROR_CONNECTION_ABORTED
:
1192 case ERROR_REQUEST_ABORTED
:
1193 *isc_errno
= ISC_R_CONNECTIONRESET
;
1194 doreturn
= DOIO_HARD
;
1197 *isc_errno
= ISC_R_NORESOURCES
;
1198 doreturn
= DOIO_HARD
;
1200 case WSAEAFNOSUPPORT
:
1201 *isc_errno
= ISC_R_FAMILYNOSUPPORT
;
1202 doreturn
= DOIO_HARD
;
1204 case WSAEADDRNOTAVAIL
:
1205 *isc_errno
= ISC_R_ADDRNOTAVAIL
;
1206 doreturn
= DOIO_HARD
;
1208 case WSAEDESTADDRREQ
:
1209 *isc_errno
= ISC_R_BADADDRESSFORM
;
1210 doreturn
= DOIO_HARD
;
1212 case ERROR_NETNAME_DELETED
:
1213 *isc_errno
= ISC_R_NETDOWN
;
1214 doreturn
= DOIO_HARD
;
1217 *isc_errno
= ISC_R_IOERROR
;
1218 doreturn
= DOIO_HARD
;
1221 if (doreturn
== DOIO_HARD
) {
1222 isc__strerror(windows_errno
, errorstring
, bufsize
);
1228 fill_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
) {
1231 isc_buffer_t
*buffer
;
1233 INSIST(dev
->n
< dev
->minimum
);
1234 INSIST(sock
->recvbuf
.remaining
> 0);
1235 INSIST(sock
->pending_recv
== 0);
1237 if (sock
->type
== isc_sockettype_udp
) {
1238 dev
->address
.length
= sock
->recvbuf
.from_addr_len
;
1239 memmove(&dev
->address
.type
, &sock
->recvbuf
.from_addr
,
1240 sock
->recvbuf
.from_addr_len
);
1241 if (isc_sockaddr_getport(&dev
->address
) == 0) {
1242 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
1243 socket_log(__LINE__
, sock
, &dev
->address
, IOEVENT
,
1244 isc_msgcat
, ISC_MSGSET_SOCKET
,
1246 "dropping source port zero packet");
1248 sock
->recvbuf
.remaining
= 0;
1251 } else if (sock
->type
== isc_sockettype_tcp
) {
1252 dev
->address
= sock
->address
;
1256 * Run through the list of buffers we were given, and find the
1257 * first one with space. Once it is found, loop through, filling
1258 * the buffers as much as possible.
1260 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
1261 if (buffer
!= NULL
) { // Multi-buffer receive
1262 while (buffer
!= NULL
&& sock
->recvbuf
.remaining
> 0) {
1263 REQUIRE(ISC_BUFFER_VALID(buffer
));
1264 if (isc_buffer_availablelength(buffer
) > 0) {
1265 isc_buffer_availableregion(buffer
, &r
);
1266 copylen
= min(r
.length
,
1267 sock
->recvbuf
.remaining
);
1268 memmove(r
.base
, sock
->recvbuf
.consume_position
,
1270 sock
->recvbuf
.consume_position
+= copylen
;
1271 sock
->recvbuf
.remaining
-= copylen
;
1272 isc_buffer_add(buffer
, copylen
);
1275 buffer
= ISC_LIST_NEXT(buffer
, link
);
1277 } else { // Single-buffer receive
1278 copylen
= min(dev
->region
.length
- dev
->n
, sock
->recvbuf
.remaining
);
1279 memmove(dev
->region
.base
+ dev
->n
,
1280 sock
->recvbuf
.consume_position
, copylen
);
1281 sock
->recvbuf
.consume_position
+= copylen
;
1282 sock
->recvbuf
.remaining
-= copylen
;
1287 * UDP receives are all-consuming. That is, if we have 4k worth of
1288 * data in our receive buffer, and the caller only gave us
1289 * 1k of space, we will toss the remaining 3k of data. TCP
1290 * will keep the extra data around and use it for later requests.
1292 if (sock
->type
== isc_sockettype_udp
)
1293 sock
->recvbuf
.remaining
= 0;
1297 * Copy out as much data from the internal buffer to done events.
1298 * As each done event is filled, send it along its way.
1301 completeio_recv(isc_socket_t
*sock
)
1303 isc_socketevent_t
*dev
;
1306 * If we are in the process of filling our buffer, we cannot
1307 * touch it yet, so don't.
1309 if (sock
->pending_recv
> 0)
1312 while (sock
->recvbuf
.remaining
> 0 && !ISC_LIST_EMPTY(sock
->recv_list
)) {
1313 dev
= ISC_LIST_HEAD(sock
->recv_list
);
1316 * See if we have sufficient data in our receive buffer
1317 * to handle this. If we do, copy out the data.
1319 fill_recv(sock
, dev
);
1322 * Did we satisfy it?
1324 if (dev
->n
>= dev
->minimum
) {
1325 dev
->result
= ISC_R_SUCCESS
;
1326 send_recvdone_event(sock
, &dev
);
1333 * DOIO_SUCCESS The operation succeeded. dev->result contains
1336 * DOIO_HARD A hard or unexpected I/O error was encountered.
1337 * dev->result contains the appropriate error.
1339 * DOIO_SOFT A soft I/O error was encountered. No senddone
1340 * event was sent. The operation should be retried.
1342 * No other return values are possible.
1345 completeio_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
1346 struct msghdr
*messagehdr
, int cc
, int send_errno
)
1348 char addrbuf
[ISC_SOCKADDR_FORMATSIZE
];
1349 char strbuf
[ISC_STRERRORSIZE
];
1351 if (send_errno
!= 0) {
1352 if (SOFT_ERROR(send_errno
))
1355 return (map_socket_error(sock
, send_errno
, &dev
->result
,
1356 strbuf
, sizeof(strbuf
)));
1359 * The other error types depend on whether or not the
1360 * socket is UDP or TCP. If it is UDP, some errors
1361 * that we expect to be fatal under TCP are merely
1362 * annoying, and are really soft errors.
1364 * However, these soft errors are still returned as
1367 isc_sockaddr_format(&dev
->address
, addrbuf
, sizeof(addrbuf
));
1368 isc__strerror(send_errno
, strbuf
, sizeof(strbuf
));
1369 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "completeio_send: %s: %s",
1371 dev
->result
= isc__errno2result(send_errno
);
1376 * If we write less than we expected, update counters, poke.
1379 if (cc
!= messagehdr
->msg_totallen
)
1383 * Exactly what we wanted to write. We're done with this
1384 * entry. Post its completion event.
1386 dev
->result
= ISC_R_SUCCESS
;
1387 return (DOIO_SUCCESS
);
1391 startio_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
, int *nbytes
,
1395 char strbuf
[ISC_STRERRORSIZE
];
1396 IoCompletionInfo
*lpo
;
1398 struct msghdr
*msghdr
;
1400 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
1402 sizeof(IoCompletionInfo
));
1403 RUNTIME_CHECK(lpo
!= NULL
);
1404 lpo
->request_type
= SOCKET_SEND
;
1406 msghdr
= &lpo
->messagehdr
;
1407 memset(msghdr
, 0, sizeof(struct msghdr
));
1408 ISC_LIST_INIT(lpo
->bufferlist
);
1410 build_msghdr_send(sock
, dev
, msghdr
, cmsg
, sock
->iov
, lpo
);
1412 *nbytes
= internal_sendmsg(sock
, lpo
, msghdr
, 0, send_errno
);
1416 * I/O has been initiated
1417 * completion will be through the completion port
1419 if (PENDING_ERROR(*send_errno
)) {
1420 status
= DOIO_PENDING
;
1424 if (SOFT_ERROR(*send_errno
)) {
1430 * If we got this far then something is wrong
1432 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
1433 isc__strerror(*send_errno
, strbuf
, sizeof(strbuf
));
1434 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
1435 isc_msgcat
, ISC_MSGSET_SOCKET
,
1436 ISC_MSG_INTERNALSEND
,
1437 "startio_send: internal_sendmsg(%d) %d "
1439 sock
->fd
, *nbytes
, *send_errno
, strbuf
);
1444 dev
->result
= ISC_R_SUCCESS
;
1447 _set_state(sock
, SOCK_DATA
);
1452 use_min_mtu(isc_socket_t
*sock
) {
1453 #ifdef IPV6_USE_MIN_MTU
1454 /* use minimum MTU */
1455 if (sock
->pf
== AF_INET6
) {
1457 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_USE_MIN_MTU
,
1458 (void *)&on
, sizeof(on
));
1466 allocate_socket(isc_socketmgr_t
*manager
, isc_sockettype_t type
,
1467 isc_socket_t
**socketp
) {
1469 isc_result_t result
;
1471 sock
= isc_mem_get(manager
->mctx
, sizeof(*sock
));
1474 return (ISC_R_NOMEMORY
);
1477 sock
->references
= 0;
1479 sock
->manager
= manager
;
1481 sock
->fd
= INVALID_SOCKET
;
1483 ISC_LINK_INIT(sock
, link
);
1486 * Set up list of readers and writers to be initially empty.
1488 ISC_LIST_INIT(sock
->recv_list
);
1489 ISC_LIST_INIT(sock
->send_list
);
1490 ISC_LIST_INIT(sock
->accept_list
);
1491 sock
->connect_ev
= NULL
;
1492 sock
->pending_accept
= 0;
1493 sock
->pending_recv
= 0;
1494 sock
->pending_send
= 0;
1495 sock
->pending_iocp
= 0;
1497 sock
->connected
= 0;
1498 sock
->pending_connect
= 0;
1501 memset(sock
->name
, 0, sizeof(sock
->name
)); // zero the name field
1502 _set_state(sock
, SOCK_INITIALIZED
);
1504 sock
->recvbuf
.len
= 65536;
1505 sock
->recvbuf
.consume_position
= sock
->recvbuf
.base
;
1506 sock
->recvbuf
.remaining
= 0;
1507 sock
->recvbuf
.base
= isc_mem_get(manager
->mctx
, sock
->recvbuf
.len
); // max buffer size
1508 if (sock
->recvbuf
.base
== NULL
) {
1509 result
= ISC_R_NOMEMORY
;
1514 * Initialize the lock.
1516 result
= isc_mutex_init(&sock
->lock
);
1517 if (result
!= ISC_R_SUCCESS
)
1520 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1523 sock
->magic
= SOCKET_MAGIC
;
1526 return (ISC_R_SUCCESS
);
1529 if (sock
->recvbuf
.base
!= NULL
)
1530 isc_mem_put(manager
->mctx
, sock
->recvbuf
.base
, sock
->recvbuf
.len
);
1531 isc_mem_put(manager
->mctx
, sock
, sizeof(*sock
));
1537 * Verify that the socket state is consistent.
1540 consistent(isc_socket_t
*sock
) {
1542 isc_socketevent_t
*dev
;
1543 isc_socket_newconnev_t
*nev
;
1546 isc_boolean_t crash
= ISC_FALSE
;
1548 REQUIRE(sock
->pending_iocp
== sock
->pending_recv
+ sock
->pending_send
1549 + sock
->pending_accept
+ sock
->pending_connect
);
1551 dev
= ISC_LIST_HEAD(sock
->send_list
);
1553 while (dev
!= NULL
) {
1555 dev
= ISC_LIST_NEXT(dev
, ev_link
);
1557 if (count
> sock
->pending_send
) {
1559 crash_reason
= "send_list > sock->pending_send";
1562 nev
= ISC_LIST_HEAD(sock
->accept_list
);
1564 while (nev
!= NULL
) {
1566 nev
= ISC_LIST_NEXT(nev
, ev_link
);
1568 if (count
> sock
->pending_accept
) {
1570 crash_reason
= "send_list > sock->pending_send";
1574 socket_log(__LINE__
, sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1575 ISC_MSG_DESTROYING
, "SOCKET INCONSISTENT: %s",
1578 INSIST(crash
== ISC_FALSE
);
1583 * Maybe free the socket.
1585 * This function will verify that the socket is no longer in use in any way,
1586 * either internally or externally. This is the only place where this
1587 * check is to be made; if some bit of code believes that IT is done with
1588 * the socket (e.g., some reference counter reaches zero), it should call
1591 * When calling this function, the socket must be locked, and the manager
1594 * When this function returns, *socketp will be NULL. No tricks to try
1595 * to hold on to this pointer are allowed.
1598 maybe_free_socket(isc_socket_t
**socketp
, int lineno
) {
1599 isc_socket_t
*sock
= *socketp
;
1602 INSIST(VALID_SOCKET(sock
));
1605 if (sock
->pending_iocp
> 0
1606 || sock
->pending_recv
> 0
1607 || sock
->pending_send
> 0
1608 || sock
->pending_accept
> 0
1609 || sock
->references
> 0
1610 || sock
->pending_connect
== 1
1611 || !ISC_LIST_EMPTY(sock
->recv_list
)
1612 || !ISC_LIST_EMPTY(sock
->send_list
)
1613 || !ISC_LIST_EMPTY(sock
->accept_list
)
1614 || sock
->fd
!= INVALID_SOCKET
) {
1615 UNLOCK(&sock
->lock
);
1618 UNLOCK(&sock
->lock
);
1620 free_socket(&sock
, lineno
);
1624 free_socket(isc_socket_t
**sockp
, int lineno
) {
1625 isc_socketmgr_t
*manager
;
1626 isc_socket_t
*sock
= *sockp
;
1630 * Seems we can free the socket after all.
1632 manager
= sock
->manager
;
1633 socket_log(__LINE__
, sock
, NULL
, CREATION
, isc_msgcat
,
1634 ISC_MSGSET_SOCKET
, ISC_MSG_DESTROYING
,
1635 "freeing socket line %d fd %d lock %p semaphore %p",
1636 lineno
, sock
->fd
, &sock
->lock
, sock
->lock
.LockSemaphore
);
1639 DESTROYLOCK(&sock
->lock
);
1641 if (sock
->recvbuf
.base
!= NULL
)
1642 isc_mem_put(manager
->mctx
, sock
->recvbuf
.base
,
1645 LOCK(&manager
->lock
);
1646 if (ISC_LINK_LINKED(sock
, link
))
1647 ISC_LIST_UNLINK(manager
->socklist
, sock
, link
);
1648 isc_mem_put(manager
->mctx
, sock
, sizeof(*sock
));
1650 if (ISC_LIST_EMPTY(manager
->socklist
))
1651 SIGNAL(&manager
->shutdown_ok
);
1652 UNLOCK(&manager
->lock
);
1656 * Create a new 'type' socket managed by 'manager'. Events
1657 * will be posted to 'task' and when dispatched 'action' will be
1658 * called with 'arg' as the arg value. The new socket is returned
1662 socket_create(isc_socketmgr_t
*manager
, int pf
, isc_sockettype_t type
,
1663 isc_socket_t
**socketp
, isc_socket_t
*dup_socket
)
1665 isc_socket_t
*sock
= NULL
;
1666 isc_result_t result
;
1667 #if defined(USE_CMSG)
1670 #if defined(SO_RCVBUF)
1671 ISC_SOCKADDR_LEN_T optlen
;
1675 char strbuf
[ISC_STRERRORSIZE
];
1677 REQUIRE(VALID_MANAGER(manager
));
1678 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1679 REQUIRE(type
!= isc_sockettype_fdwatch
);
1682 if (type
== isc_sockettype_raw
)
1683 return (ISC_R_NOTIMPLEMENTED
);
1686 result
= allocate_socket(manager
, type
, &sock
);
1687 if (result
!= ISC_R_SUCCESS
)
1692 case isc_sockettype_udp
:
1693 sock
->fd
= socket(pf
, SOCK_DGRAM
, IPPROTO_UDP
);
1694 if (sock
->fd
!= INVALID_SOCKET
) {
1695 result
= connection_reset_fix(sock
->fd
);
1696 if (result
!= ISC_R_SUCCESS
) {
1697 socket_log(__LINE__
, sock
,
1698 NULL
, EVENT
, NULL
, 0, 0,
1700 "con_reset_fix_failed",
1704 closesocket(sock
->fd
);
1705 _set_state(sock
, SOCK_CLOSED
);
1706 sock
->fd
= INVALID_SOCKET
;
1707 free_socket(&sock
, __LINE__
);
1712 case isc_sockettype_tcp
:
1713 sock
->fd
= socket(pf
, SOCK_STREAM
, IPPROTO_TCP
);
1716 case isc_sockettype_raw
:
1717 sock
->fd
= socket(pf
, SOCK_RAW
, 0);
1726 if (sock
->fd
== INVALID_SOCKET
) {
1727 socket_errno
= WSAGetLastError();
1728 free_socket(&sock
, __LINE__
);
1730 switch (socket_errno
) {
1733 return (ISC_R_NORESOURCES
);
1735 case WSAEPROTONOSUPPORT
:
1736 case WSAEPFNOSUPPORT
:
1737 case WSAEAFNOSUPPORT
:
1738 return (ISC_R_FAMILYNOSUPPORT
);
1741 isc__strerror(socket_errno
, strbuf
, sizeof(strbuf
));
1742 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1744 isc_msgcat_get(isc_msgcat
,
1749 return (ISC_R_UNEXPECTED
);
1753 result
= make_nonblock(sock
->fd
);
1754 if (result
!= ISC_R_SUCCESS
) {
1755 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1756 "closed %d %d %d make_nonblock_failed",
1757 sock
->pending_recv
, sock
->pending_send
,
1759 closesocket(sock
->fd
);
1760 sock
->fd
= INVALID_SOCKET
;
1761 free_socket(&sock
, __LINE__
);
1766 * Use minimum mtu if possible.
1770 #if defined(USE_CMSG) || defined(SO_RCVBUF)
1771 if (type
== isc_sockettype_udp
) {
1773 #if defined(USE_CMSG)
1774 #if defined(ISC_PLATFORM_HAVEIPV6)
1775 #ifdef IPV6_RECVPKTINFO
1777 if ((pf
== AF_INET6
)
1778 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
,
1779 (char *)&on
, sizeof(on
)) < 0)) {
1780 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
1781 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1782 "setsockopt(%d, IPV6_RECVPKTINFO) "
1784 isc_msgcat_get(isc_msgcat
,
1792 if ((pf
== AF_INET6
)
1793 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_PKTINFO
,
1794 (char *)&on
, sizeof(on
)) < 0)) {
1795 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
1796 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1797 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1799 isc_msgcat_get(isc_msgcat
,
1805 #endif /* IPV6_RECVPKTINFO */
1806 #endif /* ISC_PLATFORM_HAVEIPV6 */
1807 #endif /* defined(USE_CMSG) */
1809 #if defined(SO_RCVBUF)
1810 optlen
= sizeof(size
);
1811 if (getsockopt(sock
->fd
, SOL_SOCKET
, SO_RCVBUF
,
1812 (char *)&size
, &optlen
) >= 0 &&
1813 size
< RCVBUFSIZE
) {
1815 (void)setsockopt(sock
->fd
, SOL_SOCKET
, SO_RCVBUF
,
1816 (char *)&size
, sizeof(size
));
1821 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
1823 _set_state(sock
, SOCK_OPEN
);
1824 sock
->references
= 1;
1827 iocompletionport_update(sock
);
1830 #ifndef ISC_ALLOW_MAPPED
1831 isc__socket_ipv6only(sock
, ISC_TRUE
);
1834 if (dup_socket
->bound
) {
1835 isc_sockaddr_t local
;
1837 result
= isc__socket_getsockname(dup_socket
, &local
);
1838 if (result
!= ISC_R_SUCCESS
) {
1839 isc_socket_close(sock
);
1842 result
= isc__socket_bind(sock
, &local
,
1843 ISC_SOCKET_REUSEADDRESS
);
1844 if (result
!= ISC_R_SUCCESS
) {
1845 isc_socket_close(sock
);
1853 * Note we don't have to lock the socket like we normally would because
1854 * there are no external references to it yet.
1856 LOCK(&manager
->lock
);
1857 ISC_LIST_APPEND(manager
->socklist
, sock
, link
);
1858 InterlockedIncrement(&manager
->totalSockets
);
1859 UNLOCK(&manager
->lock
);
1861 socket_log(__LINE__
, sock
, NULL
, CREATION
, isc_msgcat
,
1862 ISC_MSGSET_SOCKET
, ISC_MSG_CREATED
,
1863 "created %u type %u", sock
->fd
, type
);
1865 return (ISC_R_SUCCESS
);
1869 isc__socket_create(isc_socketmgr_t
*manager
, int pf
, isc_sockettype_t type
,
1870 isc_socket_t
**socketp
)
1872 return (socket_create(manager
, pf
, type
, socketp
, NULL
));
1876 isc__socket_dup(isc_socket_t
*sock
, isc_socket_t
**socketp
) {
1877 REQUIRE(VALID_SOCKET(sock
));
1878 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1880 return (socket_create(sock
->manager
, sock
->pf
, sock
->type
,
1885 isc_socket_open(isc_socket_t
*sock
) {
1886 REQUIRE(VALID_SOCKET(sock
));
1887 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
1889 return (ISC_R_NOTIMPLEMENTED
);
1893 * Attach to a socket. Caller must explicitly detach when it is done.
1896 isc__socket_attach(isc_socket_t
*sock
, isc_socket_t
**socketp
) {
1897 REQUIRE(VALID_SOCKET(sock
));
1898 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1903 UNLOCK(&sock
->lock
);
1909 * Dereference a socket. If this is the last reference to it, clean things
1910 * up by destroying the socket.
1913 isc__socket_detach(isc_socket_t
**socketp
) {
1916 REQUIRE(socketp
!= NULL
);
1918 REQUIRE(VALID_SOCKET(sock
));
1919 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
1923 REQUIRE(sock
->references
> 0);
1926 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1927 "detach_socket %d %d %d",
1928 sock
->pending_recv
, sock
->pending_send
,
1931 if (sock
->references
== 0 && sock
->fd
!= INVALID_SOCKET
) {
1932 closesocket(sock
->fd
);
1933 sock
->fd
= INVALID_SOCKET
;
1934 _set_state(sock
, SOCK_CLOSED
);
1937 maybe_free_socket(&sock
, __LINE__
);
1943 isc_socket_close(isc_socket_t
*sock
) {
1944 REQUIRE(VALID_SOCKET(sock
));
1945 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
1947 return (ISC_R_NOTIMPLEMENTED
);
1951 * Dequeue an item off the given socket's read queue, set the result code
1952 * in the done event to the one provided, and send it to the task it was
1955 * If the event to be sent is on a list, remove it before sending. If
1956 * asked to, send and detach from the task as well.
1958 * Caller must have the socket locked if the event is attached to the socket.
1961 send_recvdone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1964 task
= (*dev
)->ev_sender
;
1965 (*dev
)->ev_sender
= sock
;
1967 if (ISC_LINK_LINKED(*dev
, ev_link
))
1968 ISC_LIST_DEQUEUE(sock
->recv_list
, *dev
, ev_link
);
1970 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1971 == ISC_SOCKEVENTATTR_ATTACHED
)
1972 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1974 isc_task_send(task
, (isc_event_t
**)dev
);
1980 * See comments for send_recvdone_event() above.
1983 send_senddone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1986 INSIST(dev
!= NULL
&& *dev
!= NULL
);
1988 task
= (*dev
)->ev_sender
;
1989 (*dev
)->ev_sender
= sock
;
1991 if (ISC_LINK_LINKED(*dev
, ev_link
))
1992 ISC_LIST_DEQUEUE(sock
->send_list
, *dev
, ev_link
);
1994 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1995 == ISC_SOCKEVENTATTR_ATTACHED
)
1996 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1998 isc_task_send(task
, (isc_event_t
**)dev
);
2004 * See comments for send_recvdone_event() above.
2007 send_acceptdone_event(isc_socket_t
*sock
, isc_socket_newconnev_t
**adev
) {
2010 INSIST(adev
!= NULL
&& *adev
!= NULL
);
2012 task
= (*adev
)->ev_sender
;
2013 (*adev
)->ev_sender
= sock
;
2015 if (ISC_LINK_LINKED(*adev
, ev_link
))
2016 ISC_LIST_DEQUEUE(sock
->accept_list
, *adev
, ev_link
);
2018 isc_task_sendanddetach(&task
, (isc_event_t
**)adev
);
2024 * See comments for send_recvdone_event() above.
2027 send_connectdone_event(isc_socket_t
*sock
, isc_socket_connev_t
**cdev
) {
2030 INSIST(cdev
!= NULL
&& *cdev
!= NULL
);
2032 task
= (*cdev
)->ev_sender
;
2033 (*cdev
)->ev_sender
= sock
;
2035 sock
->connect_ev
= NULL
;
2037 isc_task_sendanddetach(&task
, (isc_event_t
**)cdev
);
2043 * On entry to this function, the event delivered is the internal
2044 * readable event, and the first item on the accept_list should be
2045 * the done event we want to send. If the list is empty, this is a no-op,
2046 * so just close the new connection, unlock, and return.
2048 * Note the socket is locked before entering here
2051 internal_accept(isc_socket_t
*sock
, IoCompletionInfo
*lpo
, int accept_errno
) {
2052 isc_socket_newconnev_t
*adev
;
2053 isc_result_t result
= ISC_R_SUCCESS
;
2054 isc_socket_t
*nsock
;
2055 struct sockaddr
*localaddr
;
2056 int localaddr_len
= sizeof(*localaddr
);
2057 struct sockaddr
*remoteaddr
;
2058 int remoteaddr_len
= sizeof(*remoteaddr
);
2060 INSIST(VALID_SOCKET(sock
));
2064 socket_log(__LINE__
, sock
, NULL
, TRACE
,
2065 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
2066 "internal_accept called");
2068 INSIST(sock
->listener
);
2070 INSIST(sock
->pending_iocp
> 0);
2071 sock
->pending_iocp
--;
2072 INSIST(sock
->pending_accept
> 0);
2073 sock
->pending_accept
--;
2078 * If the event is no longer in the list we can just return.
2080 if (!acceptdone_is_active(sock
, adev
))
2083 nsock
= adev
->newsocket
;
2086 * Pull off the done event.
2088 ISC_LIST_UNLINK(sock
->accept_list
, adev
, ev_link
);
2091 * Extract the addresses from the socket, copy them into the structure,
2092 * and return the new socket.
2094 ISCGetAcceptExSockaddrs(lpo
->acceptbuffer
, 0,
2095 sizeof(SOCKADDR_STORAGE
) + 16, sizeof(SOCKADDR_STORAGE
) + 16,
2096 (LPSOCKADDR
*)&localaddr
, &localaddr_len
,
2097 (LPSOCKADDR
*)&remoteaddr
, &remoteaddr_len
);
2098 memmove(&adev
->address
.type
, remoteaddr
, remoteaddr_len
);
2099 adev
->address
.length
= remoteaddr_len
;
2100 nsock
->address
= adev
->address
;
2101 nsock
->pf
= adev
->address
.type
.sa
.sa_family
;
2103 socket_log(__LINE__
, nsock
, &nsock
->address
, TRACE
,
2104 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
2105 "internal_accept parent %p", sock
);
2107 result
= make_nonblock(adev
->newsocket
->fd
);
2108 INSIST(result
== ISC_R_SUCCESS
);
2111 * Use minimum mtu if possible.
2113 use_min_mtu(adev
->newsocket
);
2115 INSIST(setsockopt(nsock
->fd
, SOL_SOCKET
, SO_UPDATE_ACCEPT_CONTEXT
,
2116 (char *)&sock
->fd
, sizeof(sock
->fd
)) == 0);
2119 * Hook it up into the manager.
2122 nsock
->connected
= 1;
2123 _set_state(nsock
, SOCK_OPEN
);
2125 LOCK(&nsock
->manager
->lock
);
2126 ISC_LIST_APPEND(nsock
->manager
->socklist
, nsock
, link
);
2127 InterlockedIncrement(&nsock
->manager
->totalSockets
);
2128 UNLOCK(&nsock
->manager
->lock
);
2130 socket_log(__LINE__
, sock
, &nsock
->address
, CREATION
,
2131 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTEDCXN
,
2132 "accepted_connection new_socket %p fd %d",
2135 adev
->result
= result
;
2136 send_acceptdone_event(sock
, &adev
);
2140 UNLOCK(&sock
->lock
);
2142 HeapFree(hHeapHandle
, 0, lpo
->acceptbuffer
);
2143 lpo
->acceptbuffer
= NULL
;
2147 * Called when a socket with a pending connect() finishes.
2148 * Note that the socket is locked before entering.
2151 internal_connect(isc_socket_t
*sock
, IoCompletionInfo
*lpo
, int connect_errno
) {
2152 isc_socket_connev_t
*cdev
;
2153 char strbuf
[ISC_STRERRORSIZE
];
2155 INSIST(VALID_SOCKET(sock
));
2159 INSIST(sock
->pending_iocp
> 0);
2160 sock
->pending_iocp
--;
2161 INSIST(sock
->pending_connect
== 1);
2162 sock
->pending_connect
= 0;
2165 * Has this event been canceled?
2168 if (!connectdone_is_active(sock
, cdev
)) {
2169 sock
->pending_connect
= 0;
2170 if (sock
->fd
!= INVALID_SOCKET
) {
2171 closesocket(sock
->fd
);
2172 sock
->fd
= INVALID_SOCKET
;
2173 _set_state(sock
, SOCK_CLOSED
);
2176 UNLOCK(&sock
->lock
);
2181 * Check possible Windows network event error status here.
2183 if (connect_errno
!= 0) {
2185 * If the error is SOFT, just try again on this
2186 * fd and pretend nothing strange happened.
2188 if (SOFT_ERROR(connect_errno
) ||
2189 connect_errno
== WSAEINPROGRESS
) {
2190 sock
->pending_connect
= 1;
2192 UNLOCK(&sock
->lock
);
2197 * Translate other errors into ISC_R_* flavors.
2199 switch (connect_errno
) {
2200 #define ERROR_MATCH(a, b) case a: cdev->result = b; break;
2201 ERROR_MATCH(WSAEACCES
, ISC_R_NOPERM
);
2202 ERROR_MATCH(WSAEADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
2203 ERROR_MATCH(WSAEAFNOSUPPORT
, ISC_R_ADDRNOTAVAIL
);
2204 ERROR_MATCH(WSAECONNREFUSED
, ISC_R_CONNREFUSED
);
2205 ERROR_MATCH(WSAEHOSTUNREACH
, ISC_R_HOSTUNREACH
);
2206 ERROR_MATCH(WSAEHOSTDOWN
, ISC_R_HOSTDOWN
);
2207 ERROR_MATCH(WSAENETUNREACH
, ISC_R_NETUNREACH
);
2208 ERROR_MATCH(WSAENETDOWN
, ISC_R_NETDOWN
);
2209 ERROR_MATCH(WSAENOBUFS
, ISC_R_NORESOURCES
);
2210 ERROR_MATCH(WSAECONNRESET
, ISC_R_CONNECTIONRESET
);
2211 ERROR_MATCH(WSAECONNABORTED
, ISC_R_CONNECTIONRESET
);
2212 ERROR_MATCH(WSAETIMEDOUT
, ISC_R_TIMEDOUT
);
2215 cdev
->result
= ISC_R_UNEXPECTED
;
2216 isc__strerror(connect_errno
, strbuf
, sizeof(strbuf
));
2217 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2218 "internal_connect: connect() %s",
2222 INSIST(setsockopt(sock
->fd
, SOL_SOCKET
,
2223 SO_UPDATE_CONNECT_CONTEXT
, NULL
, 0) == 0);
2224 cdev
->result
= ISC_R_SUCCESS
;
2225 sock
->connected
= 1;
2226 socket_log(__LINE__
, sock
, &sock
->address
, IOEVENT
,
2227 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTEDCXN
,
2228 "internal_connect: success");
2231 send_connectdone_event(sock
, &cdev
);
2233 UNLOCK(&sock
->lock
);
2237 * Loop through the socket, returning ISC_R_EOF for each done event pending.
2240 send_recvdone_abort(isc_socket_t
*sock
, isc_result_t result
) {
2241 isc_socketevent_t
*dev
;
2243 while (!ISC_LIST_EMPTY(sock
->recv_list
)) {
2244 dev
= ISC_LIST_HEAD(sock
->recv_list
);
2245 dev
->result
= result
;
2246 send_recvdone_event(sock
, &dev
);
2251 * Take the data we received in our private buffer, and if any recv() calls on
2252 * our list are satisfied, send the corresponding done event.
2254 * If we need more data (there are still items on the recv_list after we consume all
2255 * our data) then arrange for another system recv() call to fill our buffers.
2258 internal_recv(isc_socket_t
*sock
, int nbytes
)
2260 INSIST(VALID_SOCKET(sock
));
2265 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
2266 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALRECV
,
2267 "internal_recv: %d bytes received", nbytes
);
2270 * If we got here, the I/O operation succeeded. However, we might still have removed this
2271 * event from our notification list (or never placed it on it due to immediate completion.)
2272 * Handle the reference counting here, and handle the cancellation event just after.
2274 INSIST(sock
->pending_iocp
> 0);
2275 sock
->pending_iocp
--;
2276 INSIST(sock
->pending_recv
> 0);
2277 sock
->pending_recv
--;
2280 * The only way we could have gotten here is that our I/O has successfully completed.
2281 * Update our pointers, and move on. The only odd case here is that we might not
2282 * have received enough data on a TCP stream to satisfy the minimum requirements. If
2283 * this is the case, we will re-issue the recv() call for what we need.
2285 * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end
2288 if (nbytes
== 0 && sock
->type
== isc_sockettype_tcp
) {
2289 send_recvdone_abort(sock
, ISC_R_EOF
);
2290 maybe_free_socket(&sock
, __LINE__
);
2293 sock
->recvbuf
.remaining
= nbytes
;
2294 sock
->recvbuf
.consume_position
= sock
->recvbuf
.base
;
2295 completeio_recv(sock
);
2298 * If there are more receivers waiting for data, queue another receive
2301 queue_receive_request(sock
);
2304 * Unlock and/or destroy if we are the last thing this socket has left to do.
2306 maybe_free_socket(&sock
, __LINE__
);
2310 internal_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
2311 struct msghdr
*messagehdr
, int nbytes
, int send_errno
, IoCompletionInfo
*lpo
)
2316 * Find out what socket this is and lock it.
2318 INSIST(VALID_SOCKET(sock
));
2323 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
2324 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALSEND
,
2325 "internal_send: task got socket event %p", dev
);
2327 buffer
= ISC_LIST_HEAD(lpo
->bufferlist
);
2328 while (buffer
!= NULL
) {
2329 ISC_LIST_DEQUEUE(lpo
->bufferlist
, buffer
, link
);
2331 socket_log(__LINE__
, sock
, NULL
, TRACE
,
2332 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
2333 "free_buffer %p %p", buffer
, buffer
->buf
);
2335 HeapFree(hHeapHandle
, 0, buffer
->buf
);
2336 HeapFree(hHeapHandle
, 0, buffer
);
2337 buffer
= ISC_LIST_HEAD(lpo
->bufferlist
);
2340 INSIST(sock
->pending_iocp
> 0);
2341 sock
->pending_iocp
--;
2342 INSIST(sock
->pending_send
> 0);
2343 sock
->pending_send
--;
2345 /* If the event is no longer in the list we can just return */
2346 if (!senddone_is_active(sock
, dev
))
2350 * Set the error code and send things on its way.
2352 switch (completeio_send(sock
, dev
, messagehdr
, nbytes
, send_errno
)) {
2357 send_senddone_event(sock
, &dev
);
2362 maybe_free_socket(&sock
, __LINE__
);
2366 * These return if the done event passed in is on the list (or for connect, is
2367 * the one we're waiting for). Using these ensures we will not double-send an
2370 static isc_boolean_t
2371 senddone_is_active(isc_socket_t
*sock
, isc_socketevent_t
*dev
)
2373 isc_socketevent_t
*ldev
;
2375 ldev
= ISC_LIST_HEAD(sock
->send_list
);
2376 while (ldev
!= NULL
&& ldev
!= dev
)
2377 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
2379 return (ldev
== NULL
? ISC_FALSE
: ISC_TRUE
);
2382 static isc_boolean_t
2383 acceptdone_is_active(isc_socket_t
*sock
, isc_socket_newconnev_t
*dev
)
2385 isc_socket_newconnev_t
*ldev
;
2387 ldev
= ISC_LIST_HEAD(sock
->accept_list
);
2388 while (ldev
!= NULL
&& ldev
!= dev
)
2389 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
2391 return (ldev
== NULL
? ISC_FALSE
: ISC_TRUE
);
2394 static isc_boolean_t
2395 connectdone_is_active(isc_socket_t
*sock
, isc_socket_connev_t
*dev
)
2397 return (sock
->connect_ev
== dev
? ISC_TRUE
: ISC_FALSE
);
2401 // The Windows network stack seems to have two very distinct paths depending
2402 // on what is installed. Specifically, if something is looking at network
2403 // connections (like an anti-virus or anti-malware application, such as
2404 // McAfee products) Windows may return additional error conditions which
2405 // were not previously returned.
2407 // One specific one is when a TCP SYN scan is used. In this situation,
2408 // Windows responds with the SYN-ACK, but the scanner never responds with
2409 // the 3rd packet, the ACK. Windows considers this a partially open connection.
2410 // Most Unix networking stacks, and Windows without McAfee installed, will
2411 // not return this to the caller. However, with this product installed,
2412 // Windows returns this as a failed status on the Accept() call. Here, we
2413 // will just re-issue the ISCAcceptEx() call as if nothing had happened.
2415 // This code should only be called when the listening socket has received
2416 // such an error. Additionally, the "parent" socket must be locked.
2417 // Additionally, the lpo argument is re-used here, and must not be freed
2421 restart_accept(isc_socket_t
*parent
, IoCompletionInfo
*lpo
)
2423 isc_socket_t
*nsock
= lpo
->adev
->newsocket
;
2427 * AcceptEx() requires we pass in a socket. Note that we carefully
2428 * do not close the previous socket in case of an error message returned by
2429 * our new socket() call. If we return an error here, our caller will
2432 new_fd
= socket(parent
->pf
, SOCK_STREAM
, IPPROTO_TCP
);
2433 if (nsock
->fd
== INVALID_SOCKET
) {
2434 return (ISC_R_FAILURE
); // parent will ask windows for error message
2436 closesocket(nsock
->fd
);
2439 memset(&lpo
->overlapped
, 0, sizeof(lpo
->overlapped
));
2441 ISCAcceptEx(parent
->fd
,
2442 nsock
->fd
, /* Accepted Socket */
2443 lpo
->acceptbuffer
, /* Buffer for initial Recv */
2444 0, /* Length of Buffer */
2445 sizeof(SOCKADDR_STORAGE
) + 16, /* Local address length + 16 */
2446 sizeof(SOCKADDR_STORAGE
) + 16, /* Remote address lengh + 16 */
2447 (LPDWORD
)&lpo
->received_bytes
, /* Bytes Recved */
2448 (LPOVERLAPPED
)lpo
/* Overlapped structure */
2451 InterlockedDecrement(&nsock
->manager
->iocp_total
);
2452 iocompletionport_update(nsock
);
2454 return (ISC_R_SUCCESS
);
2458 * This is the I/O Completion Port Worker Function. It loops forever
2459 * waiting for I/O to complete and then forwards them for further
2460 * processing. There are a number of these in separate threads.
/*
 * SocketIoThread: worker routine for the I/O completion port.
 *
 * ThreadContext is used directly as the isc_socketmgr_t for this worker.
 * The routine raises its own priority to THREAD_PRIORITY_ABOVE_NORMAL
 * (a failure is reported through FATAL_ERROR), then calls
 * GetQueuedCompletionStatus() on manager->hIoCompletionPort.  A NULL
 * overlapped pointer (lpo) is treated as the request to exit.  The
 * request type is read from lpo->request_type.
 *
 * On the path that reads GetLastError() the pending_* counters are
 * adjusted and the matching "done" event is posted with the translated
 * isc_result; afterwards maybe_free_socket() is called and lpo is
 * released with HeapFree().  Otherwise the completion is handed to
 * internal_recv(), internal_send(), internal_accept() or
 * internal_connect(), and lpo is released with HeapFree().
 *
 * Returns (isc_threadresult_t)0 after logging "SocketIoThread exiting".
 *
 * NOTE(review): this listing omits several lines (some declarations,
 * the loop header, the bSuccess test, switch/case labels and locking),
 * so the exact branch structure must be confirmed against the full file.
 */
2462 static isc_threadresult_t WINAPI
2463 SocketIoThread(LPVOID ThreadContext
) {
2464 isc_socketmgr_t
*manager
= ThreadContext
;
2465 BOOL bSuccess
= FALSE
;
2467 IoCompletionInfo
*lpo
= NULL
;
2468 isc_socket_t
*sock
= NULL
;
2470 struct msghdr
*messagehdr
= NULL
;
2472 char strbuf
[ISC_STRERRORSIZE
];
2475 REQUIRE(VALID_MANAGER(manager
));
2478 * Set the thread priority high enough so I/O will
2479 * preempt normal recv packet processing, but not
2480 * higher than the timer sync thread.
2482 if (!SetThreadPriority(GetCurrentThread(),
2483 THREAD_PRIORITY_ABOVE_NORMAL
)) {
2484 errval
= GetLastError();
2485 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
2486 FATAL_ERROR(__FILE__
, __LINE__
,
2487 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2489 "Can't set thread priority: %s"),
2494 * Loop forever waiting on I/O Completions and then processing them
2498 bSuccess
= GetQueuedCompletionStatus(manager
->hIoCompletionPort
,
2501 (LPWSAOVERLAPPED
*)&lpo
,
2503 if (lpo
== NULL
) /* Received request to exit */
2506 REQUIRE(VALID_SOCKET(sock
));
2508 request
= lpo
->request_type
;
2512 isc_result_t isc_result
;
2515 * Did the I/O operation complete?
/* Translate the Windows error code into an ISC result code. */
2517 errstatus
= GetLastError();
2518 isc_result
= isc__errno2resultx(errstatus
, __FILE__
, __LINE__
);
/* Receive bookkeeping: one fewer outstanding IOCP request and receive. */
2524 INSIST(sock
->pending_iocp
> 0);
2525 sock
->pending_iocp
--;
2526 INSIST(sock
->pending_recv
> 0);
2527 sock
->pending_recv
--;
2528 if (!sock
->connected
&&
2529 ((errstatus
== ERROR_HOST_UNREACHABLE
) ||
2530 (errstatus
== WSAENETRESET
) ||
2531 (errstatus
== WSAECONNRESET
))) {
2532 /* ignore soft errors */
2533 queue_receive_request(sock
);
2536 send_recvdone_abort(sock
, isc_result
);
2537 if (isc_result
== ISC_R_UNEXPECTED
) {
2538 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2539 "SOCKET_RECV: Windows error code: %d, returning ISC error %d",
2540 errstatus
, isc_result
);
/* Send bookkeeping, then report the result on the send event if still active. */
2545 INSIST(sock
->pending_iocp
> 0);
2546 sock
->pending_iocp
--;
2547 INSIST(sock
->pending_send
> 0);
2548 sock
->pending_send
--;
2549 if (senddone_is_active(sock
, lpo
->dev
)) {
2550 lpo
->dev
->result
= isc_result
;
2551 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2553 send_senddone_event(sock
, &lpo
->dev
);
/* Accept handling: a successful restart_accept() keeps the request outstanding. */
2558 INSIST(sock
->pending_iocp
> 0);
2559 INSIST(sock
->pending_accept
> 0);
2561 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2562 "Accept: errstatus=%d isc_result=%d", errstatus
, isc_result
);
2564 if (acceptdone_is_active(sock
, lpo
->adev
)) {
2565 if (restart_accept(sock
, lpo
) == ISC_R_SUCCESS
) {
2566 UNLOCK(&sock
->lock
);
2569 errstatus
= GetLastError();
2570 isc_result
= isc__errno2resultx(errstatus
, __FILE__
, __LINE__
);
2571 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2572 "restart_accept() failed: errstatus=%d isc_result=%d",
2573 errstatus
, isc_result
);
2577 sock
->pending_iocp
--;
2578 sock
->pending_accept
--;
/* Close and release the pre-created accept socket before posting the event. */
2579 if (acceptdone_is_active(sock
, lpo
->adev
)) {
2580 closesocket(lpo
->adev
->newsocket
->fd
);
2581 lpo
->adev
->newsocket
->fd
= INVALID_SOCKET
;
2582 lpo
->adev
->newsocket
->references
--;
2583 free_socket(&lpo
->adev
->newsocket
, __LINE__
);
2584 lpo
->adev
->result
= isc_result
;
2585 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2587 send_acceptdone_event(sock
, &lpo
->adev
);
2591 case SOCKET_CONNECT
:
2592 INSIST(sock
->pending_iocp
> 0);
2593 sock
->pending_iocp
--;
2594 INSIST(sock
->pending_connect
== 1);
2595 sock
->pending_connect
= 0;
2596 if (connectdone_is_active(sock
, lpo
->cdev
)) {
2597 lpo
->cdev
->result
= isc_result
;
2598 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2599 "canceled_connect");
2600 send_connectdone_event(sock
, &lpo
->cdev
);
2604 maybe_free_socket(&sock
, __LINE__
);
2607 HeapFree(hHeapHandle
, 0, lpo
);
/* Dispatch path: hand the completion to the per-request internal_* handler. */
2611 messagehdr
= &lpo
->messagehdr
;
2615 internal_recv(sock
, nbytes
);
2618 internal_send(sock
, lpo
->dev
, messagehdr
, nbytes
, errstatus
, lpo
);
2621 internal_accept(sock
, lpo
, errstatus
);
2623 case SOCKET_CONNECT
:
2624 internal_connect(sock
, lpo
, errstatus
);
2629 HeapFree(hHeapHandle
, 0, lpo
);
2633 * Exit Completion Port Thread
2635 manager_log(manager
, TRACE
,
2636 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2637 ISC_MSG_EXITING
, "SocketIoThread exiting"));
2638 return ((isc_threadresult_t
)0);
2642 * Create a new socket manager.
/*
 * isc__socketmgr_create: convenience wrapper that forwards mctx and
 * managerp to isc_socketmgr_create2() with a maxsocks argument of 0 and
 * returns that call's result unchanged.
 */
2645 isc__socketmgr_create(isc_mem_t
*mctx
, isc_socketmgr_t
**managerp
) {
2646 return (isc_socketmgr_create2(mctx
, managerp
, 0));
/*
 * isc__socketmgr_create2: allocate and initialise a socket manager.
 *
 * Requires managerp != NULL and *managerp == NULL.  The manager is
 * obtained from mctx with isc_mem_get(); ISC_R_NOMEMORY is returned when
 * that fails.  The magic number, empty socket list, mutex and the
 * shutdown_ok condition variable are then initialised.  If either
 * initialisation fails the manager memory is returned to mctx; a
 * condition-variable failure also destroys the lock, logs through
 * UNEXPECTED_ERROR and yields ISC_R_UNEXPECTED.  On success the manager
 * attaches to mctx, creates the completion port(s) via
 * iocompletionport_init(), clears its counters, stores itself in
 * *managerp and returns ISC_R_SUCCESS.
 *
 * NOTE(review): the ISC_R_NOTIMPLEMENTED return right after the REQUIRE
 * is presumably guarded by a check on maxsocks; the guarding line is
 * not visible in this listing -- confirm.
 */
2650 isc__socketmgr_create2(isc_mem_t
*mctx
, isc_socketmgr_t
**managerp
,
2651 unsigned int maxsocks
)
2653 isc_socketmgr_t
*manager
;
2654 isc_result_t result
;
2656 REQUIRE(managerp
!= NULL
&& *managerp
== NULL
);
2659 return (ISC_R_NOTIMPLEMENTED
);
2661 manager
= isc_mem_get(mctx
, sizeof(*manager
));
2662 if (manager
== NULL
)
2663 return (ISC_R_NOMEMORY
);
2667 manager
->magic
= SOCKET_MANAGER_MAGIC
;
/* mctx stays NULL until isc_mem_attach() below takes the reference. */
2668 manager
->mctx
= NULL
;
2669 manager
->stats
= NULL
;
2670 ISC_LIST_INIT(manager
->socklist
);
2671 result
= isc_mutex_init(&manager
->lock
);
2672 if (result
!= ISC_R_SUCCESS
) {
2673 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2676 if (isc_condition_init(&manager
->shutdown_ok
) != ISC_R_SUCCESS
) {
2677 DESTROYLOCK(&manager
->lock
);
2678 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2679 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2680 "isc_condition_init() %s",
2681 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2682 ISC_MSG_FAILED
, "failed"));
2683 return (ISC_R_UNEXPECTED
);
2686 isc_mem_attach(mctx
, &manager
->mctx
);
2688 iocompletionport_init(manager
); /* Create the Completion Ports */
2690 manager
->bShutdown
= ISC_FALSE
;
2691 manager
->totalSockets
= 0;
2692 manager
->iocp_total
= 0;
2694 *managerp
= manager
;
2696 return (ISC_R_SUCCESS
);
/*
 * isc_socketmgr_getmaxsockets: not implemented in this backend.
 *
 * Validates that manager is a valid manager and that nsockp is non-NULL,
 * then returns ISC_R_NOTIMPLEMENTED.  *nsockp is never written in the
 * visible code.
 */
2700 isc_socketmgr_getmaxsockets(isc_socketmgr_t
*manager
, unsigned int *nsockp
) {
2701 REQUIRE(VALID_MANAGER(manager
));
2702 REQUIRE(nsockp
!= NULL
);
2704 return (ISC_R_NOTIMPLEMENTED
);
/*
 * isc_socketmgr_setstats: attach a statistics object to the manager.
 *
 * Preconditions enforced with REQUIRE: manager is valid, it owns no
 * sockets yet, no statistics object is attached, and stats exposes
 * exactly isc_sockstatscounter_max counters.  The reference is taken
 * with isc_stats_attach() into manager->stats.
 */
2708 isc_socketmgr_setstats(isc_socketmgr_t
*manager
, isc_stats_t
*stats
) {
2709 REQUIRE(VALID_MANAGER(manager
));
2710 REQUIRE(ISC_LIST_EMPTY(manager
->socklist
));
2711 REQUIRE(manager
->stats
== NULL
);
2712 REQUIRE(isc_stats_ncounters(stats
) == isc_sockstatscounter_max
);
2714 isc_stats_attach(stats
, &manager
->stats
);
/*
 * isc__socketmgr_destroy: tear down a socket manager.
 *
 * Requires managerp != NULL and *managerp to be a valid manager.  With
 * the manager lock held, waits on shutdown_ok until the socket list is
 * empty.  After unlocking it signals the completion port workers to
 * exit, sets bShutdown, joins each of the maxIOCPThreads worker threads
 * (a join failure is only logged), closes the completion port handle,
 * destroys the condition variable and lock, detaches any statistics
 * object, returns the manager memory to its context and finally
 * detaches from that context.
 *
 * NOTE(review): bShutdown is set after signal_iocompletionport_exit()
 * in the visible order; whether workers depend on the flag is not
 * visible here.  Some declarations and the clearing of *managerp are
 * not present in this listing.
 */
2718 isc__socketmgr_destroy(isc_socketmgr_t
**managerp
) {
2719 isc_socketmgr_t
*manager
;
2724 * Destroy a socket manager.
2727 REQUIRE(managerp
!= NULL
);
2728 manager
= *managerp
;
2729 REQUIRE(VALID_MANAGER(manager
));
2731 LOCK(&manager
->lock
);
2734 * Wait for all sockets to be destroyed.
2736 while (!ISC_LIST_EMPTY(manager
->socklist
)) {
2737 manager_log(manager
, CREATION
,
2738 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2739 ISC_MSG_SOCKETSREMAIN
,
2741 WAIT(&manager
->shutdown_ok
, &manager
->lock
);
2744 UNLOCK(&manager
->lock
);
2747 * Here, we need to had some wait code for the completion port
2750 signal_iocompletionport_exit(manager
);
2751 manager
->bShutdown
= ISC_TRUE
;
2754 * Wait for threads to exit.
2756 for (i
= 0; i
< manager
->maxIOCPThreads
; i
++) {
2757 if (isc_thread_join((isc_thread_t
) manager
->hIOCPThreads
[i
],
2758 NULL
) != ISC_R_SUCCESS
)
2759 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2760 "isc_thread_join() for Completion Port %s",
2761 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2762 ISC_MSG_FAILED
, "failed"));
2768 CloseHandle(manager
->hIoCompletionPort
);
2770 (void)isc_condition_destroy(&manager
->shutdown_ok
);
2772 DESTROYLOCK(&manager
->lock
);
2773 if (manager
->stats
!= NULL
)
2774 isc_stats_detach(&manager
->stats
);
/* Keep the context pointer: the manager memory is released before the detach. */
2776 mctx
= manager
->mctx
;
2777 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2779 isc_mem_detach(&mctx
);
/*
 * queue_receive_event: append a receive event to the socket's queue.
 *
 * Takes a task reference via isc_task_attach(task, &ntask), marks the
 * event with ISC_SOCKEVENTATTR_ATTACHED, asserts that the event is not
 * already linked, enqueues it on sock->recv_list and logs the pairing.
 *
 * NOTE(review): the argument line of the final socket_log() call is not
 * visible in this listing.
 */
2785 queue_receive_event(isc_socket_t
*sock
, isc_task_t
*task
, isc_socketevent_t
*dev
)
2787 isc_task_t
*ntask
= NULL
;
2789 isc_task_attach(task
, &ntask
);
2790 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
2793 * Enqueue the request.
2795 INSIST(!ISC_LINK_LINKED(dev
, ev_link
));
2796 ISC_LIST_ENQUEUE(sock
->recv_list
, dev
, ev_link
);
2798 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2799 "queue_receive_event: event %p -> task %p",
2804 * Check the pending receive queue, and if we have data pending, give it to this
2805 * caller. If we have none, queue an I/O request. If this caller is not the first
2806 * on the list, then we will just queue this event and return.
2808 * Caller must have the socket locked.
/*
 * socket_recv: common receive path shared by the public receive calls.
 *
 * Records task as the event's sender, checks for a closed socket
 * (fd == INVALID_SOCKET), queues the event with queue_receive_event(),
 * and reports ISC_R_INPROGRESS instead of ISC_R_SUCCESS when the caller
 * passed ISC_SOCKFLAG_IMMEDIATE.  It then calls completeio_recv() and
 * queue_receive_request().
 *
 * NOTE(review): the flags parameter line, the body of the closed-socket
 * branch and the final return are not visible in this listing.
 */
2811 socket_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
2814 isc_result_t result
= ISC_R_SUCCESS
;
2816 dev
->ev_sender
= task
;
2818 if (sock
->fd
== INVALID_SOCKET
)
2822 * Queue our event on the list of things to do. Call our function to
2823 * attempt to fill buffers as much as possible, and return done events.
2824 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2825 * here and tell our caller that we could not satisfy it immediately.
2827 queue_receive_event(sock
, task
, dev
);
2828 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
2829 result
= ISC_R_INPROGRESS
;
2831 completeio_recv(sock
);
2834 * If there are more receivers waiting for data, queue another receive
2837 queue_receive_request(sock
);
/*
 * isc__socket_recvv: receive into a caller-supplied list of buffers.
 *
 * Returns ISC_R_CONNREFUSED when the socket is already closed.  Requires
 * a non-empty buflist with available space, a task and an action, and
 * insists that the socket is bound.  A RECVDONE event is allocated from
 * the manager's memory context (ISC_R_NOMEMORY on the visible failure
 * return), every buffer is moved from *buflist onto the event's own
 * buffer list, and the request is handed to socket_recv() with flags 0.
 * The socket lock is released before each visible return.
 *
 * NOTE(review): the LOCK call, the NULL check on dev, parts of the
 * dev->minimum selection and the final return are not visible in this
 * listing.
 */
2843 isc__socket_recvv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2844 unsigned int minimum
, isc_task_t
*task
,
2845 isc_taskaction_t action
, void *arg
)
2847 isc_socketevent_t
*dev
;
2848 isc_socketmgr_t
*manager
;
2849 unsigned int iocount
;
2850 isc_buffer_t
*buffer
;
2853 REQUIRE(VALID_SOCKET(sock
));
2858 * Make sure that the socket is not closed. XXXMLG change error here?
2860 if (sock
->fd
== INVALID_SOCKET
) {
2861 UNLOCK(&sock
->lock
);
2862 return (ISC_R_CONNREFUSED
);
2865 REQUIRE(buflist
!= NULL
);
2866 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
2867 REQUIRE(task
!= NULL
);
2868 REQUIRE(action
!= NULL
);
2870 manager
= sock
->manager
;
2871 REQUIRE(VALID_MANAGER(manager
));
2873 iocount
= isc_bufferlist_availablecount(buflist
);
2874 REQUIRE(iocount
> 0);
2876 INSIST(sock
->bound
);
2878 dev
= allocate_socketevent(manager
->mctx
, sock
,
2879 ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2881 UNLOCK(&sock
->lock
);
2882 return (ISC_R_NOMEMORY
);
2886 * UDP sockets are always partial read
2888 if (sock
->type
== isc_sockettype_udp
)
2892 dev
->minimum
= iocount
;
2894 dev
->minimum
= minimum
;
2898 * Move each buffer from the passed in list to our internal one.
2900 buffer
= ISC_LIST_HEAD(*buflist
);
2901 while (buffer
!= NULL
) {
2902 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
2903 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
2904 buffer
= ISC_LIST_HEAD(*buflist
);
2907 ret
= socket_recv(sock
, dev
, task
, 0);
2909 UNLOCK(&sock
->lock
);
/*
 * isc__socket_recv: receive into a single region.
 *
 * Returns ISC_R_CONNREFUSED when the socket is already closed.  Requires
 * a non-NULL action and a valid manager, and insists that the socket is
 * bound.  Allocates a RECVDONE event (ISC_R_NOMEMORY on the visible
 * failure return) and delegates to isc_socket_recv2() with flags 0.
 *
 * NOTE(review): the LOCK call, the NULL check on dev and the final
 * return are not visible in this listing.  isc_socket_recv2() is called
 * before the visible UNLOCK; whether that callee locks the same socket
 * again cannot be determined from here -- confirm.
 */
2914 isc__socket_recv(isc_socket_t
*sock
, isc_region_t
*region
,
2915 unsigned int minimum
, isc_task_t
*task
,
2916 isc_taskaction_t action
, void *arg
)
2918 isc_socketevent_t
*dev
;
2919 isc_socketmgr_t
*manager
;
2922 REQUIRE(VALID_SOCKET(sock
));
2927 * make sure that the socket's not closed
2929 if (sock
->fd
== INVALID_SOCKET
) {
2930 UNLOCK(&sock
->lock
);
2931 return (ISC_R_CONNREFUSED
);
2933 REQUIRE(action
!= NULL
);
2935 manager
= sock
->manager
;
2936 REQUIRE(VALID_MANAGER(manager
));
2938 INSIST(sock
->bound
);
2940 dev
= allocate_socketevent(manager
->mctx
, sock
,
2941 ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2943 UNLOCK(&sock
->lock
);
2944 return (ISC_R_NOMEMORY
);
2947 ret
= isc_socket_recv2(sock
, region
, minimum
, task
, dev
, 0);
2948 UNLOCK(&sock
->lock
);
/*
 * isc__socket_recv2: receive into a region using a caller-owned event.
 *
 * Presets event->result to ISC_R_UNEXPECTED and event->ev_sender to the
 * socket, returns ISC_R_CONNREFUSED when the socket is closed, then
 * resets the event's buffer list, copies *region into it, clears its
 * attributes, chooses event->minimum and hands the event to
 * socket_recv() together with the caller's flags.
 *
 * NOTE(review): the LOCK call, parts of the minimum selection (only the
 * UDP test and two assignments are visible) and the final return are
 * not present in this listing.
 */
2953 isc__socket_recv2(isc_socket_t
*sock
, isc_region_t
*region
,
2954 unsigned int minimum
, isc_task_t
*task
,
2955 isc_socketevent_t
*event
, unsigned int flags
)
2959 REQUIRE(VALID_SOCKET(sock
));
2963 event
->result
= ISC_R_UNEXPECTED
;
2964 event
->ev_sender
= sock
;
2966 * make sure that the socket's not closed
2968 if (sock
->fd
== INVALID_SOCKET
) {
2969 UNLOCK(&sock
->lock
);
2970 return (ISC_R_CONNREFUSED
);
2973 ISC_LIST_INIT(event
->bufferlist
);
2974 event
->region
= *region
;
2977 event
->attributes
= 0;
2980 * UDP sockets are always partial read.
2982 if (sock
->type
== isc_sockettype_udp
)
2986 event
->minimum
= region
->length
;
2988 event
->minimum
= minimum
;
2991 ret
= socket_recv(sock
, event
, task
, flags
);
2992 UNLOCK(&sock
->lock
);
2997 * Caller must have the socket locked.
/*
 * socket_send: common send path shared by the public send calls.
 *
 * Records task as the event's sender and stores the destination with
 * set_dev_address().  When pktinfo is supplied it is copied into the
 * event, ISC_SOCKEVENTATTR_PKTINFO is set and the interface index is
 * forced to 0 so the kernel picks the outgoing interface.  The I/O is
 * started with startio_send().  In the visible queuing branch the task
 * is attached, the event is marked ATTACHED and enqueued on
 * sock->send_list, and ISC_R_INPROGRESS is reported when
 * ISC_SOCKFLAG_IMMEDIATE was requested.
 *
 * NOTE(review): the flags parameter line, several local declarations,
 * the switch on io_state (only the DOIO_PENDING label is visible) and
 * the final return are not present in this listing.
 */
3000 socket_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
3001 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
3007 isc_task_t
*ntask
= NULL
;
3008 isc_result_t result
= ISC_R_SUCCESS
;
3010 dev
->ev_sender
= task
;
3012 set_dev_address(address
, sock
, dev
);
3013 if (pktinfo
!= NULL
) {
3014 socket_log(__LINE__
, sock
, NULL
, TRACE
, isc_msgcat
, ISC_MSGSET_SOCKET
,
3015 ISC_MSG_PKTINFOPROVIDED
,
3016 "pktinfo structure provided, ifindex %u (set to 0)",
3017 pktinfo
->ipi6_ifindex
);
3019 dev
->attributes
|= ISC_SOCKEVENTATTR_PKTINFO
;
3020 dev
->pktinfo
= *pktinfo
;
3022 * Set the pktinfo index to 0 here, to let the kernel decide
3023 * what interface it should send on.
3025 dev
->pktinfo
.ipi6_ifindex
= 0;
3028 io_state
= startio_send(sock
, dev
, &cc
, &send_errno
);
3030 case DOIO_PENDING
: /* I/O started. Enqueue completion event. */
3033 * We couldn't send all or part of the request right now, so
3034 * queue it unless ISC_SOCKFLAG_NORETRY is set.
3036 if ((flags
& ISC_SOCKFLAG_NORETRY
) == 0 ||
3037 io_state
== DOIO_PENDING
) {
3038 isc_task_attach(task
, &ntask
);
3039 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
3042 * Enqueue the request.
3044 INSIST(!ISC_LINK_LINKED(dev
, ev_link
));
3045 ISC_LIST_ENQUEUE(sock
->send_list
, dev
, ev_link
);
3047 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
3048 "socket_send: event %p -> task %p",
3051 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
3052 result
= ISC_R_INPROGRESS
;
/*
 * isc__socket_send: send a region without an explicit destination.
 *
 * Thin wrapper over isc_socket_sendto(); a NULL is passed in the
 * address position.  Argument checking is left to the callee.
 *
 * NOTE(review): the last argument line of the call is not visible in
 * this listing.
 */
3064 isc__socket_send(isc_socket_t
*sock
, isc_region_t
*region
,
3065 isc_task_t
*task
, isc_taskaction_t action
, void *arg
)
3068 * REQUIRE() checking is performed in isc_socket_sendto().
3070 return (isc_socket_sendto(sock
, region
, task
, action
, arg
, NULL
,
/*
 * isc__socket_sendto: send a region to an optional address.
 *
 * Rejects fdwatch sockets via REQUIRE, returns ISC_R_CONNREFUSED when
 * the socket is already closed, requires region, task and action, and
 * insists that the socket is bound.  Allocates a SENDDONE event
 * (ISC_R_NOMEMORY on the visible failure return), copies *region into
 * it and calls socket_send() with flags 0.
 *
 * NOTE(review): the LOCK call, the NULL check on dev and the final
 * return are not visible in this listing.
 */
3075 isc__socket_sendto(isc_socket_t
*sock
, isc_region_t
*region
,
3076 isc_task_t
*task
, isc_taskaction_t action
, void *arg
,
3077 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
3079 isc_socketevent_t
*dev
;
3080 isc_socketmgr_t
*manager
;
3083 REQUIRE(VALID_SOCKET(sock
));
3084 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
3090 * make sure that the socket's not closed
3092 if (sock
->fd
== INVALID_SOCKET
) {
3093 UNLOCK(&sock
->lock
);
3094 return (ISC_R_CONNREFUSED
);
3096 REQUIRE(region
!= NULL
);
3097 REQUIRE(task
!= NULL
);
3098 REQUIRE(action
!= NULL
);
3100 manager
= sock
->manager
;
3101 REQUIRE(VALID_MANAGER(manager
));
3103 INSIST(sock
->bound
);
3105 dev
= allocate_socketevent(manager
->mctx
, sock
,
3106 ISC_SOCKEVENT_SENDDONE
, action
, arg
);
3108 UNLOCK(&sock
->lock
);
3109 return (ISC_R_NOMEMORY
);
3111 dev
->region
= *region
;
3113 ret
= socket_send(sock
, dev
, task
, address
, pktinfo
, 0);
3114 UNLOCK(&sock
->lock
);
/*
 * isc__socket_sendv: send a buffer list without an explicit destination.
 *
 * Thin wrapper over isc_socket_sendtov2(); a NULL is passed in the
 * address position.
 *
 * NOTE(review): the last argument line of the call is not visible in
 * this listing.
 */
3119 isc__socket_sendv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
3120 isc_task_t
*task
, isc_taskaction_t action
, void *arg
)
3122 return (isc_socket_sendtov2(sock
, buflist
, task
, action
, arg
, NULL
,
/*
 * isc__socket_sendtov: send a buffer list to an optional address.
 *
 * Thin wrapper over isc_socket_sendtov2() that forwards sock, buflist,
 * task, action, arg and address.
 *
 * NOTE(review): the last argument line of the call (presumably pktinfo
 * and the flags value) is not visible in this listing.
 */
3127 isc__socket_sendtov(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
3128 isc_task_t
*task
, isc_taskaction_t action
, void *arg
,
3129 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
3131 return (isc_socket_sendtov2(sock
, buflist
, task
, action
, arg
, address
,
/*
 * isc__socket_sendtov2: send a buffer list to an optional address with
 * caller-supplied flags.
 *
 * Returns ISC_R_CONNREFUSED when the socket is already closed.  Requires
 * a non-empty buflist containing used data, a task and an action.
 * Allocates a SENDDONE event (ISC_R_NOMEMORY on the visible failure
 * return), moves every buffer from *buflist onto the event's own list
 * and calls socket_send() with the given flags.
 *
 * NOTE(review): the flags parameter line, the LOCK call, the NULL check
 * on dev and the final return are not visible in this listing.
 */
3136 isc__socket_sendtov2(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
3137 isc_task_t
*task
, isc_taskaction_t action
, void *arg
,
3138 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
3141 isc_socketevent_t
*dev
;
3142 isc_socketmgr_t
*manager
;
3143 unsigned int iocount
;
3144 isc_buffer_t
*buffer
;
3147 REQUIRE(VALID_SOCKET(sock
));
3153 * make sure that the socket's not closed
3155 if (sock
->fd
== INVALID_SOCKET
) {
3156 UNLOCK(&sock
->lock
);
3157 return (ISC_R_CONNREFUSED
);
3159 REQUIRE(buflist
!= NULL
);
3160 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
3161 REQUIRE(task
!= NULL
);
3162 REQUIRE(action
!= NULL
);
3164 manager
= sock
->manager
;
3165 REQUIRE(VALID_MANAGER(manager
));
3167 iocount
= isc_bufferlist_usedcount(buflist
);
3168 REQUIRE(iocount
> 0);
3170 dev
= allocate_socketevent(manager
->mctx
, sock
,
3171 ISC_SOCKEVENT_SENDDONE
, action
, arg
);
3173 UNLOCK(&sock
->lock
);
3174 return (ISC_R_NOMEMORY
);
3178 * Move each buffer from the passed in list to our internal one.
3180 buffer
= ISC_LIST_HEAD(*buflist
);
3181 while (buffer
!= NULL
) {
3182 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
3183 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
3184 buffer
= ISC_LIST_HEAD(*buflist
);
3187 ret
= socket_send(sock
, dev
, task
, address
, pktinfo
, flags
);
3188 UNLOCK(&sock
->lock
);
/*
 * isc__socket_sendto2: send a region using a caller-owned event.
 *
 * Only ISC_SOCKFLAG_IMMEDIATE and ISC_SOCKFLAG_NORETRY may be set in
 * flags, and NORETRY additionally requires a UDP socket.  The event is
 * preset with ev_sender = sock and result = ISC_R_UNEXPECTED;
 * ISC_R_CONNREFUSED is returned when the socket is already closed.
 * Otherwise the event's buffer list is reset, *region is copied in,
 * attributes are cleared and socket_send() is called with the flags.
 *
 * NOTE(review): the task parameter line, the LOCK call and the final
 * return are not visible in this listing.
 */
3193 isc__socket_sendto2(isc_socket_t
*sock
, isc_region_t
*region
,
3195 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
3196 isc_socketevent_t
*event
, unsigned int flags
)
3200 REQUIRE(VALID_SOCKET(sock
));
3204 REQUIRE((flags
& ~(ISC_SOCKFLAG_IMMEDIATE
|ISC_SOCKFLAG_NORETRY
)) == 0);
3205 if ((flags
& ISC_SOCKFLAG_NORETRY
) != 0)
3206 REQUIRE(sock
->type
== isc_sockettype_udp
);
3207 event
->ev_sender
= sock
;
3208 event
->result
= ISC_R_UNEXPECTED
;
3210 * make sure that the socket's not closed
3212 if (sock
->fd
== INVALID_SOCKET
) {
3213 UNLOCK(&sock
->lock
);
3214 return (ISC_R_CONNREFUSED
);
3216 ISC_LIST_INIT(event
->bufferlist
);
3217 event
->region
= *region
;
3220 event
->attributes
= 0;
3222 ret
= socket_send(sock
, event
, task
, address
, pktinfo
, flags
);
3223 UNLOCK(&sock
->lock
);
/*
 * isc__socket_bind: bind a socket to a local address.
 *
 * Returns ISC_R_CONNREFUSED when the socket is already closed and
 * ISC_R_FAMILYMISMATCH when sockaddr's family differs from sock->pf.
 * Insists that the socket is neither bound nor dupped.  SO_REUSEADDR is
 * only attempted when ISC_SOCKET_REUSEADDRESS is set in options and the
 * address carries a non-zero port; a failure there is logged.  A bind()
 * failure is mapped from WSAGetLastError() to ISC_R_NOPERM,
 * ISC_R_ADDRNOTAVAIL, ISC_R_ADDRINUSE or ISC_R_BOUND, with
 * ISC_R_UNEXPECTED (after logging) for the remaining visible return.
 * On success the bind is logged and ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the LOCK call, local declarations, most case labels and
 * the statement marking the socket as bound are not visible in this
 * listing.
 */
3228 isc__socket_bind(isc_socket_t
*sock
, isc_sockaddr_t
*sockaddr
,
3229 unsigned int options
) {
3231 char strbuf
[ISC_STRERRORSIZE
];
3234 REQUIRE(VALID_SOCKET(sock
));
3239 * make sure that the socket's not closed
3241 if (sock
->fd
== INVALID_SOCKET
) {
3242 UNLOCK(&sock
->lock
);
3243 return (ISC_R_CONNREFUSED
);
3246 INSIST(!sock
->bound
);
3247 INSIST(!sock
->dupped
);
3249 if (sock
->pf
!= sockaddr
->type
.sa
.sa_family
) {
3250 UNLOCK(&sock
->lock
);
3251 return (ISC_R_FAMILYMISMATCH
);
3254 * Only set SO_REUSEADDR when we want a specific port.
3256 if ((options
& ISC_SOCKET_REUSEADDRESS
) != 0 &&
3257 isc_sockaddr_getport(sockaddr
) != (in_port_t
)0 &&
3258 setsockopt(sock
->fd
, SOL_SOCKET
, SO_REUSEADDR
, (char *)&on
,
3260 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
3261 "setsockopt(%d) %s", sock
->fd
,
3262 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
3263 ISC_MSG_FAILED
, "failed"));
3266 if (bind(sock
->fd
, &sockaddr
->type
.sa
, sockaddr
->length
) < 0) {
/* Capture the error before UNLOCK so later calls cannot overwrite it. */
3267 bind_errno
= WSAGetLastError();
3268 UNLOCK(&sock
->lock
);
3269 switch (bind_errno
) {
3271 return (ISC_R_NOPERM
);
3272 case WSAEADDRNOTAVAIL
:
3273 return (ISC_R_ADDRNOTAVAIL
);
3275 return (ISC_R_ADDRINUSE
);
3277 return (ISC_R_BOUND
);
3279 isc__strerror(bind_errno
, strbuf
, sizeof(strbuf
));
3280 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "bind: %s",
3282 return (ISC_R_UNEXPECTED
);
3286 socket_log(__LINE__
, sock
, sockaddr
, TRACE
,
3287 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
, "bound");
3290 UNLOCK(&sock
->lock
);
3291 return (ISC_R_SUCCESS
);
/*
 * isc__socket_filter: accept filters are not implemented in this
 * backend.  Validates sock and returns ISC_R_NOTIMPLEMENTED; filter is
 * not used in the visible code.
 */
3295 isc__socket_filter(isc_socket_t
*sock
, const char *filter
) {
3299 REQUIRE(VALID_SOCKET(sock
));
3300 return (ISC_R_NOTIMPLEMENTED
);
3304 * Set up to listen on a given socket. We do this by creating an internal
3305 * event that will be dispatched when the socket has read activity. The
3306 * watcher will send the internal event to the task when there is a new connection.
3309 * Unlike in read, we don't preallocate a done event here. Every time there
3310 * is a new connection we'll have to allocate a new one anyway, so we might
3311 * as well keep things simple rather than having to track them.
/*
 * isc__socket_listen: put a bound TCP socket into the listening state.
 *
 * Returns ISC_R_CONNREFUSED when the socket is already closed.  Requires
 * that the socket is not yet a listener, is bound and is of type TCP.
 * A listen() failure is logged through UNEXPECTED_ERROR and reported as
 * ISC_R_UNEXPECTED.  On success the event is logged, the state is set
 * to SOCK_LISTEN and ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the condition guarding "backlog = SOMAXCONN" (presumably
 * a zero backlog), the LOCK call and the statement setting
 * sock->listener are not visible in this listing.
 */
3314 isc__socket_listen(isc_socket_t
*sock
, unsigned int backlog
) {
3315 char strbuf
[ISC_STRERRORSIZE
];
3317 REQUIRE(VALID_SOCKET(sock
));
3323 * make sure that the socket's not closed
3325 if (sock
->fd
== INVALID_SOCKET
) {
3326 UNLOCK(&sock
->lock
);
3327 return (ISC_R_CONNREFUSED
);
3330 REQUIRE(!sock
->listener
);
3331 REQUIRE(sock
->bound
);
3332 REQUIRE(sock
->type
== isc_sockettype_tcp
);
3335 backlog
= SOMAXCONN
;
3337 if (listen(sock
->fd
, (int)backlog
) < 0) {
3338 UNLOCK(&sock
->lock
);
3339 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
3341 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "listen: %s", strbuf
);
3343 return (ISC_R_UNEXPECTED
);
3346 socket_log(__LINE__
, sock
, NULL
, TRACE
,
3347 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
, "listening");
3349 _set_state(sock
, SOCK_LISTEN
);
3351 UNLOCK(&sock
->lock
);
3352 return (ISC_R_SUCCESS
);
3356 * This should try to do aggressive accept() XXXMLG
/*
 * isc__socket_accept: queue an asynchronous accept on a listening socket.
 *
 * Returns ISC_R_CONNREFUSED when the socket is already closed and
 * requires sock->listener.  Allocates a NEWCONN event and a fresh
 * isc_socket_t (nsock), creates the OS socket that AcceptEx() needs,
 * and attaches to the task.  If the task is exiting everything is
 * released and ISC_R_SHUTTINGDOWN is returned.  Otherwise the event
 * temporarily carries the attached task in ev_sender, an
 * IoCompletionInfo plus accept buffer are allocated from hHeapHandle
 * (allocation failure trips RUNTIME_CHECK), ISCAcceptEx() is issued,
 * nsock is registered with the completion port, the event is put on
 * sock->accept_list and pending_accept / pending_iocp are incremented.
 *
 * NOTE(review): on the task-exiting path nsock->fd was already created
 * by socket() but no closesocket() is visible before free_socket(),
 * unlike the other cleanup paths in this file -- confirm whether
 * free_socket() closes the handle, otherwise this leaks a socket.
 * NOTE(review): the LOCK call, the NULL check on adev and some
 * IoCompletionInfo field assignments are not visible in this listing.
 */
3359 isc__socket_accept(isc_socket_t
*sock
,
3360 isc_task_t
*task
, isc_taskaction_t action
, void *arg
)
3362 isc_socket_newconnev_t
*adev
;
3363 isc_socketmgr_t
*manager
;
3364 isc_task_t
*ntask
= NULL
;
3365 isc_socket_t
*nsock
;
3366 isc_result_t result
;
3367 IoCompletionInfo
*lpo
;
3369 REQUIRE(VALID_SOCKET(sock
));
3371 manager
= sock
->manager
;
3372 REQUIRE(VALID_MANAGER(manager
));
3378 * make sure that the socket's not closed
3380 if (sock
->fd
== INVALID_SOCKET
) {
3381 UNLOCK(&sock
->lock
);
3382 return (ISC_R_CONNREFUSED
);
3385 REQUIRE(sock
->listener
);
3388 * Sender field is overloaded here with the task we will be sending
3389 * this event to. Just before the actual event is delivered the
3390 * actual ev_sender will be touched up to be the socket.
3392 adev
= (isc_socket_newconnev_t
*)
3393 isc_event_allocate(manager
->mctx
, task
, ISC_SOCKEVENT_NEWCONN
,
3394 action
, arg
, sizeof(*adev
));
3396 UNLOCK(&sock
->lock
);
3397 return (ISC_R_NOMEMORY
);
3399 ISC_LINK_INIT(adev
, ev_link
);
3401 result
= allocate_socket(manager
, sock
->type
, &nsock
);
3402 if (result
!= ISC_R_SUCCESS
) {
3403 isc_event_free((isc_event_t
**)&adev
);
3404 UNLOCK(&sock
->lock
);
3409 * AcceptEx() requires we pass in a socket.
3411 nsock
->fd
= socket(sock
->pf
, SOCK_STREAM
, IPPROTO_TCP
);
3412 if (nsock
->fd
== INVALID_SOCKET
) {
3413 free_socket(&nsock
, __LINE__
);
3414 isc_event_free((isc_event_t
**)&adev
);
3415 UNLOCK(&sock
->lock
);
3416 return (ISC_R_FAILURE
); // XXXMLG need real error message
3420 * Attach to socket and to task.
3422 isc_task_attach(task
, &ntask
);
3423 if (isc_task_exiting(ntask
)) {
3424 free_socket(&nsock
, __LINE__
);
3425 isc_task_detach(&ntask
);
3426 isc_event_free(ISC_EVENT_PTR(&adev
));
3427 UNLOCK(&sock
->lock
);
3428 return (ISC_R_SHUTTINGDOWN
);
3430 nsock
->references
++;
3432 adev
->ev_sender
= ntask
;
3433 adev
->newsocket
= nsock
;
3434 _set_state(nsock
, SOCK_ACCEPT
);
3437 * Queue io completion for an accept().
3439 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
3441 sizeof(IoCompletionInfo
));
3442 RUNTIME_CHECK(lpo
!= NULL
);
/* Room for two addresses (local and remote), each with 16 bytes of padding. */
3443 lpo
->acceptbuffer
= (void *)HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
,
3444 (sizeof(SOCKADDR_STORAGE
) + 16) * 2);
3445 RUNTIME_CHECK(lpo
->acceptbuffer
!= NULL
);
3448 lpo
->request_type
= SOCKET_ACCEPT
;
3450 ISCAcceptEx(sock
->fd
,
3451 nsock
->fd
, /* Accepted Socket */
3452 lpo
->acceptbuffer
, /* Buffer for initial Recv */
3453 0, /* Length of Buffer */
3454 sizeof(SOCKADDR_STORAGE
) + 16, /* Local address length + 16 */
3455 sizeof(SOCKADDR_STORAGE
) + 16, /* Remote address length + 16 */
3456 (LPDWORD
)&lpo
->received_bytes
, /* Bytes Recved */
3457 (LPOVERLAPPED
)lpo
/* Overlapped structure */
3459 iocompletionport_update(nsock
);
3461 socket_log(__LINE__
, sock
, NULL
, TRACE
,
3462 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
,
3463 "accepting for nsock %p fd %d", nsock
, nsock
->fd
);
3468 ISC_LIST_ENQUEUE(sock
->accept_list
, adev
, ev_link
);
3469 sock
->pending_accept
++;
3470 sock
->pending_iocp
++;
3472 UNLOCK(&sock
->lock
);
3473 return (ISC_R_SUCCESS
);
/*
 * isc__socket_connect: start a connection to addr.
 *
 * Requires valid sock, addr, task, action and manager.  Multicast
 * destinations are rejected with ISC_R_MULTICAST before any locking is
 * visible.  Returns ISC_R_CONNREFUSED when the socket is already closed.
 * Because Windows sockets must be bound before connecting, the socket
 * is bound to the wildcard address of addr's family; a bind() failure
 * is mapped to ISC_R_NOPERM, ISC_R_ADDRNOTAVAIL, ISC_R_ADDRINUSE,
 * ISC_R_BOUND or ISC_R_UNEXPECTED.  A CONNECT event is allocated
 * (ISC_R_NOMEMORY on the visible failure return).
 *
 * For TCP sockets an IoCompletionInfo is allocated, ISCConnectEx() is
 * issued, the task is attached, pending_connect and pending_iocp are
 * updated and the event is stored in sock->connect_ev.  On the other
 * visible path WSAConnect() is called and the event is sent to the task
 * immediately with result ISC_R_SUCCESS.  Returns ISC_R_SUCCESS.
 *
 * NOTE(review): REQUIRE(addr != NULL) appears twice.  The return values
 * of ISCConnectEx() and WSAConnect() are not examined in the visible
 * code.  The LOCK call, the condition guarding the implicit bind, the
 * NULL checks on cdev/lpo and the else line are not visible in this
 * listing.
 */
3477 isc__socket_connect(isc_socket_t
*sock
, isc_sockaddr_t
*addr
,
3478 isc_task_t
*task
, isc_taskaction_t action
, void *arg
)
3480 char strbuf
[ISC_STRERRORSIZE
];
3481 isc_socket_connev_t
*cdev
;
3482 isc_task_t
*ntask
= NULL
;
3483 isc_socketmgr_t
*manager
;
3484 IoCompletionInfo
*lpo
;
3487 REQUIRE(VALID_SOCKET(sock
));
3488 REQUIRE(addr
!= NULL
);
3489 REQUIRE(task
!= NULL
);
3490 REQUIRE(action
!= NULL
);
3492 manager
= sock
->manager
;
3493 REQUIRE(VALID_MANAGER(manager
));
3494 REQUIRE(addr
!= NULL
);
3496 if (isc_sockaddr_ismulticast(addr
))
3497 return (ISC_R_MULTICAST
);
3503 * make sure that the socket's not closed
3505 if (sock
->fd
== INVALID_SOCKET
) {
3506 UNLOCK(&sock
->lock
);
3507 return (ISC_R_CONNREFUSED
);
3511 * Windows sockets won't connect unless the socket is bound.
3516 isc_sockaddr_anyofpf(&any
, isc_sockaddr_pf(addr
));
3517 if (bind(sock
->fd
, &any
.type
.sa
, any
.length
) < 0) {
3518 bind_errno
= WSAGetLastError();
3519 UNLOCK(&sock
->lock
);
3520 switch (bind_errno
) {
3522 return (ISC_R_NOPERM
);
3523 case WSAEADDRNOTAVAIL
:
3524 return (ISC_R_ADDRNOTAVAIL
);
3526 return (ISC_R_ADDRINUSE
);
3528 return (ISC_R_BOUND
);
3530 isc__strerror(bind_errno
, strbuf
,
3532 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
3533 "bind: %s", strbuf
);
3534 return (ISC_R_UNEXPECTED
);
3540 REQUIRE(!sock
->pending_connect
);
3542 cdev
= (isc_socket_connev_t
*)isc_event_allocate(manager
->mctx
, sock
,
3543 ISC_SOCKEVENT_CONNECT
,
3547 UNLOCK(&sock
->lock
);
3548 return (ISC_R_NOMEMORY
);
3550 ISC_LINK_INIT(cdev
, ev_link
);
3552 if (sock
->type
== isc_sockettype_tcp
) {
3554 * Queue io completion for an accept().
3556 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
3558 sizeof(IoCompletionInfo
));
3560 lpo
->request_type
= SOCKET_CONNECT
;
3562 sock
->address
= *addr
;
3563 ISCConnectEx(sock
->fd
, &addr
->type
.sa
, addr
->length
,
3564 NULL
, 0, NULL
, (LPOVERLAPPED
)lpo
);
3569 isc_task_attach(task
, &ntask
);
3570 cdev
->ev_sender
= ntask
;
3572 sock
->pending_connect
= 1;
3573 _set_state(sock
, SOCK_CONNECT
);
3576 * Enqueue the request.
3578 sock
->connect_ev
= cdev
;
3579 sock
->pending_iocp
++;
3581 WSAConnect(sock
->fd
, &addr
->type
.sa
, addr
->length
, NULL
, NULL
, NULL
, NULL
);
3582 cdev
->result
= ISC_R_SUCCESS
;
3583 isc_task_send(task
, (isc_event_t
**)&cdev
);
3586 UNLOCK(&sock
->lock
);
3588 return (ISC_R_SUCCESS
);
/*
 * isc__socket_getpeername: report the remote address of a connected
 * socket.
 *
 * Returns ISC_R_CONNREFUSED when the socket is already closed.  When
 * sock->connected is set, *addressp receives sock->address and the
 * result is ISC_R_SUCCESS; the other visible assignment yields
 * ISC_R_NOTCONNECTED.
 *
 * NOTE(review): the LOCK call, the else line and the final return are
 * not visible in this listing.
 */
3592 isc__socket_getpeername(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3593 isc_result_t result
;
3595 REQUIRE(VALID_SOCKET(sock
));
3596 REQUIRE(addressp
!= NULL
);
3602 * make sure that the socket's not closed
3604 if (sock
->fd
== INVALID_SOCKET
) {
3605 UNLOCK(&sock
->lock
);
3606 return (ISC_R_CONNREFUSED
);
3609 if (sock
->connected
) {
3610 *addressp
= sock
->address
;
3611 result
= ISC_R_SUCCESS
;
3613 result
= ISC_R_NOTCONNECTED
;
3616 UNLOCK(&sock
->lock
);
/*
 * isc__socket_getsockname: report the local address of a socket.
 *
 * Returns ISC_R_CONNREFUSED when the socket is already closed.  The
 * local address is read with getsockname() into addressp->type, using
 * sizeof(addressp->type) as the buffer length; a failure is logged and
 * turned into ISC_R_UNEXPECTED.  On success addressp->length receives
 * the length reported by getsockname().
 *
 * NOTE(review): the condition that selects ISC_R_NOTBOUND (presumably an
 * unbound socket), the LOCK call, the jump past the getsockname() call
 * and the final return are not visible in this listing.
 */
3622 isc__socket_getsockname(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3623 ISC_SOCKADDR_LEN_T len
;
3624 isc_result_t result
;
3625 char strbuf
[ISC_STRERRORSIZE
];
3627 REQUIRE(VALID_SOCKET(sock
));
3628 REQUIRE(addressp
!= NULL
);
3634 * make sure that the socket's not closed
3636 if (sock
->fd
== INVALID_SOCKET
) {
3637 UNLOCK(&sock
->lock
);
3638 return (ISC_R_CONNREFUSED
);
3642 result
= ISC_R_NOTBOUND
;
3646 result
= ISC_R_SUCCESS
;
3648 len
= sizeof(addressp
->type
);
3649 if (getsockname(sock
->fd
, &addressp
->type
.sa
, (void *)&len
) < 0) {
3650 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
3651 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "getsockname: %s",
3653 result
= ISC_R_UNEXPECTED
;
3656 addressp
->length
= (unsigned int)len
;
3659 UNLOCK(&sock
->lock
);
3665 * Run through the list of events on this socket, and cancel the ones
3666 * queued for task "task" of type "how". "how" is a bitmask.
/*
 * isc__socket_cancel: cancel queued operations on a socket.
 *
 * "how" is a bitmask of ISC_SOCKCANCEL_RECV / SEND / ACCEPT / CONNECT.
 * For each selected class, every queued event whose recorded task
 * (ev_sender) matches task -- or every event when task is NULL -- is
 * completed with ISC_R_CANCELED through the matching send_*done_event()
 * helper.  Cancelling an accept also closes and frees the pre-created
 * new socket.  Cancelling the (single) pending connect closes the
 * socket itself, marks it SOCK_CLOSED and clears sock->connect_ev.
 * Each handled bit is cleared from "how", and maybe_free_socket() is
 * called at the end.
 *
 * NOTE(review): the early-exit test, the LOCK call, the loop-advance
 * statements (dev = next) and the closing braces are not visible in
 * this listing.
 */
3669 isc__socket_cancel(isc_socket_t
*sock
, isc_task_t
*task
, unsigned int how
) {
3671 REQUIRE(VALID_SOCKET(sock
));
3674 * Quick exit if there is nothing to do. Don't even bother locking
3684 * make sure that the socket's not closed
3686 if (sock
->fd
== INVALID_SOCKET
) {
3687 UNLOCK(&sock
->lock
);
3692 * All of these do the same thing, more or less.
3694 * o If the internal event is marked as "posted" try to
3695 * remove it from the task's queue. If this fails, mark it
3696 * as canceled instead, and let the task clean it up later.
3697 * o For each I/O request for that task of that type, post
3698 * its done event with status of "ISC_R_CANCELED".
3699 * o Reset any state needed.
3702 if ((how
& ISC_SOCKCANCEL_RECV
) == ISC_SOCKCANCEL_RECV
) {
3703 isc_socketevent_t
*dev
;
3704 isc_socketevent_t
*next
;
3705 isc_task_t
*current_task
;
3707 dev
= ISC_LIST_HEAD(sock
->recv_list
);
3708 while (dev
!= NULL
) {
3709 current_task
= dev
->ev_sender
;
/* Fetch the successor first: posting the done event consumes dev. */
3710 next
= ISC_LIST_NEXT(dev
, ev_link
);
3711 if ((task
== NULL
) || (task
== current_task
)) {
3712 dev
->result
= ISC_R_CANCELED
;
3713 send_recvdone_event(sock
, &dev
);
3718 how
&= ~ISC_SOCKCANCEL_RECV
;
3720 if ((how
& ISC_SOCKCANCEL_SEND
) == ISC_SOCKCANCEL_SEND
) {
3721 isc_socketevent_t
*dev
;
3722 isc_socketevent_t
*next
;
3723 isc_task_t
*current_task
;
3725 dev
= ISC_LIST_HEAD(sock
->send_list
);
3727 while (dev
!= NULL
) {
3728 current_task
= dev
->ev_sender
;
3729 next
= ISC_LIST_NEXT(dev
, ev_link
);
3730 if ((task
== NULL
) || (task
== current_task
)) {
3731 dev
->result
= ISC_R_CANCELED
;
3732 send_senddone_event(sock
, &dev
);
3737 how
&= ~ISC_SOCKCANCEL_SEND
;
3739 if (((how
& ISC_SOCKCANCEL_ACCEPT
) == ISC_SOCKCANCEL_ACCEPT
)
3740 && !ISC_LIST_EMPTY(sock
->accept_list
)) {
3741 isc_socket_newconnev_t
*dev
;
3742 isc_socket_newconnev_t
*next
;
3743 isc_task_t
*current_task
;
3745 dev
= ISC_LIST_HEAD(sock
->accept_list
);
3746 while (dev
!= NULL
) {
3747 current_task
= dev
->ev_sender
;
3748 next
= ISC_LIST_NEXT(dev
, ev_link
);
3750 if ((task
== NULL
) || (task
== current_task
)) {
/* Drop the reference on the pre-created socket, then close and free it. */
3752 dev
->newsocket
->references
--;
3753 closesocket(dev
->newsocket
->fd
);
3754 dev
->newsocket
->fd
= INVALID_SOCKET
;
3755 free_socket(&dev
->newsocket
, __LINE__
);
3757 dev
->result
= ISC_R_CANCELED
;
3758 send_acceptdone_event(sock
, &dev
);
3764 how
&= ~ISC_SOCKCANCEL_ACCEPT
;
3767 * Connecting is not a list.
3769 if (((how
& ISC_SOCKCANCEL_CONNECT
) == ISC_SOCKCANCEL_CONNECT
)
3770 && sock
->connect_ev
!= NULL
) {
3771 isc_socket_connev_t
*dev
;
3772 isc_task_t
*current_task
;
3774 INSIST(sock
->pending_connect
);
3776 dev
= sock
->connect_ev
;
3777 current_task
= dev
->ev_sender
;
3779 if ((task
== NULL
) || (task
== current_task
)) {
3780 closesocket(sock
->fd
);
3781 sock
->fd
= INVALID_SOCKET
;
3782 _set_state(sock
, SOCK_CLOSED
);
3784 sock
->connect_ev
= NULL
;
3785 dev
->result
= ISC_R_CANCELED
;
3786 send_connectdone_event(sock
, &dev
);
3789 how
&= ~ISC_SOCKCANCEL_CONNECT
;
3791 maybe_free_socket(&sock
, __LINE__
);
/*
 * isc__socket_gettype: report the socket's type.
 *
 * NOTE(review): on a closed socket the visible code returns
 * ISC_R_CONNREFUSED, a result code, although the local "type" is
 * declared as isc_sockettype_t; a caller cannot tell that value apart
 * from a genuine socket type -- confirm the intended behaviour.
 * NOTE(review): the LOCK call, the assignment to "type" and the final
 * return are not visible in this listing.
 */
3795 isc__socket_gettype(isc_socket_t
*sock
) {
3796 isc_sockettype_t type
;
3798 REQUIRE(VALID_SOCKET(sock
));
3803 * make sure that the socket's not closed
3805 if (sock
->fd
== INVALID_SOCKET
) {
3806 UNLOCK(&sock
->lock
);
3807 return (ISC_R_CONNREFUSED
);
3811 UNLOCK(&sock
->lock
);
/*
 * isc__socket_isbound: report whether the socket has been bound.
 *
 * Normalises sock->bound to ISC_TRUE / ISC_FALSE in "val".
 *
 * NOTE(review): the LOCK call, the declaration of val, the value
 * returned for a closed socket and the final return are not visible in
 * this listing.
 */
3816 isc__socket_isbound(isc_socket_t
*sock
) {
3819 REQUIRE(VALID_SOCKET(sock
));
3825 * make sure that the socket's not closed
3827 if (sock
->fd
== INVALID_SOCKET
) {
3828 UNLOCK(&sock
->lock
);
3832 val
= ((sock
->bound
) ? ISC_TRUE
: ISC_FALSE
);
3833 UNLOCK(&sock
->lock
);
/*
 * isc__socket_ipv6only: set or clear IPV6_V6ONLY on an IPv6 socket.
 *
 * "yes" is converted to 1 or 0.  The option is only applied when
 * sock->pf is AF_INET6, and the setsockopt() result is deliberately
 * discarded.  The option code is compiled only when IPV6_V6ONLY is
 * defined.
 *
 * NOTE(review): the remaining preprocessor lines (#else / #endif) are
 * not visible in this listing.
 */
3839 isc__socket_ipv6only(isc_socket_t
*sock
, isc_boolean_t yes
) {
3840 #if defined(IPV6_V6ONLY)
3841 int onoff
= yes
? 1 : 0;
3846 REQUIRE(VALID_SOCKET(sock
));
3849 if (sock
->pf
== AF_INET6
) {
3850 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_V6ONLY
,
3851 (char *)&onoff
, sizeof(onoff
));
/*
 * isc__socket_dscp: apply a DSCP value to the socket.
 *
 * For AF_INET sockets the value is written with IP_TOS, for AF_INET6
 * sockets with IPV6_TCLASS; both setsockopt() results are deliberately
 * discarded.  dscp is passed to setsockopt() exactly as received.
 *
 * NOTE(review): the preprocessor structure around these calls (only the
 * opening "#if !defined(IP_TOS) && !defined(IPV6_TCLASS)" is visible)
 * and any range check or shift applied to dscp are not visible in this
 * listing.
 */
3857 isc__socket_dscp(isc_socket_t
*sock
, isc_dscp_t dscp
) {
3858 #if !defined(IP_TOS) && !defined(IPV6_TCLASS)
3868 REQUIRE(VALID_SOCKET(sock
));
3871 if (sock
->pf
== AF_INET
) {
3872 (void)setsockopt(sock
->fd
, IPPROTO_IP
, IP_TOS
,
3873 (char *)&dscp
, sizeof(dscp
));
3877 if (sock
->pf
== AF_INET6
) {
3878 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_TCLASS
,
3879 (char *)&dscp
, sizeof(dscp
));
/*
 * isc__socket_cleanunix: UNIX-domain socket cleanup entry point.
 *
 * NOTE(review): only the signature is visible in this listing;
 * presumably a no-op on this platform -- confirm against the full file.
 */
3885 isc__socket_cleanunix(isc_sockaddr_t
*addr
, isc_boolean_t active
) {
/*
 * isc__socket_permunix: UNIX-domain socket permission entry point.
 *
 * The only visible statement returns ISC_R_NOTIMPLEMENTED; none of the
 * parameters is used in the visible code.
 */
3891 isc__socket_permunix(isc_sockaddr_t
*addr
, isc_uint32_t perm
,
3892 isc_uint32_t owner
, isc_uint32_t group
)
3898 return (ISC_R_NOTIMPLEMENTED
);
/*
 * isc__socket_setname: give the socket a human-readable name.
 *
 * With the socket lock held, the name buffer is zero-filled and at most
 * sizeof(socket->name) - 1 bytes of "name" are copied in, so the stored
 * string is always NUL-terminated and silently truncated if too long.
 *
 * NOTE(review): no statement storing "tag" is visible in this listing;
 * a line appears to be missing between the strncpy() and the UNLOCK.
 */
3902 isc__socket_setname(isc_socket_t
*socket
, const char *name
, void *tag
) {
3908 REQUIRE(VALID_SOCKET(socket
));
3910 LOCK(&socket
->lock
);
3911 memset(socket
->name
, 0, sizeof(socket
->name
));
3912 strncpy(socket
->name
, name
, sizeof(socket
->name
) - 1);
3914 UNLOCK(&socket
->lock
);
/*
 * isc__socket_getname: return the socket's stored name (socket->name).
 * No validation or locking is performed in the visible code.
 */
3918 isc__socket_getname(isc_socket_t
*socket
) {
3919 return (socket
->name
);
/*
 * isc__socket_gettag: return the socket's stored tag (socket->tag).
 * No validation or locking is performed in the visible code.
 */
3923 isc__socket_gettag(isc_socket_t
*socket
) {
3924 return (socket
->tag
);
/*
 * isc__socket_getfd: return the socket's descriptor.
 *
 * NOTE(review): socket->fd is cast to short before being returned, so
 * any handle value that does not fit in a short is truncated -- confirm
 * that callers only use this for logging/identification.
 */
3928 isc__socket_getfd(isc_socket_t
*socket
) {
3929 return ((short) socket
->fd
);
/*
 * isc__socketmgr_setreserved: entry point for reserving descriptors.
 *
 * NOTE(review): only the signature is visible in this listing;
 * presumably a no-op on this platform -- confirm against the full file.
 */
3933 isc__socketmgr_setreserved(isc_socketmgr_t
*manager
, isc_uint32_t reserved
) {
/*
 * isc___socketmgr_maxudp: entry point for limiting UDP datagram size.
 *
 * NOTE(review): only the signature is visible in this listing;
 * presumably a no-op on this platform -- confirm against the full file.
 */
3939 isc___socketmgr_maxudp(isc_socketmgr_t
*manager
, int maxudp
) {
/*
 * isc_socket_socketevent: public wrapper that allocates a socket event
 * by forwarding mctx, sender, eventtype, action and arg to
 * allocate_socketevent() and returning its result.
 *
 * NOTE(review): the parameter line declaring "arg" is not visible in
 * this listing.
 */
3946 isc_socket_socketevent(isc_mem_t
*mctx
, void *sender
,
3947 isc_eventtype_t eventtype
, isc_taskaction_t action
,
3950 return (allocate_socketevent(mctx
, sender
, eventtype
, action
, arg
));
/*
 * _socktype: map a socket type to a short descriptive string.
 *
 * Tests the type against udp, tcp, unix and fdwatch in that order; the
 * only visible return is the fallback string "not-initialized".
 *
 * NOTE(review): the return statements for the four recognised types are
 * not visible in this listing.
 */
3956 _socktype(isc_sockettype_t type
) {
3957 if (type
== isc_sockettype_udp
)
3959 else if (type
== isc_sockettype_tcp
)
3961 else if (type
== isc_sockettype_unix
)
3963 else if (type
== isc_sockettype_fdwatch
)
3966 return ("not-initialized");
/*
 * TRY0(a): evaluate a, store the result in xmlrc, and jump to the
 * "error" label when the result is negative.  The enclosing function
 * must therefore declare xmlrc and provide an "error" label.
 */
3969 #define TRY0(a) do { xmlrc = (a); if (xmlrc < 0) goto error; } while(/*CONSTCOND*/0)
/*
 * isc_socketmgr_renderxml: write the manager's socket list to 'writer'.
 *
 * Emits a "sockets" element holding one "socket" element per entry of
 * mgr->socklist.  Each entry carries an "id" (the socket pointer printed
 * with %p), an optional "name" (when sock->name is non-empty),
 * "references", "type" (via _socktype()), "peer-address" when the socket
 * is connected, "local-address" when getsockname() succeeds, and a
 * "states" element listing "state" children.
 *
 * Every writer call is wrapped in TRY0, so a negative return from the
 * XML writer jumps to the 'error' label.
 *
 * NOTE(review): several lines of this function (e.g. the per-socket
 * LOCK, some call arguments, the conditions guarding the "listener" and
 * "bound" states, the 'error' label and the return) are not visible in
 * this excerpt; confirm details against the full file.
 */
3971 isc_socketmgr_renderxml(isc_socketmgr_t
*mgr
, xmlTextWriterPtr writer
)
3973 isc_socket_t
*sock
= NULL
;
3974 char peerbuf
[ISC_SOCKADDR_FORMATSIZE
];
3975 isc_sockaddr_t addr
;
3976 ISC_SOCKADDR_LEN_T len
;
/* Manager reference count: emitted only when ISC_PLATFORM_USETHREADS is not defined. */
3981 #ifndef ISC_PLATFORM_USETHREADS
3982 TRY0(xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"references"));
3983 TRY0(xmlTextWriterWriteFormatString(writer
, "%d", mgr
->refs
));
3984 TRY0(xmlTextWriterEndElement(writer
));
/* Walk mgr->socklist, emitting one "socket" element per entry. */
3987 TRY0(xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"sockets"));
3988 sock
= ISC_LIST_HEAD(mgr
->socklist
);
3989 while (sock
!= NULL
) {
3991 TRY0(xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"socket"));
/* The socket's address in memory serves as its identifier. */
3993 TRY0(xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"id"));
3994 TRY0(xmlTextWriterWriteFormatString(writer
, "%p", sock
));
3995 TRY0(xmlTextWriterEndElement(writer
));
/* The "name" element is written only when a name has been set. */
3997 if (sock
->name
[0] != 0) {
3998 TRY0(xmlTextWriterStartElement(writer
,
3999 ISC_XMLCHAR
"name"));
4000 TRY0(xmlTextWriterWriteFormatString(writer
, "%s",
4002 TRY0(xmlTextWriterEndElement(writer
)); /* name */
4005 TRY0(xmlTextWriterStartElement(writer
,
4006 ISC_XMLCHAR
"references"));
4007 TRY0(xmlTextWriterWriteFormatString(writer
, "%d",
4009 TRY0(xmlTextWriterEndElement(writer
));
4011 TRY0(xmlTextWriterWriteElement(writer
, ISC_XMLCHAR
"type",
4012 ISC_XMLCHAR
_socktype(sock
->type
)));
/* Peer address is reported only for connected sockets. */
4014 if (sock
->connected
) {
4015 isc_sockaddr_format(&sock
->address
, peerbuf
,
4017 TRY0(xmlTextWriterWriteElement(writer
,
4018 ISC_XMLCHAR
"peer-address",
4019 ISC_XMLCHAR peerbuf
));
/*
 * Local address is reported only when getsockname() succeeds; peerbuf is
 * reused as the formatting buffer.
 * NOTE(review): the initialization of 'len' is not visible in this
 * excerpt -- confirm it is set before this call.
 */
4023 if (getsockname(sock
->fd
, &addr
.type
.sa
, (void *)&len
) == 0) {
4024 isc_sockaddr_format(&addr
, peerbuf
, sizeof(peerbuf
));
4025 TRY0(xmlTextWriterWriteElement(writer
,
4026 ISC_XMLCHAR
"local-address",
4027 ISC_XMLCHAR peerbuf
));
/* One "state" child is written per socket flag reported below. */
4030 TRY0(xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"states"));
4031 if (sock
->pending_recv
)
4032 TRY0(xmlTextWriterWriteElement(writer
,
4033 ISC_XMLCHAR
"state",
4034 ISC_XMLCHAR
"pending-receive"));
4035 if (sock
->pending_send
)
4036 TRY0(xmlTextWriterWriteElement(writer
,
4037 ISC_XMLCHAR
"state",
4038 ISC_XMLCHAR
"pending-send"));
4039 if (sock
->pending_accept
)
4040 TRY0(xmlTextWriterWriteElement(writer
,
4041 ISC_XMLCHAR
"state",
4042 ISC_XMLCHAR
"pending_accept"));
4044 TRY0(xmlTextWriterWriteElement(writer
,
4045 ISC_XMLCHAR
"state",
4046 ISC_XMLCHAR
"listener"));
4047 if (sock
->connected
)
4048 TRY0(xmlTextWriterWriteElement(writer
,
4049 ISC_XMLCHAR
"state",
4050 ISC_XMLCHAR
"connected"));
4051 if (sock
->pending_connect
)
4052 TRY0(xmlTextWriterWriteElement(writer
,
4053 ISC_XMLCHAR
"state",
4054 ISC_XMLCHAR
"connecting"));
4056 TRY0(xmlTextWriterWriteElement(writer
,
4057 ISC_XMLCHAR
"state",
4058 ISC_XMLCHAR
"bound"));
4060 TRY0(xmlTextWriterEndElement(writer
)); /* states */
4062 TRY0(xmlTextWriterEndElement(writer
)); /* socket */
/* Release this socket's lock before advancing to the next list entry. */
4064 UNLOCK(&sock
->lock
);
4065 sock
= ISC_LIST_NEXT(sock
, link
);
4067 TRY0(xmlTextWriterEndElement(writer
)); /* sockets */
/*
 * NOTE(review): second UNLOCK of sock->lock; the label/condition around
 * it is not visible in this excerpt -- presumably the 'error' path
 * releasing the lock of the socket being rendered; confirm.
 */
4071 UNLOCK(&sock
->lock
);
4077 #endif /* HAVE_LIBXML2 */
4080 * Replace ../socket_api.c
/*
 * isc__socket_register: takes no arguments; the only statement visible
 * in this excerpt returns ISC_R_SUCCESS.
 */
4084 isc__socket_register(void) {
4085 return (ISC_R_SUCCESS
);
4089 isc_socketmgr_createinctx(isc_mem_t
*mctx
, isc_appctx_t
*actx
,
4090 isc_socketmgr_t
**managerp
)
4092 isc_result_t result
;
4094 result
= isc_socketmgr_create(mctx
, managerp
);
4096 if (result
== ISC_R_SUCCESS
)
4097 isc_appctx_setsocketmgr(actx
, *managerp
);