4 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
5 * Copyright (C) 2000-2003 Internet Software Consortium.
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
20 /* Id: socket.c,v 1.81 2009/11/10 18:31:47 each Exp */
22 /* This code uses functions which are only available on Server 2003 and
23 * higher, and Windows XP and higher.
25 * This code is by nature multithreaded and takes advantage of various
26 * features to pass on information through the completion port for
27 * when I/O is completed. All sends, receives, accepts, and connects are
28 * completed through the completion port.
30 * The number of Completion Port Worker threads used is the total number
31 * of CPUs + 1. This increases the likelihood that a Worker Thread is
32 * available for processing a completed request.
34 * XXXPDM 5 August, 2002
37 #define MAKE_EXTERNAL 1
40 #include <sys/types.h>
43 #define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */
55 #include <isc/buffer.h>
56 #include <isc/bufferlist.h>
57 #include <isc/condition.h>
62 #include <isc/mutex.h>
66 #include <isc/platform.h>
67 #include <isc/print.h>
68 #include <isc/region.h>
69 #include <isc/socket.h>
70 #include <isc/stats.h>
71 #include <isc/strerror.h>
72 #include <isc/syslog.h>
74 #include <isc/thread.h>
76 #include <isc/win32os.h>
80 #include "errno2result.h"
83 * How in the world can Microsoft exist with APIs like this?
84 * We can't actually call this directly, because it turns out
85 * no library exports this function. Instead, we need to
86 * issue a runtime call to get the address.
88 LPFN_CONNECTEX ISCConnectEx
;
89 LPFN_ACCEPTEX ISCAcceptEx
;
90 LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs
;
93 * Run expensive internal consistency checks.
95 #ifdef ISC_SOCKET_CONSISTENCY_CHECKS
96 #define CONSISTENT(sock) consistent(sock)
98 #define CONSISTENT(sock) do {} while (0)
100 static void consistent(isc_socket_t
*sock
);
103 * Define this macro to control the behavior of connection
104 * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823
106 * NOTE: This requires that Windows 2000 systems install Service Pack 2
109 #ifndef SIO_UDP_CONNRESET
110 #define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
114 * Some systems define the socket length argument as an int, some as size_t,
115 * some as socklen_t. This is here so it can be easily changed if needed.
117 #ifndef ISC_SOCKADDR_LEN_T
118 #define ISC_SOCKADDR_LEN_T unsigned int
122 * Define what the possible "soft" errors can be. These are non-fatal returns
123 * of various network related functions, like recv() and so on.
125 #define SOFT_ERROR(e) ((e) == WSAEINTR || \
126 (e) == WSAEWOULDBLOCK || \
127 (e) == EWOULDBLOCK || \
133 * Pending errors are not really errors and should be
136 #define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)
138 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
139 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
140 #define DOIO_HARD 2 /* i/o error, event sent */
141 #define DOIO_EOF 3 /* EOF, no event sent */
142 #define DOIO_PENDING 4 /* status when i/o is in process */
143 #define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */
145 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
148 * DLVL(90) -- Function entry/exit and other tracing.
149 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
150 * DLVL(60) -- Socket data send/receive
151 * DLVL(50) -- Event tracing, including receiving/sending completion events.
152 * DLVL(20) -- Socket creation/destruction.
154 #define TRACE_LEVEL 90
155 #define CORRECTNESS_LEVEL 70
156 #define IOEVENT_LEVEL 60
157 #define EVENT_LEVEL 50
158 #define CREATION_LEVEL 20
160 #define TRACE DLVL(TRACE_LEVEL)
161 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
162 #define IOEVENT DLVL(IOEVENT_LEVEL)
163 #define EVENT DLVL(EVENT_LEVEL)
164 #define CREATION DLVL(CREATION_LEVEL)
166 typedef isc_event_t intev_t
;
172 SOCK_INITIALIZED
, /* Socket Initialized */
173 SOCK_OPEN
, /* Socket opened but nothing yet to do */
174 SOCK_DATA
, /* Socket sending or receiving data */
175 SOCK_LISTEN
, /* TCP Socket listening for connects */
176 SOCK_ACCEPT
, /* TCP socket is waiting to accept */
177 SOCK_CONNECT
, /* TCP Socket connecting */
178 SOCK_CLOSED
, /* Socket has been closed */
181 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
182 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
185 * IPv6 control information. If the socket is an IPv6 socket we want
186 * to collect the destination address and interface so the client can
187 * set them on outgoing packets.
189 #ifdef ISC_PLATFORM_HAVEIPV6
196 * We really don't want to try and use these control messages. Win32
197 * doesn't have this mechanism before XP.
202 * Message header for recvmsg and sendmsg calls.
203 * Used value-result for recvmsg, value only for sendmsg.
206 SOCKADDR_STORAGE to_addr
; /* UDP send/recv address */
207 int to_addr_len
; /* length of the address */
208 WSABUF
*msg_iov
; /* scatter/gather array */
209 u_int msg_iovlen
; /* # elements in msg_iov */
210 void *msg_control
; /* ancillary data, see below */
211 u_int msg_controllen
; /* ancillary data buffer len */
212 int msg_totallen
; /* total length of this message */
216 * The size to raise the receive buffer to.
218 #define RCVBUFSIZE (32*1024)
221 * The number of times a send operation is repeated if the result
229 isc_socketmgr_t
*manager
;
231 isc_sockettype_t type
;
233 /* Pointers to scatter/gather buffers */
234 WSABUF iov
[ISC_SOCKET_MAXSCATTERGATHER
];
236 /* Locked by socket lock. */
237 ISC_LINK(isc_socket_t
) link
;
238 unsigned int references
; /* EXTERNAL references */
239 SOCKET fd
; /* file handle */
240 int pf
; /* protocol family */
245 * Each recv() call uses this buffer. It is a per-socket receive
246 * buffer that allows us to decouple the system recv() from the
247 * recv_list done events. This means the items on the recv_list
248 * can be removed without having to cancel pending system recv()
249 * calls. It also allows us to read-ahead in some cases.
252 SOCKADDR_STORAGE from_addr
; // UDP send/recv address
253 int from_addr_len
; // length of the address
254 char *base
; // the base of the buffer
255 char *consume_position
; // where to start copying data from next
256 unsigned int len
; // the actual size of this buffer
257 unsigned int remaining
; // the number of bytes remaining
260 ISC_LIST(isc_socketevent_t
) send_list
;
261 ISC_LIST(isc_socketevent_t
) recv_list
;
262 ISC_LIST(isc_socket_newconnev_t
) accept_list
;
263 isc_socket_connev_t
*connect_ev
;
265 isc_sockaddr_t address
; /* remote address */
267 unsigned int listener
: 1, /* listener socket */
269 pending_connect
: 1, /* connect pending */
270 bound
: 1; /* bound to local addr */
271 unsigned int pending_iocp
; /* Should equal the counters below. Debug. */
272 unsigned int pending_recv
; /* Number of outstanding recv() calls. */
273 unsigned int pending_send
; /* Number of outstanding send() calls. */
274 unsigned int pending_accept
; /* Number of outstanding accept() calls. */
275 unsigned int state
; /* Socket state. Debugging and consistency checking. */
276 int state_lineno
; /* line which last touched state */
277 int in_recovery_cnt
; /* avoid recovery loop. */
280 #define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0)
285 typedef struct buflist buflist_t
;
290 ISC_LINK(buflist_t
) link
;
294 * I/O Completion ports Info structures
297 static HANDLE hHeapHandle
= NULL
;
298 typedef struct IoCompletionInfo
{
299 OVERLAPPED overlapped
;
300 isc_socketevent_t
*dev
; /* send()/recv() done event */
301 isc_socket_connev_t
*cdev
; /* connect() done event */
302 isc_socket_newconnev_t
*adev
; /* accept() done event */
304 DWORD received_bytes
;
306 struct msghdr messagehdr
;
307 ISC_LIST(buflist_t
) bufferlist
; /*%< list of buffers */
311 * Define a maximum number of I/O Completion Port worker threads
312 * to handle the load on the Completion Port. The actual number
313 * used is the number of CPUs + 1.
315 #define MAX_IOCPTHREADS 20
317 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')
318 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
320 struct isc_socketmgr
{
327 /* Locked by manager lock. */
328 ISC_LIST(isc_socket_t
) socklist
;
329 isc_boolean_t bShutdown
;
330 isc_condition_t shutdown_ok
;
331 HANDLE hIoCompletionPort
;
333 HANDLE hIOCPThreads
[MAX_IOCPTHREADS
];
334 DWORD dwIOCPThreadIds
[MAX_IOCPTHREADS
];
338 * Modified by InterlockedIncrement() and InterlockedDecrement()
352 * send() and recv() iovec counts
354 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
355 #define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
357 static isc_threadresult_t WINAPI
SocketIoThread(LPVOID ThreadContext
);
358 static void maybe_free_socket(isc_socket_t
**, int);
359 static void free_socket(isc_socket_t
**, int);
360 static isc_boolean_t
senddone_is_active(isc_socket_t
*sock
, isc_socketevent_t
*dev
);
361 static isc_boolean_t
acceptdone_is_active(isc_socket_t
*sock
, isc_socket_newconnev_t
*dev
);
362 static isc_boolean_t
connectdone_is_active(isc_socket_t
*sock
, isc_socket_connev_t
*dev
);
363 static void send_recvdone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
);
364 static void send_senddone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
);
365 static void send_acceptdone_event(isc_socket_t
*sock
, isc_socket_newconnev_t
**adev
);
366 static void send_connectdone_event(isc_socket_t
*sock
, isc_socket_connev_t
**cdev
);
367 static void send_recvdone_abort(isc_socket_t
*sock
, isc_result_t result
);
368 static void queue_receive_event(isc_socket_t
*sock
, isc_task_t
*task
, isc_socketevent_t
*dev
);
369 static void queue_receive_request(isc_socket_t
*sock
);
370 static void hard_recover_receive_request(isc_socket_t
*sock
);
371 static void recover_receive_request(isc_socket_t
*sock
, void **lplpo
);
374 * This is used to dump the contents of the sock structure.
375 * You should make sure that the sock is locked before
376 * dumping it. Since the code uses simple printf() statements,
377 * it should only be used interactively.
380 sock_dump(isc_socket_t
*sock
) {
381 isc_socketevent_t
*ldev
;
382 isc_socket_newconnev_t
*ndev
;
388 isc_socket_getpeername(sock
, &addr
);
389 isc_sockaddr_format(&addr
, socktext
, sizeof(socktext
));
390 printf("Remote Socket: %s\n", socktext
);
391 isc_socket_getsockname(sock
, &addr
);
392 isc_sockaddr_format(&addr
, socktext
, sizeof(socktext
));
393 printf("This Socket: %s\n", socktext
);
396 printf("\n\t\tSock Dump\n");
397 printf("\t\tfd: %u\n", sock
->fd
);
398 printf("\t\treferences: %d\n", sock
->references
);
399 printf("\t\tpending_accept: %d\n", sock
->pending_accept
);
400 printf("\t\tconnecting: %d\n", sock
->pending_connect
);
401 printf("\t\tconnected: %d\n", sock
->connected
);
402 printf("\t\tbound: %d\n", sock
->bound
);
403 printf("\t\tpending_iocp: %d\n", sock
->pending_iocp
);
404 printf("\t\tsocket type: %d\n", sock
->type
);
406 printf("\n\t\tSock Recv List\n");
407 ldev
= ISC_LIST_HEAD(sock
->recv_list
);
408 while (ldev
!= NULL
) {
409 printf("\t\tdev: %p\n", ldev
);
410 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
413 printf("\n\t\tSock Send List\n");
414 ldev
= ISC_LIST_HEAD(sock
->send_list
);
415 while (ldev
!= NULL
) {
416 printf("\t\tdev: %p\n", ldev
);
417 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
420 printf("\n\t\tSock Accept List\n");
421 ndev
= ISC_LIST_HEAD(sock
->accept_list
);
422 while (ndev
!= NULL
) {
423 printf("\t\tdev: %p\n", ldev
);
424 ndev
= ISC_LIST_NEXT(ndev
, ev_link
);
429 socket_log(int lineno
, isc_socket_t
*sock
, isc_sockaddr_t
*address
,
430 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
431 isc_msgcat_t
*msgcat
, int msgset
, int message
,
432 const char *fmt
, ...) ISC_FORMAT_PRINTF(9, 10);
434 /* This function will add an entry to the I/O completion port
435 * that will signal the I/O thread to exit (gracefully)
438 signal_iocompletionport_exit(isc_socketmgr_t
*manager
) {
441 char strbuf
[ISC_STRERRORSIZE
];
443 REQUIRE(VALID_MANAGER(manager
));
444 for (i
= 0; i
< manager
->maxIOCPThreads
; i
++) {
445 if (!PostQueuedCompletionStatus(manager
->hIoCompletionPort
,
447 errval
= GetLastError();
448 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
449 FATAL_ERROR(__FILE__
, __LINE__
,
450 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
452 "Can't request service thread to exit: %s"),
459 * Create the worker threads for the I/O Completion Port
462 iocompletionport_createthreads(int total_threads
, isc_socketmgr_t
*manager
) {
464 char strbuf
[ISC_STRERRORSIZE
];
467 INSIST(total_threads
> 0);
468 REQUIRE(VALID_MANAGER(manager
));
470 * We need at least one
472 for (i
= 0; i
< total_threads
; i
++) {
473 manager
->hIOCPThreads
[i
] = CreateThread(NULL
, 0, SocketIoThread
,
475 &manager
->dwIOCPThreadIds
[i
]);
476 if (manager
->hIOCPThreads
[i
] == NULL
) {
477 errval
= GetLastError();
478 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
479 FATAL_ERROR(__FILE__
, __LINE__
,
480 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
482 "Can't create IOCP thread: %s"),
490 * Create/initialise the I/O completion port
493 iocompletionport_init(isc_socketmgr_t
*manager
) {
495 char strbuf
[ISC_STRERRORSIZE
];
497 REQUIRE(VALID_MANAGER(manager
));
499 * Create a private heap to handle the socket overlapped structure
500 * The minimum number of structures is 10, there is no maximum
502 hHeapHandle
= HeapCreate(0, 10 * sizeof(IoCompletionInfo
), 0);
503 if (hHeapHandle
== NULL
) {
504 errval
= GetLastError();
505 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
506 FATAL_ERROR(__FILE__
, __LINE__
,
507 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
509 "HeapCreate() failed during "
510 "initialization: %s"),
515 manager
->maxIOCPThreads
= min(isc_os_ncpus() + 1, MAX_IOCPTHREADS
);
517 /* Now Create the Completion Port */
518 manager
->hIoCompletionPort
= CreateIoCompletionPort(
519 INVALID_HANDLE_VALUE
, NULL
,
520 0, manager
->maxIOCPThreads
);
521 if (manager
->hIoCompletionPort
== NULL
) {
522 errval
= GetLastError();
523 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
524 FATAL_ERROR(__FILE__
, __LINE__
,
525 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
527 "CreateIoCompletionPort() failed "
528 "during initialization: %s"),
534 * Worker threads for servicing the I/O
536 iocompletionport_createthreads(manager
->maxIOCPThreads
, manager
);
540 * Associate a socket with an IO Completion Port. This allows us to queue events for it
541 * and have our worker pool of threads process them.
544 iocompletionport_update(isc_socket_t
*sock
) {
546 char strbuf
[ISC_STRERRORSIZE
];
548 REQUIRE(VALID_SOCKET(sock
));
550 hiocp
= CreateIoCompletionPort((HANDLE
)sock
->fd
,
551 sock
->manager
->hIoCompletionPort
, (ULONG_PTR
)sock
, 0);
554 DWORD errval
= GetLastError();
555 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
556 isc_log_iwrite(isc_lctx
,
557 ISC_LOGCATEGORY_GENERAL
,
558 ISC_LOGMODULE_SOCKET
, ISC_LOG_ERROR
,
559 isc_msgcat
, ISC_MSGSET_SOCKET
,
560 ISC_MSG_TOOMANYHANDLES
,
561 "iocompletionport_update: failed to open"
562 " io completion port: %s",
565 /* XXXMLG temporary hack to make failures detected.
566 * This function should return errors to the caller, not
569 FATAL_ERROR(__FILE__
, __LINE__
,
570 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
572 "CreateIoCompletionPort() failed "
573 "during initialization: %s"),
578 InterlockedIncrement(&sock
->manager
->iocp_total
);
582 * Routine to cleanup and then close the socket.
583 * Only close the socket here if it is NOT associated
584 * with an event, otherwise the WSAWaitForMultipleEvents
585 * may fail due to the fact that the Wait should not
586 * be running while closing an event or a socket.
587 * The socket is locked before calling this function
590 socket_close(isc_socket_t
*sock
) {
592 REQUIRE(sock
!= NULL
);
594 if (sock
->fd
!= INVALID_SOCKET
) {
595 closesocket(sock
->fd
);
596 sock
->fd
= INVALID_SOCKET
;
597 _set_state(sock
, SOCK_CLOSED
);
598 InterlockedDecrement(&sock
->manager
->totalSockets
);
602 static isc_once_t initialise_once
= ISC_ONCE_INIT
;
603 static isc_boolean_t initialised
= ISC_FALSE
;
607 WORD wVersionRequested
;
611 GUID GUIDConnectEx
= WSAID_CONNECTEX
;
612 GUID GUIDAcceptEx
= WSAID_ACCEPTEX
;
613 GUID GUIDGetAcceptExSockaddrs
= WSAID_GETACCEPTEXSOCKADDRS
;
616 /* Need Winsock 2.2 or better */
617 wVersionRequested
= MAKEWORD(2, 2);
619 err
= WSAStartup(wVersionRequested
, &wsaData
);
621 char strbuf
[ISC_STRERRORSIZE
];
622 isc__strerror(err
, strbuf
, sizeof(strbuf
));
623 FATAL_ERROR(__FILE__
, __LINE__
, "WSAStartup() %s: %s",
624 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
625 ISC_MSG_FAILED
, "failed"),
630 * The following APIs do not exist as functions in a library, but we must
631 * ask winsock for them. They are "extensions" -- but why they cannot be
632 * actual functions is beyond me. So, ask winsock for the pointers to the
635 sock
= socket(AF_INET
, SOCK_STREAM
, IPPROTO_TCP
);
636 INSIST(sock
!= INVALID_SOCKET
);
637 err
= WSAIoctl(sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
638 &GUIDConnectEx
, sizeof(GUIDConnectEx
),
639 &ISCConnectEx
, sizeof(ISCConnectEx
),
640 &dwBytes
, NULL
, NULL
);
643 err
= WSAIoctl(sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
644 &GUIDAcceptEx
, sizeof(GUIDAcceptEx
),
645 &ISCAcceptEx
, sizeof(ISCAcceptEx
),
646 &dwBytes
, NULL
, NULL
);
649 err
= WSAIoctl(sock
, SIO_GET_EXTENSION_FUNCTION_POINTER
,
650 &GUIDGetAcceptExSockaddrs
, sizeof(GUIDGetAcceptExSockaddrs
),
651 &ISCGetAcceptExSockaddrs
, sizeof(ISCGetAcceptExSockaddrs
),
652 &dwBytes
, NULL
, NULL
);
657 initialised
= ISC_TRUE
;
661 * Initialize socket services
665 RUNTIME_CHECK(isc_once_do(&initialise_once
,
666 initialise
) == ISC_R_SUCCESS
);
672 internal_sendmsg(isc_socket_t
*sock
, IoCompletionInfo
*lpo
,
673 struct msghdr
*messagehdr
, int flags
, int *Error
)
681 Result
= WSASendTo(sock
->fd
, messagehdr
->msg_iov
,
682 messagehdr
->msg_iovlen
, &BytesSent
,
683 Flags
, (SOCKADDR
*)&messagehdr
->to_addr
,
684 messagehdr
->to_addr_len
, (LPWSAOVERLAPPED
)lpo
,
687 total_sent
= (int)BytesSent
;
689 /* Check for errors.*/
690 if (Result
== SOCKET_ERROR
) {
691 *Error
= WSAGetLastError();
694 case WSA_IO_INCOMPLETE
:
695 case WSA_WAIT_IO_COMPLETION
:
697 case NO_ERROR
: /* Strange, but okay */
698 sock
->pending_iocp
++;
699 sock
->pending_send
++;
707 sock
->pending_iocp
++;
708 sock
->pending_send
++;
718 queue_receive_request(isc_socket_t
*sock
) {
724 isc_boolean_t need_recovering
= ISC_FALSE
;
726 IoCompletionInfo
*lpo
;
727 isc_result_t isc_result
;
730 * If we already have a receive pending, do nothing.
732 if (sock
->pending_recv
> 0)
736 * If no one is waiting, do nothing.
738 if (ISC_LIST_EMPTY(sock
->recv_list
))
741 INSIST(sock
->recvbuf
.remaining
== 0);
742 INSIST(sock
->fd
!= INVALID_SOCKET
);
744 iov
[0].len
= sock
->recvbuf
.len
;
745 iov
[0].buf
= sock
->recvbuf
.base
;
747 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
749 sizeof(IoCompletionInfo
));
750 RUNTIME_CHECK(lpo
!= NULL
);
751 lpo
->request_type
= SOCKET_RECV
;
753 sock
->recvbuf
.from_addr_len
= sizeof(sock
->recvbuf
.from_addr
);
756 Result
= WSARecvFrom((SOCKET
)sock
->fd
, iov
, 1,
758 (SOCKADDR
*)&sock
->recvbuf
.from_addr
,
759 &sock
->recvbuf
.from_addr_len
,
760 (LPWSAOVERLAPPED
)lpo
, NULL
);
762 /* Check for errors. */
763 if (Result
== SOCKET_ERROR
) {
764 Error
= WSAGetLastError();
768 sock
->pending_iocp
++;
769 sock
->pending_recv
++;
772 case ERROR_HOST_UNREACHABLE
:
773 if (sock
->type
== isc_sockettype_udp
) {
774 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
775 "WSARecvFrom ERROR_HOST_UNREACHABLE: trying to recover");
776 need_recovering
= ISC_TRUE
;
782 if (sock
->type
== isc_sockettype_udp
) {
783 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
784 "WSARecvFrom WSAENETRESET: trying to recover");
785 need_recovering
= ISC_TRUE
;
791 if (sock
->type
== isc_sockettype_udp
) {
792 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
793 "WSARecvFrom WSAECONNRESET: trying to recover");
794 need_recovering
= ISC_TRUE
;
801 isc_result
= isc__errno2result(Error
);
802 if ((isc_result
== ISC_R_UNEXPECTED
) ||
803 (isc_result
== ISC_R_CONNECTIONRESET
) ||
804 (isc_result
== ISC_R_HOSTUNREACH
))
805 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
806 "WSARecvFrom: Windows error code: %d, isc result %d",
808 send_recvdone_abort(sock
, isc_result
);
813 * The recv() finished immediately, but we will still get
814 * a completion event. Rather than duplicate code, let
815 * that thread handle sending the data along its way.
817 sock
->pending_iocp
++;
818 sock
->pending_recv
++;
819 sock
->in_recovery_cnt
= 0;
822 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
823 isc_msgcat
, ISC_MSGSET_SOCKET
,
825 "queue_io_request: fd %d result %d error %d",
826 sock
->fd
, Result
, Error
);
831 recover_receive_request(sock
, &lpo
);
835 * (placeholder) Hard recovery, doing nothing useful today
836 * (other than to avoid unlimited recursion).
839 hard_recover_receive_request(isc_socket_t
*sock
)
841 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
842 "can't recover fd %d sock %p",
844 send_recvdone_abort(sock
, ISC_R_UNEXPECTED
);
848 * Recovery from a Windows 2008 Server bug
849 * (WSARecvFrom() getting an ERROR_HOST_UNREACHABLE).
850 * Free the overlapped pointer and requeue a receive request.
853 recover_receive_request(isc_socket_t
*sock
, void **lplpo
)
856 HeapFree(hHeapHandle
, 0, *lplpo
);
859 /* limit recursion to 20 */
860 if (sock
->in_recovery_cnt
++ < 20)
861 queue_receive_request(sock
);
863 hard_recover_receive_request(sock
);
867 manager_log(isc_socketmgr_t
*sockmgr
, isc_logcategory_t
*category
,
868 isc_logmodule_t
*module
, int level
, const char *fmt
, ...)
873 if (!isc_log_wouldlog(isc_lctx
, level
))
877 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
880 isc_log_write(isc_lctx
, category
, module
, level
,
881 "sockmgr %p: %s", sockmgr
, msgbuf
);
885 socket_log(int lineno
, isc_socket_t
*sock
, isc_sockaddr_t
*address
,
886 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
887 isc_msgcat_t
*msgcat
, int msgset
, int message
,
888 const char *fmt
, ...)
895 if (!isc_log_wouldlog(isc_lctx
, level
))
899 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
902 if (address
== NULL
) {
903 isc_log_iwrite(isc_lctx
, category
, module
, level
,
904 msgcat
, msgset
, message
,
905 "socket %p line %d: %s", sock
, lineno
, msgbuf
);
907 isc_sockaddr_format(address
, peerbuf
, sizeof(peerbuf
));
908 isc_log_iwrite(isc_lctx
, category
, module
, level
,
909 msgcat
, msgset
, message
,
910 "socket %p line %d peer %s: %s", sock
, lineno
,
917 * Make an fd SOCKET non-blocking.
920 make_nonblock(SOCKET fd
) {
922 unsigned long flags
= 1;
923 char strbuf
[ISC_STRERRORSIZE
];
925 /* Set the socket to non-blocking */
926 ret
= ioctlsocket(fd
, FIONBIO
, &flags
);
929 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
930 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
931 "ioctlsocket(%d, FIOBIO, %d): %s",
934 return (ISC_R_UNEXPECTED
);
937 return (ISC_R_SUCCESS
);
941 * On Windows 2000 systems, UDP sockets using WSARecvFrom misbehave:
942 * WSARecvFrom returns a WSAECONNRESET error after a WSASendTo fails
943 * with an "ICMP port unreachable" response, and the socket can no
944 * longer use WSARecvFrom in subsequent operations.
945 * The function below fixes this, but requires that Windows 2000
946 * Service Pack 2 or later be installed on the system. NT 4.0
947 * systems are not affected by this and work correctly.
948 * See Microsoft Knowledge Base Article Q263823 for details of this.
951 connection_reset_fix(SOCKET fd
) {
952 DWORD dwBytesReturned
= 0;
953 BOOL bNewBehavior
= FALSE
;
956 if (isc_win32os_majorversion() < 5)
957 return (ISC_R_SUCCESS
); /* NT 4.0 has no problem */
959 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
960 status
= WSAIoctl(fd
, SIO_UDP_CONNRESET
, &bNewBehavior
,
961 sizeof(bNewBehavior
), NULL
, 0,
962 &dwBytesReturned
, NULL
, NULL
);
963 if (status
!= SOCKET_ERROR
)
964 return (ISC_R_SUCCESS
);
966 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
967 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
968 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
969 ISC_MSG_FAILED
, "failed"));
970 return (ISC_R_UNEXPECTED
);
975 * Construct an iov array and attach it to the msghdr passed in. This is
976 * the SEND constructor, which will use the used region of the buffer
977 * (if using a buffer list) or will use the internal region (if a single
978 * buffer I/O is requested).
980 * Nothing can be NULL, and the done event must list at least one buffer
981 * on the buffer linked list for this function to be meaningful.
984 build_msghdr_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
985 struct msghdr
*msg
, char *cmsg
, WSABUF
*iov
,
986 IoCompletionInfo
*lpo
)
988 unsigned int iovcount
;
989 isc_buffer_t
*buffer
;
995 memset(msg
, 0, sizeof(*msg
));
997 memcpy(&msg
->to_addr
, &dev
->address
.type
, dev
->address
.length
);
998 msg
->to_addr_len
= dev
->address
.length
;
1000 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
1005 * Single buffer I/O? Skip what we've done so far in this region.
1007 if (buffer
== NULL
) {
1008 write_count
= dev
->region
.length
- dev
->n
;
1009 cpbuffer
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, sizeof(buflist_t
));
1010 RUNTIME_CHECK(cpbuffer
!= NULL
);
1011 cpbuffer
->buf
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, write_count
);
1012 RUNTIME_CHECK(cpbuffer
->buf
!= NULL
);
1014 socket_log(__LINE__
, sock
, NULL
, TRACE
,
1015 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
1016 "alloc_buffer %p %d %p %d", cpbuffer
, sizeof(buflist_t
),
1017 cpbuffer
->buf
, write_count
);
1019 memcpy(cpbuffer
->buf
,(dev
->region
.base
+ dev
->n
), write_count
);
1020 cpbuffer
->buflen
= write_count
;
1021 ISC_LIST_ENQUEUE(lpo
->bufferlist
, cpbuffer
, link
);
1022 iov
[0].buf
= cpbuffer
->buf
;
1023 iov
[0].len
= write_count
;
1031 * Skip the data in the buffer list that we have already written.
1033 skip_count
= dev
->n
;
1034 while (buffer
!= NULL
) {
1035 REQUIRE(ISC_BUFFER_VALID(buffer
));
1036 if (skip_count
< isc_buffer_usedlength(buffer
))
1038 skip_count
-= isc_buffer_usedlength(buffer
);
1039 buffer
= ISC_LIST_NEXT(buffer
, link
);
1042 while (buffer
!= NULL
) {
1043 INSIST(iovcount
< MAXSCATTERGATHER_SEND
);
1045 isc_buffer_usedregion(buffer
, &used
);
1047 if (used
.length
> 0) {
1048 int uselen
= used
.length
- skip_count
;
1049 cpbuffer
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, sizeof(buflist_t
));
1050 RUNTIME_CHECK(cpbuffer
!= NULL
);
1051 cpbuffer
->buf
= HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
, uselen
);
1052 RUNTIME_CHECK(cpbuffer
->buf
!= NULL
);
1054 socket_log(__LINE__
, sock
, NULL
, TRACE
,
1055 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
1056 "alloc_buffer %p %d %p %d", cpbuffer
, sizeof(buflist_t
),
1057 cpbuffer
->buf
, write_count
);
1059 memcpy(cpbuffer
->buf
,(used
.base
+ skip_count
), uselen
);
1060 cpbuffer
->buflen
= uselen
;
1061 iov
[iovcount
].buf
= cpbuffer
->buf
;
1062 iov
[iovcount
].len
= used
.length
- skip_count
;
1063 write_count
+= uselen
;
1067 buffer
= ISC_LIST_NEXT(buffer
, link
);
1070 INSIST(skip_count
== 0);
1074 msg
->msg_iovlen
= iovcount
;
1075 msg
->msg_totallen
= write_count
;
1079 set_dev_address(isc_sockaddr_t
*address
, isc_socket_t
*sock
,
1080 isc_socketevent_t
*dev
)
1082 if (sock
->type
== isc_sockettype_udp
) {
1083 if (address
!= NULL
)
1084 dev
->address
= *address
;
1086 dev
->address
= sock
->address
;
1087 } else if (sock
->type
== isc_sockettype_tcp
) {
1088 INSIST(address
== NULL
);
1089 dev
->address
= sock
->address
;
1094 destroy_socketevent(isc_event_t
*event
) {
1095 isc_socketevent_t
*ev
= (isc_socketevent_t
*)event
;
1097 INSIST(ISC_LIST_EMPTY(ev
->bufferlist
));
1099 (ev
->destroy
)(event
);
1102 static isc_socketevent_t
*
1103 allocate_socketevent(isc_socket_t
*sock
, isc_eventtype_t eventtype
,
1104 isc_taskaction_t action
, const void *arg
)
1106 isc_socketevent_t
*ev
;
1108 ev
= (isc_socketevent_t
*)isc_event_allocate(sock
->manager
->mctx
,
1115 ev
->result
= ISC_R_IOERROR
; // XXXMLG temporary change to detect failure to set
1116 ISC_LINK_INIT(ev
, ev_link
);
1117 ISC_LIST_INIT(ev
->bufferlist
);
1118 ev
->region
.base
= NULL
;
1122 ev
->destroy
= ev
->ev_destroy
;
1123 ev
->ev_destroy
= destroy_socketevent
;
1128 #if defined(ISC_SOCKET_DEBUG)
1130 dump_msg(struct msghdr
*msg
, isc_socket_t
*sock
) {
1133 printf("MSGHDR %p, Socket #: %u\n", msg
, sock
->fd
);
1134 printf("\tname %p, namelen %d\n", msg
->msg_name
, msg
->msg_namelen
);
1135 printf("\tiov %p, iovlen %d\n", msg
->msg_iov
, msg
->msg_iovlen
);
1136 for (i
= 0; i
< (unsigned int)msg
->msg_iovlen
; i
++)
1137 printf("\t\t%d\tbase %p, len %d\n", i
,
1138 msg
->msg_iov
[i
].buf
,
1139 msg
->msg_iov
[i
].len
);
1144 * map the error code
1147 map_socket_error(isc_socket_t
*sock
, int windows_errno
, int *isc_errno
,
1148 char *errorstring
, size_t bufsize
) {
1151 switch (windows_errno
) {
1152 case WSAECONNREFUSED
:
1153 *isc_errno
= ISC_R_CONNREFUSED
;
1154 if (sock
->connected
)
1155 doreturn
= DOIO_HARD
;
1157 doreturn
= DOIO_SOFT
;
1159 case WSAENETUNREACH
:
1160 case ERROR_NETWORK_UNREACHABLE
:
1161 *isc_errno
= ISC_R_NETUNREACH
;
1162 if (sock
->connected
)
1163 doreturn
= DOIO_HARD
;
1165 doreturn
= DOIO_SOFT
;
1167 case ERROR_PORT_UNREACHABLE
:
1168 case ERROR_HOST_UNREACHABLE
:
1169 case WSAEHOSTUNREACH
:
1170 *isc_errno
= ISC_R_HOSTUNREACH
;
1171 if (sock
->connected
)
1172 doreturn
= DOIO_HARD
;
1174 doreturn
= DOIO_SOFT
;
1177 *isc_errno
= ISC_R_NETDOWN
;
1178 if (sock
->connected
)
1179 doreturn
= DOIO_HARD
;
1181 doreturn
= DOIO_SOFT
;
1184 *isc_errno
= ISC_R_HOSTDOWN
;
1185 if (sock
->connected
)
1186 doreturn
= DOIO_HARD
;
1188 doreturn
= DOIO_SOFT
;
1191 *isc_errno
= ISC_R_NOPERM
;
1192 if (sock
->connected
)
1193 doreturn
= DOIO_HARD
;
1195 doreturn
= DOIO_SOFT
;
1199 case WSAECONNABORTED
:
1201 *isc_errno
= ISC_R_CONNECTIONRESET
;
1202 if (sock
->connected
)
1203 doreturn
= DOIO_HARD
;
1205 doreturn
= DOIO_SOFT
;
1208 *isc_errno
= ISC_R_NOTCONNECTED
;
1209 if (sock
->connected
)
1210 doreturn
= DOIO_HARD
;
1212 doreturn
= DOIO_SOFT
;
1214 case ERROR_OPERATION_ABORTED
:
1215 case ERROR_CONNECTION_ABORTED
:
1216 case ERROR_REQUEST_ABORTED
:
1217 *isc_errno
= ISC_R_CONNECTIONRESET
;
1218 doreturn
= DOIO_HARD
;
1221 *isc_errno
= ISC_R_NORESOURCES
;
1222 doreturn
= DOIO_HARD
;
1224 case WSAEAFNOSUPPORT
:
1225 *isc_errno
= ISC_R_FAMILYNOSUPPORT
;
1226 doreturn
= DOIO_HARD
;
1228 case WSAEADDRNOTAVAIL
:
1229 *isc_errno
= ISC_R_ADDRNOTAVAIL
;
1230 doreturn
= DOIO_HARD
;
1232 case WSAEDESTADDRREQ
:
1233 *isc_errno
= ISC_R_BADADDRESSFORM
;
1234 doreturn
= DOIO_HARD
;
1236 case ERROR_NETNAME_DELETED
:
1237 *isc_errno
= ISC_R_NETDOWN
;
1238 doreturn
= DOIO_HARD
;
1241 *isc_errno
= ISC_R_IOERROR
;
1242 doreturn
= DOIO_HARD
;
1245 if (doreturn
== DOIO_HARD
) {
1246 isc__strerror(windows_errno
, errorstring
, bufsize
);
1252 fill_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
) {
1255 isc_buffer_t
*buffer
;
1257 INSIST(dev
->n
< dev
->minimum
);
1258 INSIST(sock
->recvbuf
.remaining
> 0);
1259 INSIST(sock
->pending_recv
== 0);
1261 if (sock
->type
== isc_sockettype_udp
) {
1262 dev
->address
.length
= sock
->recvbuf
.from_addr_len
;
1263 memcpy(&dev
->address
.type
, &sock
->recvbuf
.from_addr
,
1264 sock
->recvbuf
.from_addr_len
);
1265 if (isc_sockaddr_getport(&dev
->address
) == 0) {
1266 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
1267 socket_log(__LINE__
, sock
, &dev
->address
, IOEVENT
,
1268 isc_msgcat
, ISC_MSGSET_SOCKET
,
1270 "dropping source port zero packet");
1272 sock
->recvbuf
.remaining
= 0;
1275 } else if (sock
->type
== isc_sockettype_tcp
) {
1276 dev
->address
= sock
->address
;
1280 * Run through the list of buffers we were given, and find the
1281 * first one with space. Once it is found, loop through, filling
1282 * the buffers as much as possible.
1284 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
1285 if (buffer
!= NULL
) { // Multi-buffer receive
1286 while (buffer
!= NULL
&& sock
->recvbuf
.remaining
> 0) {
1287 REQUIRE(ISC_BUFFER_VALID(buffer
));
1288 if (isc_buffer_availablelength(buffer
) > 0) {
1289 isc_buffer_availableregion(buffer
, &r
);
1290 copylen
= min(r
.length
, sock
->recvbuf
.remaining
);
1291 memcpy(r
.base
, sock
->recvbuf
.consume_position
, copylen
);
1292 sock
->recvbuf
.consume_position
+= copylen
;
1293 sock
->recvbuf
.remaining
-= copylen
;
1294 isc_buffer_add(buffer
, copylen
);
1297 buffer
= ISC_LIST_NEXT(buffer
, link
);
1299 } else { // Single-buffer receive
1300 copylen
= min(dev
->region
.length
- dev
->n
, sock
->recvbuf
.remaining
);
1301 memcpy(dev
->region
.base
+ dev
->n
, sock
->recvbuf
.consume_position
, copylen
);
1302 sock
->recvbuf
.consume_position
+= copylen
;
1303 sock
->recvbuf
.remaining
-= copylen
;
1308 * UDP receives are all-consuming. That is, if we have 4k worth of
1309 * data in our receive buffer, and the caller only gave us
1310 * 1k of space, we will toss the remaining 3k of data. TCP
1311 * will keep the extra data around and use it for later requests.
1313 if (sock
->type
== isc_sockettype_udp
)
1314 sock
->recvbuf
.remaining
= 0;
1318 * Copy out as much data from the internal buffer to done events.
1319 * As each done event is filled, send it along its way.
1322 completeio_recv(isc_socket_t
*sock
)
1324 isc_socketevent_t
*dev
;
1327 * If we are in the process of filling our buffer, we cannot
1328 * touch it yet, so don't.
1330 if (sock
->pending_recv
> 0)
1333 while (sock
->recvbuf
.remaining
> 0 && !ISC_LIST_EMPTY(sock
->recv_list
)) {
1334 dev
= ISC_LIST_HEAD(sock
->recv_list
);
1337 * See if we have sufficient data in our receive buffer
1338 * to handle this. If we do, copy out the data.
1340 fill_recv(sock
, dev
);
1343 * Did we satisfy it?
1345 if (dev
->n
>= dev
->minimum
) {
1346 dev
->result
= ISC_R_SUCCESS
;
1347 send_recvdone_event(sock
, &dev
);
1354 * DOIO_SUCCESS The operation succeeded. dev->result contains
1357 * DOIO_HARD A hard or unexpected I/O error was encountered.
1358 * dev->result contains the appropriate error.
1360 * DOIO_SOFT A soft I/O error was encountered. No senddone
1361 * event was sent. The operation should be retried.
1363 * No other return values are possible.
1366 completeio_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
1367 struct msghdr
*messagehdr
, int cc
, int send_errno
)
1369 char addrbuf
[ISC_SOCKADDR_FORMATSIZE
];
1370 char strbuf
[ISC_STRERRORSIZE
];
1372 if (send_errno
!= 0) {
1373 if (SOFT_ERROR(send_errno
))
1376 return (map_socket_error(sock
, send_errno
, &dev
->result
,
1377 strbuf
, sizeof(strbuf
)));
1380 * The other error types depend on whether or not the
1381 * socket is UDP or TCP. If it is UDP, some errors
1382 * that we expect to be fatal under TCP are merely
1383 * annoying, and are really soft errors.
1385 * However, these soft errors are still returned as
1388 isc_sockaddr_format(&dev
->address
, addrbuf
, sizeof(addrbuf
));
1389 isc__strerror(send_errno
, strbuf
, sizeof(strbuf
));
1390 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "completeio_send: %s: %s",
1392 dev
->result
= isc__errno2result(send_errno
);
1397 * If we write less than we expected, update counters, poke.
1400 if (cc
!= messagehdr
->msg_totallen
)
1404 * Exactly what we wanted to write. We're done with this
1405 * entry. Post its completion event.
1407 dev
->result
= ISC_R_SUCCESS
;
1408 return (DOIO_SUCCESS
);
1412 startio_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
, int *nbytes
,
1416 char strbuf
[ISC_STRERRORSIZE
];
1417 IoCompletionInfo
*lpo
;
1419 struct msghdr
*msghdr
;
1421 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
1423 sizeof(IoCompletionInfo
));
1424 RUNTIME_CHECK(lpo
!= NULL
);
1425 lpo
->request_type
= SOCKET_SEND
;
1427 msghdr
= &lpo
->messagehdr
;
1428 memset(msghdr
, 0, sizeof(struct msghdr
));
1429 ISC_LIST_INIT(lpo
->bufferlist
);
1431 build_msghdr_send(sock
, dev
, msghdr
, cmsg
, sock
->iov
, lpo
);
1433 *nbytes
= internal_sendmsg(sock
, lpo
, msghdr
, 0, send_errno
);
1437 * I/O has been initiated
1438 * completion will be through the completion port
1440 if (PENDING_ERROR(*send_errno
)) {
1441 status
= DOIO_PENDING
;
1445 if (SOFT_ERROR(*send_errno
)) {
1451 * If we got this far then something is wrong
1453 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
1454 isc__strerror(*send_errno
, strbuf
, sizeof(strbuf
));
1455 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
1456 isc_msgcat
, ISC_MSGSET_SOCKET
,
1457 ISC_MSG_INTERNALSEND
,
1458 "startio_send: internal_sendmsg(%d) %d "
1460 sock
->fd
, *nbytes
, *send_errno
, strbuf
);
1465 dev
->result
= ISC_R_SUCCESS
;
1468 _set_state(sock
, SOCK_DATA
);
1473 allocate_socket(isc_socketmgr_t
*manager
, isc_sockettype_t type
,
1474 isc_socket_t
**socketp
) {
1476 isc_result_t result
;
1478 sock
= isc_mem_get(manager
->mctx
, sizeof(*sock
));
1481 return (ISC_R_NOMEMORY
);
1484 sock
->references
= 0;
1486 sock
->manager
= manager
;
1488 sock
->fd
= INVALID_SOCKET
;
1490 ISC_LINK_INIT(sock
, link
);
1493 * set up list of readers and writers to be initially empty
1495 ISC_LIST_INIT(sock
->recv_list
);
1496 ISC_LIST_INIT(sock
->send_list
);
1497 ISC_LIST_INIT(sock
->accept_list
);
1498 sock
->connect_ev
= NULL
;
1499 sock
->pending_accept
= 0;
1500 sock
->pending_recv
= 0;
1501 sock
->pending_send
= 0;
1502 sock
->pending_iocp
= 0;
1504 sock
->connected
= 0;
1505 sock
->pending_connect
= 0;
1507 sock
->in_recovery_cnt
= 0;
1508 memset(sock
->name
, 0, sizeof(sock
->name
)); // zero the name field
1509 _set_state(sock
, SOCK_INITIALIZED
);
1511 sock
->recvbuf
.len
= 65536;
1512 sock
->recvbuf
.consume_position
= sock
->recvbuf
.base
;
1513 sock
->recvbuf
.remaining
= 0;
1514 sock
->recvbuf
.base
= isc_mem_get(manager
->mctx
, sock
->recvbuf
.len
); // max buffer size
1515 if (sock
->recvbuf
.base
== NULL
) {
1521 * initialize the lock
1523 result
= isc_mutex_init(&sock
->lock
);
1524 if (result
!= ISC_R_SUCCESS
) {
1526 isc_mem_put(manager
->mctx
, sock
->recvbuf
.base
, sock
->recvbuf
.len
);
1527 sock
->recvbuf
.base
= NULL
;
1531 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1534 sock
->magic
= SOCKET_MAGIC
;
1537 return (ISC_R_SUCCESS
);
1540 isc_mem_put(manager
->mctx
, sock
, sizeof(*sock
));
1546 * Verify that the socket state is consistent.
1549 consistent(isc_socket_t
*sock
) {
1551 isc_socketevent_t
*dev
;
1552 isc_socket_newconnev_t
*nev
;
1555 isc_boolean_t crash
= ISC_FALSE
;
1557 REQUIRE(sock
->pending_iocp
== sock
->pending_recv
+ sock
->pending_send
1558 + sock
->pending_accept
+ sock
->pending_connect
);
1560 dev
= ISC_LIST_HEAD(sock
->send_list
);
1562 while (dev
!= NULL
) {
1564 dev
= ISC_LIST_NEXT(dev
, ev_link
);
1566 if (count
> sock
->pending_send
) {
1568 crash_reason
= "send_list > sock->pending_send";
1571 nev
= ISC_LIST_HEAD(sock
->accept_list
);
1573 while (nev
!= NULL
) {
1575 nev
= ISC_LIST_NEXT(nev
, ev_link
);
1577 if (count
> sock
->pending_accept
) {
1579 crash_reason
= "send_list > sock->pending_send";
1583 socket_log(__LINE__
, sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1584 ISC_MSG_DESTROYING
, "SOCKET INCONSISTENT: %s",
1587 INSIST(crash
== ISC_FALSE
);
1592 * Maybe free the socket.
1594 * This function will verify tht the socket is no longer in use in any way,
1595 * either internally or externally. This is the only place where this
1596 * check is to be made; if some bit of code believes that IT is done with
1597 * the socket (e.g., some reference counter reaches zero), it should call
1600 * When calling this function, the socket must be locked, and the manager
1603 * When this function returns, *socketp will be NULL. No tricks to try
1604 * to hold on to this pointer are allowed.
1607 maybe_free_socket(isc_socket_t
**socketp
, int lineno
) {
1608 isc_socket_t
*sock
= *socketp
;
1611 INSIST(VALID_SOCKET(sock
));
1614 if (sock
->pending_iocp
> 0
1615 || sock
->pending_recv
> 0
1616 || sock
->pending_send
> 0
1617 || sock
->pending_accept
> 0
1618 || sock
->references
> 0
1619 || sock
->pending_connect
== 1
1620 || !ISC_LIST_EMPTY(sock
->recv_list
)
1621 || !ISC_LIST_EMPTY(sock
->send_list
)
1622 || !ISC_LIST_EMPTY(sock
->accept_list
)
1623 || sock
->fd
!= INVALID_SOCKET
) {
1624 UNLOCK(&sock
->lock
);
1627 UNLOCK(&sock
->lock
);
1629 free_socket(&sock
, lineno
);
1633 free_socket(isc_socket_t
**sockp
, int lineno
) {
1634 isc_socketmgr_t
*manager
;
1635 isc_socket_t
*sock
= *sockp
;
1638 manager
= sock
->manager
;
1641 * Seems we can free the socket after all.
1643 manager
= sock
->manager
;
1644 socket_log(__LINE__
, sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1645 ISC_MSG_DESTROYING
, "freeing socket line %d fd %d lock %p semaphore %p",
1646 lineno
, sock
->fd
, &sock
->lock
, sock
->lock
.LockSemaphore
);
1649 DESTROYLOCK(&sock
->lock
);
1651 if (sock
->recvbuf
.base
!= NULL
)
1652 isc_mem_put(manager
->mctx
, sock
->recvbuf
.base
, sock
->recvbuf
.len
);
1654 LOCK(&manager
->lock
);
1655 if (ISC_LINK_LINKED(sock
, link
))
1656 ISC_LIST_UNLINK(manager
->socklist
, sock
, link
);
1657 isc_mem_put(manager
->mctx
, sock
, sizeof(*sock
));
1659 if (ISC_LIST_EMPTY(manager
->socklist
))
1660 SIGNAL(&manager
->shutdown_ok
);
1661 UNLOCK(&manager
->lock
);
1665 * Create a new 'type' socket managed by 'manager'. Events
1666 * will be posted to 'task' and when dispatched 'action' will be
1667 * called with 'arg' as the arg value. The new socket is returned
1671 isc__socket_create(isc_socketmgr_t
*manager
, int pf
, isc_sockettype_t type
,
1672 isc_socket_t
**socketp
) {
1673 isc_socket_t
*sock
= NULL
;
1674 isc_result_t result
;
1675 #if defined(USE_CMSG)
1678 #if defined(SO_RCVBUF)
1679 ISC_SOCKADDR_LEN_T optlen
;
1683 char strbuf
[ISC_STRERRORSIZE
];
1685 REQUIRE(VALID_MANAGER(manager
));
1686 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1687 REQUIRE(type
!= isc_sockettype_fdwatch
);
1689 result
= allocate_socket(manager
, type
, &sock
);
1690 if (result
!= ISC_R_SUCCESS
)
1695 case isc_sockettype_udp
:
1696 sock
->fd
= socket(pf
, SOCK_DGRAM
, IPPROTO_UDP
);
1697 if (sock
->fd
!= INVALID_SOCKET
) {
1698 result
= connection_reset_fix(sock
->fd
);
1699 if (result
!= ISC_R_SUCCESS
) {
1700 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1701 "closed %d %d %d con_reset_fix_failed",
1702 sock
->pending_recv
, sock
->pending_send
,
1704 closesocket(sock
->fd
);
1705 _set_state(sock
, SOCK_CLOSED
);
1706 sock
->fd
= INVALID_SOCKET
;
1707 free_socket(&sock
, __LINE__
);
1712 case isc_sockettype_tcp
:
1713 sock
->fd
= socket(pf
, SOCK_STREAM
, IPPROTO_TCP
);
1717 if (sock
->fd
== INVALID_SOCKET
) {
1718 socket_errno
= WSAGetLastError();
1719 free_socket(&sock
, __LINE__
);
1721 switch (socket_errno
) {
1724 return (ISC_R_NORESOURCES
);
1726 case WSAEPROTONOSUPPORT
:
1727 case WSAEPFNOSUPPORT
:
1728 case WSAEAFNOSUPPORT
:
1729 return (ISC_R_FAMILYNOSUPPORT
);
1732 isc__strerror(socket_errno
, strbuf
, sizeof(strbuf
));
1733 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1735 isc_msgcat_get(isc_msgcat
,
1740 return (ISC_R_UNEXPECTED
);
1744 result
= make_nonblock(sock
->fd
);
1745 if (result
!= ISC_R_SUCCESS
) {
1746 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1747 "closed %d %d %d make_nonblock_failed",
1748 sock
->pending_recv
, sock
->pending_send
,
1750 closesocket(sock
->fd
);
1751 sock
->fd
= INVALID_SOCKET
;
1752 free_socket(&sock
, __LINE__
);
1757 #if defined(USE_CMSG) || defined(SO_RCVBUF)
1758 if (type
== isc_sockettype_udp
) {
1760 #if defined(USE_CMSG)
1761 #if defined(ISC_PLATFORM_HAVEIPV6)
1762 #ifdef IPV6_RECVPKTINFO
1764 if ((pf
== AF_INET6
)
1765 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
,
1766 (char *)&on
, sizeof(on
)) < 0)) {
1767 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
1768 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1769 "setsockopt(%d, IPV6_RECVPKTINFO) "
1771 isc_msgcat_get(isc_msgcat
,
1779 if ((pf
== AF_INET6
)
1780 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_PKTINFO
,
1781 (char *)&on
, sizeof(on
)) < 0)) {
1782 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
1783 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1784 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1786 isc_msgcat_get(isc_msgcat
,
1792 #endif /* IPV6_RECVPKTINFO */
1793 #ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/
1794 /* use minimum MTU */
1795 if (pf
== AF_INET6
) {
1796 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
,
1798 (char *)&on
, sizeof(on
));
1801 #endif /* ISC_PLATFORM_HAVEIPV6 */
1802 #endif /* defined(USE_CMSG) */
1804 #if defined(SO_RCVBUF)
1805 optlen
= sizeof(size
);
1806 if (getsockopt(sock
->fd
, SOL_SOCKET
, SO_RCVBUF
,
1807 (char *)&size
, &optlen
) >= 0 &&
1808 size
< RCVBUFSIZE
) {
1810 (void)setsockopt(sock
->fd
, SOL_SOCKET
, SO_RCVBUF
,
1811 (char *)&size
, sizeof(size
));
1816 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
1818 _set_state(sock
, SOCK_OPEN
);
1819 sock
->references
= 1;
1822 iocompletionport_update(sock
);
1825 * Note we don't have to lock the socket like we normally would because
1826 * there are no external references to it yet.
1828 LOCK(&manager
->lock
);
1829 ISC_LIST_APPEND(manager
->socklist
, sock
, link
);
1830 InterlockedIncrement(&manager
->totalSockets
);
1831 UNLOCK(&manager
->lock
);
1833 socket_log(__LINE__
, sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1834 ISC_MSG_CREATED
, "created %u type %u", sock
->fd
, type
);
1836 return (ISC_R_SUCCESS
);
1840 isc_socket_open(isc_socket_t
*sock
) {
1841 REQUIRE(VALID_SOCKET(sock
));
1842 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
1844 return (ISC_R_NOTIMPLEMENTED
);
1848 * Attach to a socket. Caller must explicitly detach when it is done.
1851 isc__socket_attach(isc_socket_t
*sock
, isc_socket_t
**socketp
) {
1852 REQUIRE(VALID_SOCKET(sock
));
1853 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1858 UNLOCK(&sock
->lock
);
1864 * Dereference a socket. If this is the last reference to it, clean things
1865 * up by destroying the socket.
1868 isc__socket_detach(isc_socket_t
**socketp
) {
1870 isc_boolean_t kill_socket
= ISC_FALSE
;
1872 REQUIRE(socketp
!= NULL
);
1874 REQUIRE(VALID_SOCKET(sock
));
1875 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
1879 REQUIRE(sock
->references
> 0);
1882 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
1883 "detach_socket %d %d %d",
1884 sock
->pending_recv
, sock
->pending_send
,
1887 if (sock
->references
== 0 && sock
->fd
!= INVALID_SOCKET
) {
1888 closesocket(sock
->fd
);
1889 sock
->fd
= INVALID_SOCKET
;
1890 _set_state(sock
, SOCK_CLOSED
);
1893 maybe_free_socket(&sock
, __LINE__
);
1899 isc_socket_close(isc_socket_t
*sock
) {
1900 REQUIRE(VALID_SOCKET(sock
));
1901 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
1903 return (ISC_R_NOTIMPLEMENTED
);
1907 * Dequeue an item off the given socket's read queue, set the result code
1908 * in the done event to the one provided, and send it to the task it was
1911 * If the event to be sent is on a list, remove it before sending. If
1912 * asked to, send and detach from the task as well.
1914 * Caller must have the socket locked if the event is attached to the socket.
1917 send_recvdone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1920 task
= (*dev
)->ev_sender
;
1921 (*dev
)->ev_sender
= sock
;
1923 if (ISC_LINK_LINKED(*dev
, ev_link
))
1924 ISC_LIST_DEQUEUE(sock
->recv_list
, *dev
, ev_link
);
1926 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1927 == ISC_SOCKEVENTATTR_ATTACHED
)
1928 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1930 isc_task_send(task
, (isc_event_t
**)dev
);
1936 * See comments for send_recvdone_event() above.
1939 send_senddone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1942 INSIST(dev
!= NULL
&& *dev
!= NULL
);
1944 task
= (*dev
)->ev_sender
;
1945 (*dev
)->ev_sender
= sock
;
1947 if (ISC_LINK_LINKED(*dev
, ev_link
))
1948 ISC_LIST_DEQUEUE(sock
->send_list
, *dev
, ev_link
);
1950 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1951 == ISC_SOCKEVENTATTR_ATTACHED
)
1952 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1954 isc_task_send(task
, (isc_event_t
**)dev
);
1960 * See comments for send_recvdone_event() above.
1963 send_acceptdone_event(isc_socket_t
*sock
, isc_socket_newconnev_t
**adev
) {
1966 INSIST(adev
!= NULL
&& *adev
!= NULL
);
1968 task
= (*adev
)->ev_sender
;
1969 (*adev
)->ev_sender
= sock
;
1971 if (ISC_LINK_LINKED(*adev
, ev_link
))
1972 ISC_LIST_DEQUEUE(sock
->accept_list
, *adev
, ev_link
);
1974 isc_task_sendanddetach(&task
, (isc_event_t
**)adev
);
1980 * See comments for send_recvdone_event() above.
1983 send_connectdone_event(isc_socket_t
*sock
, isc_socket_connev_t
**cdev
) {
1986 INSIST(cdev
!= NULL
&& *cdev
!= NULL
);
1988 task
= (*cdev
)->ev_sender
;
1989 (*cdev
)->ev_sender
= sock
;
1991 sock
->connect_ev
= NULL
;
1993 isc_task_sendanddetach(&task
, (isc_event_t
**)cdev
);
1999 * On entry to this function, the event delivered is the internal
2000 * readable event, and the first item on the accept_list should be
2001 * the done event we want to send. If the list is empty, this is a no-op,
2002 * so just close the new connection, unlock, and return.
2004 * Note the socket is locked before entering here
2007 internal_accept(isc_socket_t
*sock
, IoCompletionInfo
*lpo
, int accept_errno
) {
2008 isc_socket_newconnev_t
*adev
;
2009 isc_result_t result
= ISC_R_SUCCESS
;
2010 isc_socket_t
*nsock
;
2011 struct sockaddr
*localaddr
;
2012 int localaddr_len
= sizeof(*localaddr
);
2013 struct sockaddr
*remoteaddr
;
2014 int remoteaddr_len
= sizeof(*remoteaddr
);
2016 INSIST(VALID_SOCKET(sock
));
2020 socket_log(__LINE__
, sock
, NULL
, TRACE
,
2021 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
2022 "internal_accept called");
2024 INSIST(sock
->listener
);
2026 INSIST(sock
->pending_iocp
> 0);
2027 sock
->pending_iocp
--;
2028 INSIST(sock
->pending_accept
> 0);
2029 sock
->pending_accept
--;
2034 * If the event is no longer in the list we can just return.
2036 if (!acceptdone_is_active(sock
, adev
))
2039 nsock
= adev
->newsocket
;
2042 * Pull off the done event.
2044 ISC_LIST_UNLINK(sock
->accept_list
, adev
, ev_link
);
2047 * Extract the addresses from the socket, copy them into the structure,
2048 * and return the new socket.
2050 ISCGetAcceptExSockaddrs(lpo
->acceptbuffer
, 0,
2051 sizeof(SOCKADDR_STORAGE
) + 16, sizeof(SOCKADDR_STORAGE
) + 16,
2052 (LPSOCKADDR
*)&localaddr
, &localaddr_len
,
2053 (LPSOCKADDR
*)&remoteaddr
, &remoteaddr_len
);
2054 memcpy(&adev
->address
.type
, remoteaddr
, remoteaddr_len
);
2055 adev
->address
.length
= remoteaddr_len
;
2056 nsock
->address
= adev
->address
;
2057 nsock
->pf
= adev
->address
.type
.sa
.sa_family
;
2059 socket_log(__LINE__
, nsock
, &nsock
->address
, TRACE
,
2060 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
2061 "internal_accept parent %p", sock
);
2063 result
= make_nonblock(adev
->newsocket
->fd
);
2064 INSIST(result
== ISC_R_SUCCESS
);
2066 INSIST(setsockopt(nsock
->fd
, SOL_SOCKET
, SO_UPDATE_ACCEPT_CONTEXT
,
2067 (char *)&sock
->fd
, sizeof(sock
->fd
)) == 0);
2070 * Hook it up into the manager.
2073 nsock
->connected
= 1;
2074 _set_state(nsock
, SOCK_OPEN
);
2076 LOCK(&nsock
->manager
->lock
);
2077 ISC_LIST_APPEND(nsock
->manager
->socklist
, nsock
, link
);
2078 InterlockedIncrement(&nsock
->manager
->totalSockets
);
2079 UNLOCK(&nsock
->manager
->lock
);
2081 socket_log(__LINE__
, sock
, &nsock
->address
, CREATION
,
2082 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTEDCXN
,
2083 "accepted_connection new_socket %p fd %d",
2086 adev
->result
= result
;
2087 send_acceptdone_event(sock
, &adev
);
2091 UNLOCK(&sock
->lock
);
2093 HeapFree(hHeapHandle
, 0, lpo
->acceptbuffer
);
2094 lpo
->acceptbuffer
= NULL
;
2098 * Called when a socket with a pending connect() finishes.
2099 * Note that the socket is locked before entering.
2102 internal_connect(isc_socket_t
*sock
, IoCompletionInfo
*lpo
, int connect_errno
) {
2103 isc_socket_connev_t
*cdev
;
2104 char strbuf
[ISC_STRERRORSIZE
];
2106 INSIST(VALID_SOCKET(sock
));
2110 INSIST(sock
->pending_iocp
> 0);
2111 sock
->pending_iocp
--;
2112 INSIST(sock
->pending_connect
== 1);
2113 sock
->pending_connect
= 0;
2116 * Has this event been canceled?
2119 if (!connectdone_is_active(sock
, cdev
)) {
2120 sock
->pending_connect
= 0;
2121 if (sock
->fd
!= INVALID_SOCKET
) {
2122 closesocket(sock
->fd
);
2123 sock
->fd
= INVALID_SOCKET
;
2124 _set_state(sock
, SOCK_CLOSED
);
2127 UNLOCK(&sock
->lock
);
2132 * Check possible Windows network event error status here.
2134 if (connect_errno
!= 0) {
2136 * If the error is SOFT, just try again on this
2137 * fd and pretend nothing strange happened.
2139 if (SOFT_ERROR(connect_errno
) ||
2140 connect_errno
== WSAEINPROGRESS
) {
2141 sock
->pending_connect
= 1;
2143 UNLOCK(&sock
->lock
);
2148 * Translate other errors into ISC_R_* flavors.
2150 switch (connect_errno
) {
2151 #define ERROR_MATCH(a, b) case a: cdev->result = b; break;
2152 ERROR_MATCH(WSAEACCES
, ISC_R_NOPERM
);
2153 ERROR_MATCH(WSAEADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
2154 ERROR_MATCH(WSAEAFNOSUPPORT
, ISC_R_ADDRNOTAVAIL
);
2155 ERROR_MATCH(WSAECONNREFUSED
, ISC_R_CONNREFUSED
);
2156 ERROR_MATCH(WSAEHOSTUNREACH
, ISC_R_HOSTUNREACH
);
2157 ERROR_MATCH(WSAEHOSTDOWN
, ISC_R_HOSTDOWN
);
2158 ERROR_MATCH(WSAENETUNREACH
, ISC_R_NETUNREACH
);
2159 ERROR_MATCH(WSAENETDOWN
, ISC_R_NETDOWN
);
2160 ERROR_MATCH(WSAENOBUFS
, ISC_R_NORESOURCES
);
2161 ERROR_MATCH(WSAECONNRESET
, ISC_R_CONNECTIONRESET
);
2162 ERROR_MATCH(WSAECONNABORTED
, ISC_R_CONNECTIONRESET
);
2163 ERROR_MATCH(WSAETIMEDOUT
, ISC_R_TIMEDOUT
);
2166 cdev
->result
= ISC_R_UNEXPECTED
;
2167 isc__strerror(connect_errno
, strbuf
, sizeof(strbuf
));
2168 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2169 "internal_connect: connect() %s",
2173 INSIST(setsockopt(sock
->fd
, SOL_SOCKET
,
2174 SO_UPDATE_CONNECT_CONTEXT
, NULL
, 0) == 0);
2175 cdev
->result
= ISC_R_SUCCESS
;
2176 sock
->connected
= 1;
2177 socket_log(__LINE__
, sock
, &sock
->address
, IOEVENT
,
2178 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTEDCXN
,
2179 "internal_connect: success");
2182 send_connectdone_event(sock
, &cdev
);
2184 UNLOCK(&sock
->lock
);
2188 * Loop through the socket, returning ISC_R_EOF for each done event pending.
2191 send_recvdone_abort(isc_socket_t
*sock
, isc_result_t result
) {
2192 isc_socketevent_t
*dev
;
2194 while (!ISC_LIST_EMPTY(sock
->recv_list
)) {
2195 dev
= ISC_LIST_HEAD(sock
->recv_list
);
2196 dev
->result
= result
;
2197 send_recvdone_event(sock
, &dev
);
2202 * Take the data we received in our private buffer, and if any recv() calls on
2203 * our list are satisfied, send the corresponding done event.
2205 * If we need more data (there are still items on the recv_list after we consume all
2206 * our data) then arrange for another system recv() call to fill our buffers.
2209 internal_recv(isc_socket_t
*sock
, int nbytes
)
2211 INSIST(VALID_SOCKET(sock
));
2216 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
2217 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALRECV
,
2218 "internal_recv: %d bytes received", nbytes
);
2221 * If we got here, the I/O operation succeeded. However, we might still have removed this
2222 * event from our notification list (or never placed it on it due to immediate completion.)
2223 * Handle the reference counting here, and handle the cancellation event just after.
2225 INSIST(sock
->pending_iocp
> 0);
2226 sock
->pending_iocp
--;
2227 INSIST(sock
->pending_recv
> 0);
2228 sock
->pending_recv
--;
2231 * The only way we could have gotten here is that our I/O has successfully completed.
2232 * Update our pointers, and move on. The only odd case here is that we might not
2233 * have received enough data on a TCP stream to satisfy the minimum requirements. If
2234 * this is the case, we will re-issue the recv() call for what we need.
2236 * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end
2239 if (nbytes
== 0 && sock
->type
== isc_sockettype_tcp
) {
2240 send_recvdone_abort(sock
, ISC_R_EOF
);
2241 maybe_free_socket(&sock
, __LINE__
);
2244 sock
->recvbuf
.remaining
= nbytes
;
2245 sock
->recvbuf
.consume_position
= sock
->recvbuf
.base
;
2246 completeio_recv(sock
);
2249 * If there are more receivers waiting for data, queue another receive
2252 queue_receive_request(sock
);
2255 * Unlock and/or destroy if we are the last thing this socket has left to do.
2257 maybe_free_socket(&sock
, __LINE__
);
2261 internal_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
2262 struct msghdr
*messagehdr
, int nbytes
, int send_errno
, IoCompletionInfo
*lpo
)
2267 * Find out what socket this is and lock it.
2269 INSIST(VALID_SOCKET(sock
));
2274 socket_log(__LINE__
, sock
, NULL
, IOEVENT
,
2275 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALSEND
,
2276 "internal_send: task got socket event %p", dev
);
2278 buffer
= ISC_LIST_HEAD(lpo
->bufferlist
);
2279 while (buffer
!= NULL
) {
2280 ISC_LIST_DEQUEUE(lpo
->bufferlist
, buffer
, link
);
2282 socket_log(__LINE__
, sock
, NULL
, TRACE
,
2283 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
2284 "free_buffer %p %p", buffer
, buffer
->buf
);
2286 HeapFree(hHeapHandle
, 0, buffer
->buf
);
2287 HeapFree(hHeapHandle
, 0, buffer
);
2288 buffer
= ISC_LIST_HEAD(lpo
->bufferlist
);
2291 INSIST(sock
->pending_iocp
> 0);
2292 sock
->pending_iocp
--;
2293 INSIST(sock
->pending_send
> 0);
2294 sock
->pending_send
--;
2296 /* If the event is no longer in the list we can just return */
2297 if (!senddone_is_active(sock
, dev
))
2301 * Set the error code and send things on its way.
2303 switch (completeio_send(sock
, dev
, messagehdr
, nbytes
, send_errno
)) {
2308 send_senddone_event(sock
, &dev
);
2313 maybe_free_socket(&sock
, __LINE__
);
2317 * These return if the done event passed in is on the list (or for connect, is
2318 * the one we're waiting for. Using these ensures we will not double-send an
2321 static isc_boolean_t
2322 senddone_is_active(isc_socket_t
*sock
, isc_socketevent_t
*dev
)
2324 isc_socketevent_t
*ldev
;
2326 ldev
= ISC_LIST_HEAD(sock
->send_list
);
2327 while (ldev
!= NULL
&& ldev
!= dev
)
2328 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
2330 return (ldev
== NULL
? ISC_FALSE
: ISC_TRUE
);
2333 static isc_boolean_t
2334 acceptdone_is_active(isc_socket_t
*sock
, isc_socket_newconnev_t
*dev
)
2336 isc_socket_newconnev_t
*ldev
;
2338 ldev
= ISC_LIST_HEAD(sock
->accept_list
);
2339 while (ldev
!= NULL
&& ldev
!= dev
)
2340 ldev
= ISC_LIST_NEXT(ldev
, ev_link
);
2342 return (ldev
== NULL
? ISC_FALSE
: ISC_TRUE
);
2345 static isc_boolean_t
2346 connectdone_is_active(isc_socket_t
*sock
, isc_socket_connev_t
*dev
)
2348 return (sock
->connect_ev
== dev
? ISC_TRUE
: ISC_FALSE
);
2352 * This is the I/O Completion Port Worker Function. It loops forever
2353 * waiting for I/O to complete and then forwards them for further
2354 * processing. There are a number of these in separate threads.
2356 static isc_threadresult_t WINAPI
2357 SocketIoThread(LPVOID ThreadContext
) {
2358 isc_socketmgr_t
*manager
= ThreadContext
;
2359 BOOL bSuccess
= FALSE
;
2361 IoCompletionInfo
*lpo
= NULL
;
2362 isc_socket_t
*sock
= NULL
;
2364 struct msghdr
*messagehdr
= NULL
;
2366 char strbuf
[ISC_STRERRORSIZE
];
2369 REQUIRE(VALID_MANAGER(manager
));
2372 * Set the thread priority high enough so I/O will
2373 * preempt normal recv packet processing, but not
2374 * higher than the timer sync thread.
2376 if (!SetThreadPriority(GetCurrentThread(),
2377 THREAD_PRIORITY_ABOVE_NORMAL
)) {
2378 errval
= GetLastError();
2379 isc__strerror(errval
, strbuf
, sizeof(strbuf
));
2380 FATAL_ERROR(__FILE__
, __LINE__
,
2381 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2383 "Can't set thread priority: %s"),
2388 * Loop forever waiting on I/O Completions and then processing them
2391 bSuccess
= GetQueuedCompletionStatus(manager
->hIoCompletionPort
,
2392 &nbytes
, (LPDWORD
)&sock
,
2393 (LPWSAOVERLAPPED
*)&lpo
,
2395 if (lpo
== NULL
) /* Received request to exit */
2398 REQUIRE(VALID_SOCKET(sock
));
2400 request
= lpo
->request_type
;
2404 isc_result_t isc_result
;
2407 * Did the I/O operation complete?
2409 errstatus
= GetLastError();
2410 isc_result
= isc__errno2resultx(errstatus
, __FILE__
, __LINE__
);
2416 INSIST(sock
->pending_iocp
> 0);
2417 sock
->pending_iocp
--;
2418 INSIST(sock
->pending_recv
> 0);
2419 sock
->pending_recv
--;
2420 if ((sock
->type
== isc_sockettype_udp
) &&
2421 (errstatus
== ERROR_HOST_UNREACHABLE
)) {
2422 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2423 "SOCKET_RECV ERROR_HOST_UNREACHABLE: trying to recover");
2424 recover_receive_request(sock
, &lpo
);
2427 if ((sock
->type
== isc_sockettype_udp
) &&
2428 (errstatus
== WSAENETRESET
)) {
2429 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2430 "SOCKET_RECV WSAENETRESET: trying to recover");
2431 recover_receive_request(sock
, &lpo
);
2434 if ((sock
->type
== isc_sockettype_udp
) &&
2435 (errstatus
== WSAECONNRESET
)) {
2436 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2437 "SOCKET_RECV WSAECONNRESET: trying to recover");
2438 recover_receive_request(sock
, &lpo
);
2441 send_recvdone_abort(sock
, isc_result
);
2442 if ((isc_result
== ISC_R_UNEXPECTED
) ||
2443 ((isc_result
== ISC_R_CONNECTIONRESET
) &&
2444 (errstatus
!= ERROR_OPERATION_ABORTED
)) ||
2445 (isc_result
== ISC_R_HOSTUNREACH
)) {
2446 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2447 "SOCKET_RECV: Windows error code: %d, returning ISC error %d",
2448 errstatus
, isc_result
);
2453 INSIST(sock
->pending_iocp
> 0);
2454 sock
->pending_iocp
--;
2455 INSIST(sock
->pending_send
> 0);
2456 sock
->pending_send
--;
2457 if (senddone_is_active(sock
, lpo
->dev
)) {
2458 lpo
->dev
->result
= isc_result
;
2459 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2461 send_senddone_event(sock
, &lpo
->dev
);
2466 INSIST(sock
->pending_iocp
> 0);
2467 sock
->pending_iocp
--;
2468 INSIST(sock
->pending_accept
> 0);
2469 sock
->pending_accept
--;
2470 if (acceptdone_is_active(sock
, lpo
->adev
)) {
2471 closesocket(lpo
->adev
->newsocket
->fd
);
2472 lpo
->adev
->newsocket
->fd
= INVALID_SOCKET
;
2473 lpo
->adev
->newsocket
->references
--;
2474 free_socket(&lpo
->adev
->newsocket
, __LINE__
);
2475 lpo
->adev
->result
= isc_result
;
2476 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2478 send_acceptdone_event(sock
, &lpo
->adev
);
2482 case SOCKET_CONNECT
:
2483 INSIST(sock
->pending_iocp
> 0);
2484 sock
->pending_iocp
--;
2485 INSIST(sock
->pending_connect
== 1);
2486 sock
->pending_connect
= 0;
2487 if (connectdone_is_active(sock
, lpo
->cdev
)) {
2488 lpo
->cdev
->result
= isc_result
;
2489 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2490 "canceled_connect");
2491 send_connectdone_event(sock
, &lpo
->cdev
);
2495 maybe_free_socket(&sock
, __LINE__
);
2498 HeapFree(hHeapHandle
, 0, lpo
);
2502 messagehdr
= &lpo
->messagehdr
;
2506 internal_recv(sock
, nbytes
);
2509 internal_send(sock
, lpo
->dev
, messagehdr
, nbytes
, errstatus
, lpo
);
2512 internal_accept(sock
, lpo
, errstatus
);
2514 case SOCKET_CONNECT
:
2515 internal_connect(sock
, lpo
, errstatus
);
2520 HeapFree(hHeapHandle
, 0, lpo
);
2524 * Exit Completion Port Thread
2526 manager_log(manager
, TRACE
,
2527 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2528 ISC_MSG_EXITING
, "SocketIoThread exiting"));
2529 return ((isc_threadresult_t
)0);
2533 * Create a new socket manager.
2536 isc__socketmgr_create(isc_mem_t
*mctx
, isc_socketmgr_t
**managerp
) {
2537 return (isc_socketmgr_create2(mctx
, managerp
, 0));
2541 isc__socketmgr_create2(isc_mem_t
*mctx
, isc_socketmgr_t
**managerp
,
2542 unsigned int maxsocks
)
2544 isc_socketmgr_t
*manager
;
2545 isc_result_t result
;
2547 REQUIRE(managerp
!= NULL
&& *managerp
== NULL
);
2550 return (ISC_R_NOTIMPLEMENTED
);
2552 manager
= isc_mem_get(mctx
, sizeof(*manager
));
2553 if (manager
== NULL
)
2554 return (ISC_R_NOMEMORY
);
2558 manager
->magic
= SOCKET_MANAGER_MAGIC
;
2559 manager
->mctx
= NULL
;
2560 manager
->stats
= NULL
;
2561 ISC_LIST_INIT(manager
->socklist
);
2562 result
= isc_mutex_init(&manager
->lock
);
2563 if (result
!= ISC_R_SUCCESS
) {
2564 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2567 if (isc_condition_init(&manager
->shutdown_ok
) != ISC_R_SUCCESS
) {
2568 DESTROYLOCK(&manager
->lock
);
2569 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2570 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2571 "isc_condition_init() %s",
2572 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2573 ISC_MSG_FAILED
, "failed"));
2574 return (ISC_R_UNEXPECTED
);
2577 isc_mem_attach(mctx
, &manager
->mctx
);
2579 iocompletionport_init(manager
); /* Create the Completion Ports */
2581 manager
->bShutdown
= ISC_FALSE
;
2582 manager
->totalSockets
= 0;
2583 manager
->iocp_total
= 0;
2585 *managerp
= manager
;
2587 return (ISC_R_SUCCESS
);
2591 isc__socketmgr_getmaxsockets(isc_socketmgr_t
*manager
, unsigned int *nsockp
) {
2592 REQUIRE(VALID_MANAGER(manager
));
2593 REQUIRE(nsockp
!= NULL
);
2595 return (ISC_R_NOTIMPLEMENTED
);
2599 isc__socketmgr_setstats(isc_socketmgr_t
*manager
, isc_stats_t
*stats
) {
2600 REQUIRE(VALID_MANAGER(manager
));
2601 REQUIRE(ISC_LIST_EMPTY(manager
->socklist
));
2602 REQUIRE(manager
->stats
== NULL
);
2603 REQUIRE(isc_stats_ncounters(stats
) == isc_sockstatscounter_max
);
2605 isc_stats_attach(stats
, &manager
->stats
);
2609 isc__socketmgr_destroy(isc_socketmgr_t
**managerp
) {
2610 isc_socketmgr_t
*manager
;
2615 * Destroy a socket manager.
2618 REQUIRE(managerp
!= NULL
);
2619 manager
= *managerp
;
2620 REQUIRE(VALID_MANAGER(manager
));
2622 LOCK(&manager
->lock
);
2625 * Wait for all sockets to be destroyed.
2627 while (!ISC_LIST_EMPTY(manager
->socklist
)) {
2628 manager_log(manager
, CREATION
,
2629 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2630 ISC_MSG_SOCKETSREMAIN
,
2632 WAIT(&manager
->shutdown_ok
, &manager
->lock
);
2635 UNLOCK(&manager
->lock
);
2638 * Here, we need to had some wait code for the completion port
2641 signal_iocompletionport_exit(manager
);
2642 manager
->bShutdown
= ISC_TRUE
;
2645 * Wait for threads to exit.
2647 for (i
= 0; i
< manager
->maxIOCPThreads
; i
++) {
2648 if (isc_thread_join((isc_thread_t
) manager
->hIOCPThreads
[i
],
2649 NULL
) != ISC_R_SUCCESS
)
2650 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2651 "isc_thread_join() for Completion Port %s",
2652 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2653 ISC_MSG_FAILED
, "failed"));
2659 CloseHandle(manager
->hIoCompletionPort
);
2661 (void)isc_condition_destroy(&manager
->shutdown_ok
);
2663 DESTROYLOCK(&manager
->lock
);
2664 if (manager
->stats
!= NULL
)
2665 isc_stats_detach(&manager
->stats
);
2667 mctx
= manager
->mctx
;
2668 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2670 isc_mem_detach(&mctx
);
2676 queue_receive_event(isc_socket_t
*sock
, isc_task_t
*task
, isc_socketevent_t
*dev
)
2678 isc_task_t
*ntask
= NULL
;
2680 isc_task_attach(task
, &ntask
);
2681 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
2684 * Enqueue the request.
2686 INSIST(!ISC_LINK_LINKED(dev
, ev_link
));
2687 ISC_LIST_ENQUEUE(sock
->recv_list
, dev
, ev_link
);
2689 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2690 "queue_receive_event: event %p -> task %p",
2695 * Check the pending receive queue, and if we have data pending, give it to this
2696 * caller. If we have none, queue an I/O request. If this caller is not the first
2697 * on the list, then we will just queue this event and return.
2699 * Caller must have the socket locked.
2702 socket_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
2706 isc_task_t
*ntask
= NULL
;
2707 isc_result_t result
= ISC_R_SUCCESS
;
2710 dev
->ev_sender
= task
;
2712 if (sock
->fd
== INVALID_SOCKET
)
2716 * Queue our event on the list of things to do. Call our function to
2717 * attempt to fill buffers as much as possible, and return done events.
2718 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2719 * here and tell our caller that we could not satisfy it immediately.
2721 queue_receive_event(sock
, task
, dev
);
2722 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
2723 result
= ISC_R_INPROGRESS
;
2725 completeio_recv(sock
);
2728 * If there are more receivers waiting for data, queue another receive
2731 queue_receive_request(sock
);
2737 isc__socket_recvv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2738 unsigned int minimum
, isc_task_t
*task
,
2739 isc_taskaction_t action
, const void *arg
)
2741 isc_socketevent_t
*dev
;
2742 isc_socketmgr_t
*manager
;
2743 unsigned int iocount
;
2744 isc_buffer_t
*buffer
;
2747 REQUIRE(VALID_SOCKET(sock
));
2752 * Make sure that the socket is not closed. XXXMLG change error here?
2754 if (sock
->fd
== INVALID_SOCKET
) {
2755 UNLOCK(&sock
->lock
);
2756 return (ISC_R_CONNREFUSED
);
2759 REQUIRE(buflist
!= NULL
);
2760 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
2761 REQUIRE(task
!= NULL
);
2762 REQUIRE(action
!= NULL
);
2764 manager
= sock
->manager
;
2765 REQUIRE(VALID_MANAGER(manager
));
2767 iocount
= isc_bufferlist_availablecount(buflist
);
2768 REQUIRE(iocount
> 0);
2770 INSIST(sock
->bound
);
2772 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2774 UNLOCK(&sock
->lock
);
2775 return (ISC_R_NOMEMORY
);
2779 * UDP sockets are always partial read
2781 if (sock
->type
== isc_sockettype_udp
)
2785 dev
->minimum
= iocount
;
2787 dev
->minimum
= minimum
;
2791 * Move each buffer from the passed in list to our internal one.
2793 buffer
= ISC_LIST_HEAD(*buflist
);
2794 while (buffer
!= NULL
) {
2795 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
2796 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
2797 buffer
= ISC_LIST_HEAD(*buflist
);
2800 ret
= socket_recv(sock
, dev
, task
, 0);
2802 UNLOCK(&sock
->lock
);
2807 isc__socket_recv(isc_socket_t
*sock
, isc_region_t
*region
,
2808 unsigned int minimum
, isc_task_t
*task
,
2809 isc_taskaction_t action
, const void *arg
)
2811 isc_socketevent_t
*dev
;
2812 isc_socketmgr_t
*manager
;
2815 REQUIRE(VALID_SOCKET(sock
));
2820 * make sure that the socket's not closed
2822 if (sock
->fd
== INVALID_SOCKET
) {
2823 UNLOCK(&sock
->lock
);
2824 return (ISC_R_CONNREFUSED
);
2826 REQUIRE(action
!= NULL
);
2828 manager
= sock
->manager
;
2829 REQUIRE(VALID_MANAGER(manager
));
2831 INSIST(sock
->bound
);
2833 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2835 UNLOCK(&sock
->lock
);
2836 return (ISC_R_NOMEMORY
);
2839 ret
= isc_socket_recv2(sock
, region
, minimum
, task
, dev
, 0);
2840 UNLOCK(&sock
->lock
);
2845 isc__socket_recv2(isc_socket_t
*sock
, isc_region_t
*region
,
2846 unsigned int minimum
, isc_task_t
*task
,
2847 isc_socketevent_t
*event
, unsigned int flags
)
2851 REQUIRE(VALID_SOCKET(sock
));
2855 event
->result
= ISC_R_UNEXPECTED
;
2856 event
->ev_sender
= sock
;
2858 * make sure that the socket's not closed
2860 if (sock
->fd
== INVALID_SOCKET
) {
2861 UNLOCK(&sock
->lock
);
2862 return (ISC_R_CONNREFUSED
);
2865 ISC_LIST_INIT(event
->bufferlist
);
2866 event
->region
= *region
;
2869 event
->attributes
= 0;
2872 * UDP sockets are always partial read.
2874 if (sock
->type
== isc_sockettype_udp
)
2878 event
->minimum
= region
->length
;
2880 event
->minimum
= minimum
;
2883 ret
= socket_recv(sock
, event
, task
, flags
);
2884 UNLOCK(&sock
->lock
);
2889 * Caller must have the socket locked.
2892 socket_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
2893 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
2899 isc_task_t
*ntask
= NULL
;
2900 isc_result_t result
= ISC_R_SUCCESS
;
2902 dev
->ev_sender
= task
;
2904 set_dev_address(address
, sock
, dev
);
2905 if (pktinfo
!= NULL
) {
2906 socket_log(__LINE__
, sock
, NULL
, TRACE
, isc_msgcat
, ISC_MSGSET_SOCKET
,
2907 ISC_MSG_PKTINFOPROVIDED
,
2908 "pktinfo structure provided, ifindex %u (set to 0)",
2909 pktinfo
->ipi6_ifindex
);
2911 dev
->attributes
|= ISC_SOCKEVENTATTR_PKTINFO
;
2912 dev
->pktinfo
= *pktinfo
;
2914 * Set the pktinfo index to 0 here, to let the kernel decide
2915 * what interface it should send on.
2917 dev
->pktinfo
.ipi6_ifindex
= 0;
2920 io_state
= startio_send(sock
, dev
, &cc
, &send_errno
);
2922 case DOIO_PENDING
: /* I/O started. Nothing more to do */
2925 * We couldn't send all or part of the request right now, so
2926 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2928 if ((flags
& ISC_SOCKFLAG_NORETRY
) == 0) {
2929 isc_task_attach(task
, &ntask
);
2930 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
2933 * Enqueue the request.
2935 INSIST(!ISC_LINK_LINKED(dev
, ev_link
));
2936 ISC_LIST_ENQUEUE(sock
->send_list
, dev
, ev_link
);
2938 socket_log(__LINE__
, sock
, NULL
, EVENT
, NULL
, 0, 0,
2939 "socket_send: event %p -> task %p",
2942 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
2943 result
= ISC_R_INPROGRESS
;
2955 isc__socket_send(isc_socket_t
*sock
, isc_region_t
*region
,
2956 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2959 * REQUIRE() checking is performed in isc_socket_sendto().
2961 return (isc_socket_sendto(sock
, region
, task
, action
, arg
, NULL
,
2966 isc__socket_sendto(isc_socket_t
*sock
, isc_region_t
*region
,
2967 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
,
2968 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
2970 isc_socketevent_t
*dev
;
2971 isc_socketmgr_t
*manager
;
2974 REQUIRE(VALID_SOCKET(sock
));
2975 REQUIRE(sock
->type
!= isc_sockettype_fdwatch
);
2981 * make sure that the socket's not closed
2983 if (sock
->fd
== INVALID_SOCKET
) {
2984 UNLOCK(&sock
->lock
);
2985 return (ISC_R_CONNREFUSED
);
2987 REQUIRE(region
!= NULL
);
2988 REQUIRE(task
!= NULL
);
2989 REQUIRE(action
!= NULL
);
2991 manager
= sock
->manager
;
2992 REQUIRE(VALID_MANAGER(manager
));
2994 INSIST(sock
->bound
);
2996 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_SENDDONE
, action
, arg
);
2998 UNLOCK(&sock
->lock
);
2999 return (ISC_R_NOMEMORY
);
3001 dev
->region
= *region
;
3003 ret
= socket_send(sock
, dev
, task
, address
, pktinfo
, 0);
3004 UNLOCK(&sock
->lock
);
3009 isc__socket_sendv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
3010 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
3012 return (isc_socket_sendtov(sock
, buflist
, task
, action
, arg
, NULL
,
3017 isc__socket_sendtov(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
3018 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
,
3019 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
3021 isc_socketevent_t
*dev
;
3022 isc_socketmgr_t
*manager
;
3023 unsigned int iocount
;
3024 isc_buffer_t
*buffer
;
3027 REQUIRE(VALID_SOCKET(sock
));
3033 * make sure that the socket's not closed
3035 if (sock
->fd
== INVALID_SOCKET
) {
3036 UNLOCK(&sock
->lock
);
3037 return (ISC_R_CONNREFUSED
);
3039 REQUIRE(buflist
!= NULL
);
3040 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
3041 REQUIRE(task
!= NULL
);
3042 REQUIRE(action
!= NULL
);
3044 manager
= sock
->manager
;
3045 REQUIRE(VALID_MANAGER(manager
));
3047 iocount
= isc_bufferlist_usedcount(buflist
);
3048 REQUIRE(iocount
> 0);
3050 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_SENDDONE
, action
, arg
);
3052 UNLOCK(&sock
->lock
);
3053 return (ISC_R_NOMEMORY
);
3057 * Move each buffer from the passed in list to our internal one.
3059 buffer
= ISC_LIST_HEAD(*buflist
);
3060 while (buffer
!= NULL
) {
3061 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
3062 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
3063 buffer
= ISC_LIST_HEAD(*buflist
);
3066 ret
= socket_send(sock
, dev
, task
, address
, pktinfo
, 0);
3067 UNLOCK(&sock
->lock
);
3072 isc__socket_sendto2(isc_socket_t
*sock
, isc_region_t
*region
,
3074 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
3075 isc_socketevent_t
*event
, unsigned int flags
)
3079 REQUIRE(VALID_SOCKET(sock
));
3083 REQUIRE((flags
& ~(ISC_SOCKFLAG_IMMEDIATE
|ISC_SOCKFLAG_NORETRY
)) == 0);
3084 if ((flags
& ISC_SOCKFLAG_NORETRY
) != 0)
3085 REQUIRE(sock
->type
== isc_sockettype_udp
);
3086 event
->ev_sender
= sock
;
3087 event
->result
= ISC_R_UNEXPECTED
;
3089 * make sure that the socket's not closed
3091 if (sock
->fd
== INVALID_SOCKET
) {
3092 UNLOCK(&sock
->lock
);
3093 return (ISC_R_CONNREFUSED
);
3095 ISC_LIST_INIT(event
->bufferlist
);
3096 event
->region
= *region
;
3099 event
->attributes
= 0;
3101 ret
= socket_send(sock
, event
, task
, address
, pktinfo
, flags
);
3102 UNLOCK(&sock
->lock
);
3107 isc__socket_bind(isc_socket_t
*sock
, isc_sockaddr_t
*sockaddr
,
3108 unsigned int options
) {
3110 char strbuf
[ISC_STRERRORSIZE
];
3113 REQUIRE(VALID_SOCKET(sock
));
3118 * make sure that the socket's not closed
3120 if (sock
->fd
== INVALID_SOCKET
) {
3121 UNLOCK(&sock
->lock
);
3122 return (ISC_R_CONNREFUSED
);
3125 INSIST(!sock
->bound
);
3127 if (sock
->pf
!= sockaddr
->type
.sa
.sa_family
) {
3128 UNLOCK(&sock
->lock
);
3129 return (ISC_R_FAMILYMISMATCH
);
3132 * Only set SO_REUSEADDR when we want a specific port.
3134 if ((options
& ISC_SOCKET_REUSEADDRESS
) != 0 &&
3135 isc_sockaddr_getport(sockaddr
) != (in_port_t
)0 &&
3136 setsockopt(sock
->fd
, SOL_SOCKET
, SO_REUSEADDR
, (char *)&on
,
3138 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
3139 "setsockopt(%d) %s", sock
->fd
,
3140 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
3141 ISC_MSG_FAILED
, "failed"));
3144 if (bind(sock
->fd
, &sockaddr
->type
.sa
, sockaddr
->length
) < 0) {
3145 bind_errno
= WSAGetLastError();
3146 UNLOCK(&sock
->lock
);
3147 switch (bind_errno
) {
3149 return (ISC_R_NOPERM
);
3150 case WSAEADDRNOTAVAIL
:
3151 return (ISC_R_ADDRNOTAVAIL
);
3153 return (ISC_R_ADDRINUSE
);
3155 return (ISC_R_BOUND
);
3157 isc__strerror(bind_errno
, strbuf
, sizeof(strbuf
));
3158 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "bind: %s",
3160 return (ISC_R_UNEXPECTED
);
3164 socket_log(__LINE__
, sock
, sockaddr
, TRACE
,
3165 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
, "bound");
3168 UNLOCK(&sock
->lock
);
3169 return (ISC_R_SUCCESS
);
3173 isc__socket_filter(isc_socket_t
*sock
, const char *filter
) {
3177 REQUIRE(VALID_SOCKET(sock
));
3178 return (ISC_R_NOTIMPLEMENTED
);
3182 * Set up to listen on a given socket. We do this by creating an internal
3183 * event that will be dispatched when the socket has read activity. The
3184 * watcher will send the internal event to the task when there is a new
3187 * Unlike in read, we don't preallocate a done event here. Every time there
3188 * is a new connection we'll have to allocate a new one anyway, so we might
3189 * as well keep things simple rather than having to track them.
3192 isc__socket_listen(isc_socket_t
*sock
, unsigned int backlog
) {
3193 char strbuf
[ISC_STRERRORSIZE
];
3195 REQUIRE(VALID_SOCKET(sock
));
3201 * make sure that the socket's not closed
3203 if (sock
->fd
== INVALID_SOCKET
) {
3204 UNLOCK(&sock
->lock
);
3205 return (ISC_R_CONNREFUSED
);
3208 REQUIRE(!sock
->listener
);
3209 REQUIRE(sock
->bound
);
3210 REQUIRE(sock
->type
== isc_sockettype_tcp
);
3213 backlog
= SOMAXCONN
;
3215 if (listen(sock
->fd
, (int)backlog
) < 0) {
3216 UNLOCK(&sock
->lock
);
3217 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
3219 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "listen: %s", strbuf
);
3221 return (ISC_R_UNEXPECTED
);
3224 socket_log(__LINE__
, sock
, NULL
, TRACE
,
3225 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
, "listening");
3227 _set_state(sock
, SOCK_LISTEN
);
3229 UNLOCK(&sock
->lock
);
3230 return (ISC_R_SUCCESS
);
3234 * This should try to do aggressive accept() XXXMLG
3237 isc__socket_accept(isc_socket_t
*sock
,
3238 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
3240 isc_socket_newconnev_t
*adev
;
3241 isc_socketmgr_t
*manager
;
3242 isc_task_t
*ntask
= NULL
;
3243 isc_socket_t
*nsock
;
3244 isc_result_t result
;
3245 IoCompletionInfo
*lpo
;
3247 REQUIRE(VALID_SOCKET(sock
));
3249 manager
= sock
->manager
;
3250 REQUIRE(VALID_MANAGER(manager
));
3256 * make sure that the socket's not closed
3258 if (sock
->fd
== INVALID_SOCKET
) {
3259 UNLOCK(&sock
->lock
);
3260 return (ISC_R_CONNREFUSED
);
3263 REQUIRE(sock
->listener
);
3266 * Sender field is overloaded here with the task we will be sending
3267 * this event to. Just before the actual event is delivered the
3268 * actual ev_sender will be touched up to be the socket.
3270 adev
= (isc_socket_newconnev_t
*)
3271 isc_event_allocate(manager
->mctx
, task
, ISC_SOCKEVENT_NEWCONN
,
3272 action
, arg
, sizeof(*adev
));
3274 UNLOCK(&sock
->lock
);
3275 return (ISC_R_NOMEMORY
);
3277 ISC_LINK_INIT(adev
, ev_link
);
3279 result
= allocate_socket(manager
, sock
->type
, &nsock
);
3280 if (result
!= ISC_R_SUCCESS
) {
3281 isc_event_free((isc_event_t
**)&adev
);
3282 UNLOCK(&sock
->lock
);
3287 * AcceptEx() requires we pass in a socket.
3289 nsock
->fd
= socket(sock
->pf
, SOCK_STREAM
, IPPROTO_TCP
);
3290 if (nsock
->fd
== INVALID_SOCKET
) {
3291 free_socket(&nsock
, __LINE__
);
3292 isc_event_free((isc_event_t
**)&adev
);
3293 UNLOCK(&sock
->lock
);
3294 return (ISC_R_FAILURE
); // XXXMLG need real error message
3298 * Attach to socket and to task.
3300 isc_task_attach(task
, &ntask
);
3301 nsock
->references
++;
3303 adev
->ev_sender
= ntask
;
3304 adev
->newsocket
= nsock
;
3305 _set_state(nsock
, SOCK_ACCEPT
);
3308 * Queue io completion for an accept().
3310 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
3312 sizeof(IoCompletionInfo
));
3313 RUNTIME_CHECK(lpo
!= NULL
);
3314 lpo
->acceptbuffer
= (void *)HeapAlloc(hHeapHandle
, HEAP_ZERO_MEMORY
,
3315 (sizeof(SOCKADDR_STORAGE
) + 16) * 2);
3316 RUNTIME_CHECK(lpo
->acceptbuffer
!= NULL
);
3319 lpo
->request_type
= SOCKET_ACCEPT
;
3321 ISCAcceptEx(sock
->fd
,
3322 nsock
->fd
, /* Accepted Socket */
3323 lpo
->acceptbuffer
, /* Buffer for initial Recv */
3324 0, /* Length of Buffer */
3325 sizeof(SOCKADDR_STORAGE
) + 16, /* Local address length + 16 */
3326 sizeof(SOCKADDR_STORAGE
) + 16, /* Remote address lengh + 16 */
3327 (LPDWORD
)&lpo
->received_bytes
, /* Bytes Recved */
3328 (LPOVERLAPPED
)lpo
/* Overlapped structure */
3330 iocompletionport_update(nsock
);
3332 socket_log(__LINE__
, sock
, NULL
, TRACE
,
3333 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
,
3334 "accepting for nsock %p fd %d", nsock
, nsock
->fd
);
3339 ISC_LIST_ENQUEUE(sock
->accept_list
, adev
, ev_link
);
3340 sock
->pending_accept
++;
3341 sock
->pending_iocp
++;
3343 UNLOCK(&sock
->lock
);
3344 return (ISC_R_SUCCESS
);
3348 isc__socket_connect(isc_socket_t
*sock
, isc_sockaddr_t
*addr
,
3349 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
3351 char strbuf
[ISC_STRERRORSIZE
];
3352 isc_socket_connev_t
*cdev
;
3353 isc_task_t
*ntask
= NULL
;
3354 isc_socketmgr_t
*manager
;
3355 IoCompletionInfo
*lpo
;
3358 REQUIRE(VALID_SOCKET(sock
));
3359 REQUIRE(addr
!= NULL
);
3360 REQUIRE(task
!= NULL
);
3361 REQUIRE(action
!= NULL
);
3363 manager
= sock
->manager
;
3364 REQUIRE(VALID_MANAGER(manager
));
3365 REQUIRE(addr
!= NULL
);
3367 if (isc_sockaddr_ismulticast(addr
))
3368 return (ISC_R_MULTICAST
);
3374 * make sure that the socket's not closed
3376 if (sock
->fd
== INVALID_SOCKET
) {
3377 UNLOCK(&sock
->lock
);
3378 return (ISC_R_CONNREFUSED
);
3382 * Windows sockets won't connect unless the socket is bound.
3387 isc_sockaddr_anyofpf(&any
, isc_sockaddr_pf(addr
));
3388 if (bind(sock
->fd
, &any
.type
.sa
, any
.length
) < 0) {
3389 bind_errno
= WSAGetLastError();
3390 UNLOCK(&sock
->lock
);
3391 switch (bind_errno
) {
3393 return (ISC_R_NOPERM
);
3394 case WSAEADDRNOTAVAIL
:
3395 return (ISC_R_ADDRNOTAVAIL
);
3397 return (ISC_R_ADDRINUSE
);
3399 return (ISC_R_BOUND
);
3401 isc__strerror(bind_errno
, strbuf
,
3403 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
3404 "bind: %s", strbuf
);
3405 return (ISC_R_UNEXPECTED
);
3411 REQUIRE(!sock
->pending_connect
);
3413 cdev
= (isc_socket_connev_t
*)isc_event_allocate(manager
->mctx
, sock
,
3414 ISC_SOCKEVENT_CONNECT
,
3418 UNLOCK(&sock
->lock
);
3419 return (ISC_R_NOMEMORY
);
3421 ISC_LINK_INIT(cdev
, ev_link
);
3423 if (sock
->type
== isc_sockettype_tcp
) {
3425 * Queue io completion for an accept().
3427 lpo
= (IoCompletionInfo
*)HeapAlloc(hHeapHandle
,
3429 sizeof(IoCompletionInfo
));
3431 lpo
->request_type
= SOCKET_CONNECT
;
3433 sock
->address
= *addr
;
3434 ISCConnectEx(sock
->fd
, &addr
->type
.sa
, addr
->length
,
3435 NULL
, 0, NULL
, (LPOVERLAPPED
)lpo
);
3440 isc_task_attach(task
, &ntask
);
3441 cdev
->ev_sender
= ntask
;
3443 sock
->pending_connect
= 1;
3444 _set_state(sock
, SOCK_CONNECT
);
3447 * Enqueue the request.
3449 sock
->connect_ev
= cdev
;
3450 sock
->pending_iocp
++;
3452 WSAConnect(sock
->fd
, &addr
->type
.sa
, addr
->length
, NULL
, NULL
, NULL
, NULL
);
3453 cdev
->result
= ISC_R_SUCCESS
;
3454 isc_task_send(task
, (isc_event_t
**)&cdev
);
3457 UNLOCK(&sock
->lock
);
3459 return (ISC_R_SUCCESS
);
3463 isc__socket_getpeername(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3464 isc_result_t result
;
3466 REQUIRE(VALID_SOCKET(sock
));
3467 REQUIRE(addressp
!= NULL
);
3473 * make sure that the socket's not closed
3475 if (sock
->fd
== INVALID_SOCKET
) {
3476 UNLOCK(&sock
->lock
);
3477 return (ISC_R_CONNREFUSED
);
3480 if (sock
->connected
) {
3481 *addressp
= sock
->address
;
3482 result
= ISC_R_SUCCESS
;
3484 result
= ISC_R_NOTCONNECTED
;
3487 UNLOCK(&sock
->lock
);
3493 isc__socket_getsockname(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3494 ISC_SOCKADDR_LEN_T len
;
3495 isc_result_t result
;
3496 char strbuf
[ISC_STRERRORSIZE
];
3498 REQUIRE(VALID_SOCKET(sock
));
3499 REQUIRE(addressp
!= NULL
);
3505 * make sure that the socket's not closed
3507 if (sock
->fd
== INVALID_SOCKET
) {
3508 UNLOCK(&sock
->lock
);
3509 return (ISC_R_CONNREFUSED
);
3513 result
= ISC_R_NOTBOUND
;
3517 result
= ISC_R_SUCCESS
;
3519 len
= sizeof(addressp
->type
);
3520 if (getsockname(sock
->fd
, &addressp
->type
.sa
, (void *)&len
) < 0) {
3521 isc__strerror(WSAGetLastError(), strbuf
, sizeof(strbuf
));
3522 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "getsockname: %s",
3524 result
= ISC_R_UNEXPECTED
;
3527 addressp
->length
= (unsigned int)len
;
3530 UNLOCK(&sock
->lock
);
3536 * Run through the list of events on this socket, and cancel the ones
3537 * queued for task "task" of type "how". "how" is a bitmask.
3540 isc__socket_cancel(isc_socket_t
*sock
, isc_task_t
*task
, unsigned int how
) {
3542 REQUIRE(VALID_SOCKET(sock
));
3545 * Quick exit if there is nothing to do. Don't even bother locking
3555 * make sure that the socket's not closed
3557 if (sock
->fd
== INVALID_SOCKET
) {
3558 UNLOCK(&sock
->lock
);
3563 * All of these do the same thing, more or less.
3565 * o If the internal event is marked as "posted" try to
3566 * remove it from the task's queue. If this fails, mark it
3567 * as canceled instead, and let the task clean it up later.
3568 * o For each I/O request for that task of that type, post
3569 * its done event with status of "ISC_R_CANCELED".
3570 * o Reset any state needed.
3573 if ((how
& ISC_SOCKCANCEL_RECV
) == ISC_SOCKCANCEL_RECV
) {
3574 isc_socketevent_t
*dev
;
3575 isc_socketevent_t
*next
;
3576 isc_task_t
*current_task
;
3578 dev
= ISC_LIST_HEAD(sock
->recv_list
);
3579 while (dev
!= NULL
) {
3580 current_task
= dev
->ev_sender
;
3581 next
= ISC_LIST_NEXT(dev
, ev_link
);
3582 if ((task
== NULL
) || (task
== current_task
)) {
3583 dev
->result
= ISC_R_CANCELED
;
3584 send_recvdone_event(sock
, &dev
);
3589 how
&= ~ISC_SOCKCANCEL_RECV
;
3591 if ((how
& ISC_SOCKCANCEL_SEND
) == ISC_SOCKCANCEL_SEND
) {
3592 isc_socketevent_t
*dev
;
3593 isc_socketevent_t
*next
;
3594 isc_task_t
*current_task
;
3596 dev
= ISC_LIST_HEAD(sock
->send_list
);
3598 while (dev
!= NULL
) {
3599 current_task
= dev
->ev_sender
;
3600 next
= ISC_LIST_NEXT(dev
, ev_link
);
3601 if ((task
== NULL
) || (task
== current_task
)) {
3602 dev
->result
= ISC_R_CANCELED
;
3603 send_senddone_event(sock
, &dev
);
3608 how
&= ~ISC_SOCKCANCEL_SEND
;
3610 if (((how
& ISC_SOCKCANCEL_ACCEPT
) == ISC_SOCKCANCEL_ACCEPT
)
3611 && !ISC_LIST_EMPTY(sock
->accept_list
)) {
3612 isc_socket_newconnev_t
*dev
;
3613 isc_socket_newconnev_t
*next
;
3614 isc_task_t
*current_task
;
3616 dev
= ISC_LIST_HEAD(sock
->accept_list
);
3617 while (dev
!= NULL
) {
3618 current_task
= dev
->ev_sender
;
3619 next
= ISC_LIST_NEXT(dev
, ev_link
);
3621 if ((task
== NULL
) || (task
== current_task
)) {
3623 dev
->newsocket
->references
--;
3624 closesocket(dev
->newsocket
->fd
);
3625 dev
->newsocket
->fd
= INVALID_SOCKET
;
3626 free_socket(&dev
->newsocket
, __LINE__
);
3628 dev
->result
= ISC_R_CANCELED
;
3629 send_acceptdone_event(sock
, &dev
);
3635 how
&= ~ISC_SOCKCANCEL_ACCEPT
;
3638 * Connecting is not a list.
3640 if (((how
& ISC_SOCKCANCEL_CONNECT
) == ISC_SOCKCANCEL_CONNECT
)
3641 && sock
->connect_ev
!= NULL
) {
3642 isc_socket_connev_t
*dev
;
3643 isc_task_t
*current_task
;
3645 INSIST(sock
->pending_connect
);
3647 dev
= sock
->connect_ev
;
3648 current_task
= dev
->ev_sender
;
3650 if ((task
== NULL
) || (task
== current_task
)) {
3651 closesocket(sock
->fd
);
3652 sock
->fd
= INVALID_SOCKET
;
3653 _set_state(sock
, SOCK_CLOSED
);
3655 sock
->connect_ev
= NULL
;
3656 dev
->result
= ISC_R_CANCELED
;
3657 send_connectdone_event(sock
, &dev
);
3660 how
&= ~ISC_SOCKCANCEL_CONNECT
;
3662 maybe_free_socket(&sock
, __LINE__
);
3666 isc__socket_gettype(isc_socket_t
*sock
) {
3667 isc_sockettype_t type
;
3669 REQUIRE(VALID_SOCKET(sock
));
3674 * make sure that the socket's not closed
3676 if (sock
->fd
== INVALID_SOCKET
) {
3677 UNLOCK(&sock
->lock
);
3678 return (ISC_R_CONNREFUSED
);
3682 UNLOCK(&sock
->lock
);
3687 isc__socket_isbound(isc_socket_t
*sock
) {
3690 REQUIRE(VALID_SOCKET(sock
));
3696 * make sure that the socket's not closed
3698 if (sock
->fd
== INVALID_SOCKET
) {
3699 UNLOCK(&sock
->lock
);
3703 val
= ((sock
->bound
) ? ISC_TRUE
: ISC_FALSE
);
3704 UNLOCK(&sock
->lock
);
3710 isc__socket_ipv6only(isc_socket_t
*sock
, isc_boolean_t yes
) {
3711 #if defined(IPV6_V6ONLY)
3712 int onoff
= yes
? 1 : 0;
3717 REQUIRE(VALID_SOCKET(sock
));
3720 if (sock
->pf
== AF_INET6
) {
3721 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_V6ONLY
,
3722 (char *)&onoff
, sizeof(onoff
));
3728 isc__socket_cleanunix(isc_sockaddr_t
*addr
, isc_boolean_t active
) {
3734 isc__socket_permunix(isc_sockaddr_t
*addr
, isc_uint32_t perm
,
3735 isc_uint32_t owner
, isc_uint32_t group
)
3741 return (ISC_R_NOTIMPLEMENTED
);
3745 isc__socket_setname(isc_socket_t
*socket
, const char *name
, void *tag
) {
3751 REQUIRE(VALID_SOCKET(socket
));
3753 LOCK(&socket
->lock
);
3754 memset(socket
->name
, 0, sizeof(socket
->name
));
3755 strncpy(socket
->name
, name
, sizeof(socket
->name
) - 1);
3757 UNLOCK(&socket
->lock
);
3761 isc__socket_getname(isc_socket_t
*socket
) {
3762 return (socket
->name
);
3766 isc__socket_gettag(isc_socket_t
*socket
) {
3767 return (socket
->tag
);
3771 isc__socketmgr_setreserved(isc_socketmgr_t
*manager
, isc_uint32_t reserved
) {
3777 isc___socketmgr_maxudp(isc_socketmgr_t
*manager
, int maxudp
) {
3786 _socktype(isc_sockettype_t type
)
3788 if (type
== isc_sockettype_udp
)
3790 else if (type
== isc_sockettype_tcp
)
3792 else if (type
== isc_sockettype_unix
)
3794 else if (type
== isc_sockettype_fdwatch
)
3797 return ("not-initialized");
3801 isc_socketmgr_renderxml(isc_socketmgr_t
*mgr
, xmlTextWriterPtr writer
)
3804 char peerbuf
[ISC_SOCKADDR_FORMATSIZE
];
3805 isc_sockaddr_t addr
;
3806 ISC_SOCKADDR_LEN_T len
;
3810 #ifndef ISC_PLATFORM_USETHREADS
3811 xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"references");
3812 xmlTextWriterWriteFormatString(writer
, "%d", mgr
->refs
);
3813 xmlTextWriterEndElement(writer
);
3816 xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"sockets");
3817 sock
= ISC_LIST_HEAD(mgr
->socklist
);
3818 while (sock
!= NULL
) {
3820 xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"socket");
3822 xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"id");
3823 xmlTextWriterWriteFormatString(writer
, "%p", sock
);
3824 xmlTextWriterEndElement(writer
);
3826 if (sock
->name
[0] != 0) {
3827 xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"name");
3828 xmlTextWriterWriteFormatString(writer
, "%s",
3830 xmlTextWriterEndElement(writer
); /* name */
3833 xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"references");
3834 xmlTextWriterWriteFormatString(writer
, "%d", sock
->references
);
3835 xmlTextWriterEndElement(writer
);
3837 xmlTextWriterWriteElement(writer
, ISC_XMLCHAR
"type",
3838 ISC_XMLCHAR
_socktype(sock
->type
));
3840 if (sock
->connected
) {
3841 isc_sockaddr_format(&sock
->address
, peerbuf
,
3843 xmlTextWriterWriteElement(writer
,
3844 ISC_XMLCHAR
"peer-address",
3845 ISC_XMLCHAR peerbuf
);
3849 if (getsockname(sock
->fd
, &addr
.type
.sa
, (void *)&len
) == 0) {
3850 isc_sockaddr_format(&addr
, peerbuf
, sizeof(peerbuf
));
3851 xmlTextWriterWriteElement(writer
,
3852 ISC_XMLCHAR
"local-address",
3853 ISC_XMLCHAR peerbuf
);
3856 xmlTextWriterStartElement(writer
, ISC_XMLCHAR
"states");
3857 if (sock
->pending_recv
)
3858 xmlTextWriterWriteElement(writer
, ISC_XMLCHAR
"state",
3859 ISC_XMLCHAR
"pending-receive");
3860 if (sock
->pending_send
)
3861 xmlTextWriterWriteElement(writer
, ISC_XMLCHAR
"state",
3862 ISC_XMLCHAR
"pending-send");
3863 if (sock
->pending_accept
)
3864 xmlTextWriterWriteElement(writer
, ISC_XMLCHAR
"state",
3865 ISC_XMLCHAR
"pending_accept");
3867 xmlTextWriterWriteElement(writer
, ISC_XMLCHAR
"state",
3868 ISC_XMLCHAR
"listener");
3869 if (sock
->connected
)
3870 xmlTextWriterWriteElement(writer
, ISC_XMLCHAR
"state",
3871 ISC_XMLCHAR
"connected");
3872 if (sock
->pending_connect
)
3873 xmlTextWriterWriteElement(writer
, ISC_XMLCHAR
"state",
3874 ISC_XMLCHAR
"connecting");
3876 xmlTextWriterWriteElement(writer
, ISC_XMLCHAR
"state",
3877 ISC_XMLCHAR
"bound");
3879 xmlTextWriterEndElement(writer
); /* states */
3881 xmlTextWriterEndElement(writer
); /* socket */
3883 UNLOCK(&sock
->lock
);
3884 sock
= ISC_LIST_NEXT(sock
, link
);
3886 xmlTextWriterEndElement(writer
); /* sockets */
3890 #endif /* HAVE_LIBXML2 */