4 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
5 * Copyright (C) 1999-2003 Internet Software Consortium.
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
20 /* Id: dispatch.c,v 1.168 2009/12/02 23:15:14 marka Exp */
27 #include <sys/types.h>
31 #include <isc/entropy.h>
33 #include <isc/mutex.h>
34 #include <isc/portset.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/stats.h>
38 #include <isc/string.h>
44 #include <dns/dispatch.h>
45 #include <dns/events.h>
47 #include <dns/message.h>
48 #include <dns/portlist.h>
49 #include <dns/stats.h>
50 #include <dns/tcpmsg.h>
51 #include <dns/types.h>
53 typedef ISC_LIST(dns_dispentry_t
) dns_displist_t
;
55 typedef struct dispsocket dispsocket_t
;
56 typedef ISC_LIST(dispsocket_t
) dispsocketlist_t
;
58 typedef struct dispportentry dispportentry_t
;
59 typedef ISC_LIST(dispportentry_t
) dispportlist_t
;
61 /* ARC4 Random generator state */
62 typedef struct arc4ctx
{
67 isc_entropy_t
*entropy
; /*%< entropy source for ARC4 */
71 typedef struct dns_qid
{
73 unsigned int qid_nbuckets
; /*%< hash table size */
74 unsigned int qid_increment
; /*%< id increment on collision */
76 dns_displist_t
*qid_table
; /*%< the table itself */
77 dispsocketlist_t
*sock_table
; /*%< socket table */
80 struct dns_dispatchmgr
{
85 dns_portlist_t
*portlist
;
87 isc_entropy_t
*entropy
; /*%< entropy source */
89 /* Locked by "lock". */
92 ISC_LIST(dns_dispatch_t
) list
;
94 /* Locked by arc4_lock. */
95 isc_mutex_t arc4_lock
;
96 arc4ctx_t arc4ctx
; /*%< ARC4 context for QID */
98 /* locked by buffer lock */
100 isc_mutex_t buffer_lock
;
101 unsigned int buffers
; /*%< allocated buffers */
102 unsigned int buffersize
; /*%< size of each buffer */
103 unsigned int maxbuffers
; /*%< max buffers */
105 /* Locked internally. */
106 isc_mutex_t pool_lock
;
107 isc_mempool_t
*epool
; /*%< memory pool for events */
108 isc_mempool_t
*rpool
; /*%< memory pool for replies */
109 isc_mempool_t
*dpool
; /*%< dispatch allocations */
110 isc_mempool_t
*bpool
; /*%< memory pool for buffers */
111 isc_mempool_t
*spool
; /*%< memory pool for dispsocs */
114 * Locked by qid->lock if qid exists; otherwise, can be used without
116 * Memory footprint considerations: this is a simple implementation of
117 * available ports, i.e., an ordered array of the actual port numbers.
118 * This will require about 256KB of memory in the worst case (128KB for
119 * each of IPv4 and IPv6). We could reduce it by representing it in a
120 * more sophisticated way such as a list (or array) of ranges that are
121 * searched to identify a specific port. Our decision here is the saved
122 * memory isn't worth the implementation complexity, considering the
123 * fact that the whole BIND9 process (which is mainly named) already
124 * requires a pretty large memory footprint. We may, however, have to
125 * revisit the decision when we want to use it as a separate module for
126 * an environment where memory requirements are more severe.
128 in_port_t
*v4ports
; /*%< available ports for IPv4 */
129 unsigned int nv4ports
; /*%< # of available ports for IPv4 */
130 in_port_t
*v6ports
; /*%< available ports for IPv6 */
131 unsigned int nv6ports
; /*%< # of available ports for IPv6 */
134 #define MGR_SHUTTINGDOWN 0x00000001U
135 #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
137 #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
139 struct dns_dispentry
{
141 dns_dispatch_t
*disp
;
147 isc_taskaction_t action
;
149 isc_boolean_t item_out
;
150 dispsocket_t
*dispsocket
;
151 ISC_LIST(dns_dispatchevent_t
) items
;
152 ISC_LINK(dns_dispentry_t
) link
;
156 * Maximum number of dispatch sockets that can be pooled for reuse. The
157 * appropriate value may vary, but experiments have shown a busy caching server
158 * may need more than 1000 sockets concurrently opened. The maximum allowable
159 * number of dispatch sockets (per manager) will be set to the double of this
162 #ifndef DNS_DISPATCH_POOLSOCKS
163 #define DNS_DISPATCH_POOLSOCKS 2048
167 * Quota to control the number of dispatch sockets. If a dispatch has more
168 * than the quota of sockets, new queries will purge oldest ones, so that
169 * a massive number of outstanding queries won't prevent subsequent queries
170 * (especially if the older ones take longer time and result in timeout).
172 #ifndef DNS_DISPATCH_SOCKSQUOTA
173 #define DNS_DISPATCH_SOCKSQUOTA 3072
178 isc_socket_t
*socket
;
179 dns_dispatch_t
*disp
;
181 in_port_t localport
; /* XXX: should be removed later */
182 dispportentry_t
*portentry
;
183 dns_dispentry_t
*resp
;
185 ISC_LINK(dispsocket_t
) link
;
187 ISC_LINK(dispsocket_t
) blink
;
191 * A port table entry. We remember every port we first open in a table with a
192 * reference counter so that we can 'reuse' the same port (with different
193 * destination addresses) using the SO_REUSEADDR socket option.
195 struct dispportentry
{
198 ISC_LINK(struct dispportentry
) link
;
201 #ifndef DNS_DISPATCH_PORTTABLESIZE
202 #define DNS_DISPATCH_PORTTABLESIZE 1024
205 #define INVALID_BUCKET (0xffffdead)
208 * Number of tasks for each dispatch that use separate sockets for different
209 * transactions. This must be a power of 2 as it will divide 32 bit numbers
210 * to get a uniformly random task selection. See get_dispsocket().
212 #define MAX_INTERNAL_TASKS 64
214 struct dns_dispatch
{
216 unsigned int magic
; /*%< magic */
217 dns_dispatchmgr_t
*mgr
; /*%< dispatch manager */
220 * internal task buckets. We use multiple tasks to distribute various
221 * socket events well when using separate dispatch sockets. We use the
222 * 1st task (task[0]) for internal control events.
224 isc_task_t
*task
[MAX_INTERNAL_TASKS
];
225 isc_socket_t
*socket
; /*%< isc socket attached to */
226 isc_sockaddr_t local
; /*%< local address */
227 in_port_t localport
; /*%< local UDP port */
228 unsigned int maxrequests
; /*%< max requests */
229 isc_event_t
*ctlevent
;
231 /*% Locked by mgr->lock. */
232 ISC_LINK(dns_dispatch_t
) link
;
234 /* Locked by "lock". */
235 isc_mutex_t lock
; /*%< locks all below */
236 isc_sockettype_t socktype
;
237 unsigned int attributes
;
238 unsigned int refcount
; /*%< number of users */
239 dns_dispatchevent_t
*failsafe_ev
; /*%< failsafe cancel event */
240 unsigned int shutting_down
: 1,
244 recv_pending
: 1; /*%< is a recv() pending? */
245 isc_result_t shutdown_why
;
246 ISC_LIST(dispsocket_t
) activesockets
;
247 ISC_LIST(dispsocket_t
) inactivesockets
;
248 unsigned int nsockets
;
249 unsigned int requests
; /*%< how many requests we have */
250 unsigned int tcpbuffers
; /*%< allocated buffers */
251 dns_tcpmsg_t tcpmsg
; /*%< for tcp streams */
253 arc4ctx_t arc4ctx
; /*%< for QID/UDP port num */
254 dispportlist_t
*port_table
; /*%< hold ports 'owned' by us */
255 isc_mempool_t
*portpool
; /*%< port table entries */
258 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
259 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
261 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
262 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
264 #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
265 #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
267 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
268 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
270 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
271 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * DNS_QID(disp): the QID table used by 'disp' -- the dispatch's own
 * table for a TCP dispatch, the manager's table otherwise.
 *
 * DISP_ARC4CTX(disp): pointer to the ARC4 context used by 'disp' -- the
 * dispatch's own context for a UDP dispatch, the manager's otherwise.
 *
 * Both expansions are wrapped in an outer pair of parentheses so that the
 * conditional expression stays a single operand wherever the macro is
 * used (for example next to a binary operator or a dereference).
 * 'disp' is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)
#define DISP_ARC4CTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
279 * Locking a query port buffer is a bit tricky. We access the buffer without
280 * locking until qid is created. Technically, there is a possibility of race
281 * between the creation of qid and access to the port buffer; in practice,
282 * however, this should be safe because qid isn't created until the first
283 * dispatch is created and there should be no contending situation until then.
/*
 * Lock / unlock the manager's port buffer.  The lock used is qid->lock,
 * and it is only taken when mgr->qid has been created; before that the
 * macros do nothing.
 *
 * Each macro is wrapped in do { } while (0) so that it behaves as one
 * statement: the internal 'if' can no longer capture an 'else' that
 * follows the macro invocation.  Invoke with a trailing semicolon.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
291 static dns_dispentry_t
*entry_search(dns_qid_t
*, isc_sockaddr_t
*,
292 dns_messageid_t
, in_port_t
, unsigned int);
293 static isc_boolean_t
destroy_disp_ok(dns_dispatch_t
*);
294 static void destroy_disp(isc_task_t
*task
, isc_event_t
*event
);
295 static void destroy_dispsocket(dns_dispatch_t
*, dispsocket_t
**);
296 static void deactivate_dispsocket(dns_dispatch_t
*, dispsocket_t
*);
297 static void udp_exrecv(isc_task_t
*, isc_event_t
*);
298 static void udp_shrecv(isc_task_t
*, isc_event_t
*);
299 static void udp_recv(isc_event_t
*, dns_dispatch_t
*, dispsocket_t
*);
300 static void tcp_recv(isc_task_t
*, isc_event_t
*);
301 static isc_result_t
startrecv(dns_dispatch_t
*, dispsocket_t
*);
302 static isc_uint32_t
dns_hash(dns_qid_t
*, isc_sockaddr_t
*, dns_messageid_t
,
304 static void free_buffer(dns_dispatch_t
*disp
, void *buf
, unsigned int len
);
305 static void *allocate_udp_buffer(dns_dispatch_t
*disp
);
306 static inline void free_event(dns_dispatch_t
*disp
, dns_dispatchevent_t
*ev
);
307 static inline dns_dispatchevent_t
*allocate_event(dns_dispatch_t
*disp
);
308 static void do_cancel(dns_dispatch_t
*disp
);
309 static dns_dispentry_t
*linear_first(dns_qid_t
*disp
);
310 static dns_dispentry_t
*linear_next(dns_qid_t
*disp
,
311 dns_dispentry_t
*resp
);
312 static void dispatch_free(dns_dispatch_t
**dispp
);
313 static isc_result_t
get_udpsocket(dns_dispatchmgr_t
*mgr
,
314 dns_dispatch_t
*disp
,
315 isc_socketmgr_t
*sockmgr
,
316 isc_sockaddr_t
*localaddr
,
317 isc_socket_t
**sockp
);
318 static isc_result_t
dispatch_createudp(dns_dispatchmgr_t
*mgr
,
319 isc_socketmgr_t
*sockmgr
,
320 isc_taskmgr_t
*taskmgr
,
321 isc_sockaddr_t
*localaddr
,
322 unsigned int maxrequests
,
323 unsigned int attributes
,
324 dns_dispatch_t
**dispp
);
325 static isc_boolean_t
destroy_mgr_ok(dns_dispatchmgr_t
*mgr
);
326 static void destroy_mgr(dns_dispatchmgr_t
**mgrp
);
327 static isc_result_t
qid_allocate(dns_dispatchmgr_t
*mgr
, unsigned int buckets
,
328 unsigned int increment
, dns_qid_t
**qidp
,
329 isc_boolean_t needaddrtable
);
330 static void qid_destroy(isc_mem_t
*mctx
, dns_qid_t
**qidp
);
331 static isc_result_t
open_socket(isc_socketmgr_t
*mgr
, isc_sockaddr_t
*local
,
332 unsigned int options
, isc_socket_t
**sockp
);
333 static isc_boolean_t
portavailable(dns_dispatchmgr_t
*mgr
, isc_socket_t
*sock
,
334 isc_sockaddr_t
*sockaddrp
);
336 #define LVL(x) ISC_LOG_DEBUG(x)
339 mgr_log(dns_dispatchmgr_t
*mgr
, int level
, const char *fmt
, ...)
340 ISC_FORMAT_PRINTF(3, 4);
/*
 * Log a printf-style message about dispatch manager 'mgr' at log level
 * 'level'.  isc_log_wouldlog() is consulted before any formatting is
 * done; the message is then formatted into 'msgbuf' with vsnprintf() and
 * written to dns_lctx under DNS_LOGCATEGORY_DISPATCH /
 * DNS_LOGMODULE_DISPATCH, prefixed with "dispatchmgr <pointer>: ".
 */
343 mgr_log(dns_dispatchmgr_t
*mgr
, int level
, const char *fmt
, ...) {
347 if (! isc_log_wouldlog(dns_lctx
, level
))
351 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
354 isc_log_write(dns_lctx
,
355 DNS_LOGCATEGORY_DISPATCH
, DNS_LOGMODULE_DISPATCH
,
356 level
, "dispatchmgr %p: %s", mgr
, msgbuf
);
/*
 * Increment statistics counter 'counter' in the manager's stats object.
 * mgr->stats may be NULL, in which case nothing is counted.
 */
360 inc_stats(dns_dispatchmgr_t
*mgr
, isc_statscounter_t counter
) {
361 if (mgr
->stats
!= NULL
)
362 isc_stats_increment(mgr
->stats
, counter
);
366 dispatch_log(dns_dispatch_t
*disp
, int level
, const char *fmt
, ...)
367 ISC_FORMAT_PRINTF(3, 4);
/*
 * Log a printf-style message about dispatch 'disp' at log level 'level'.
 * isc_log_wouldlog() is consulted before any formatting is done; the
 * message is then formatted into 'msgbuf' with vsnprintf() and written
 * to dns_lctx under DNS_LOGCATEGORY_DISPATCH / DNS_LOGMODULE_DISPATCH,
 * prefixed with "dispatch <pointer>: ".
 */
370 dispatch_log(dns_dispatch_t
*disp
, int level
, const char *fmt
, ...) {
374 if (! isc_log_wouldlog(dns_lctx
, level
))
378 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
381 isc_log_write(dns_lctx
,
382 DNS_LOGCATEGORY_DISPATCH
, DNS_LOGMODULE_DISPATCH
,
383 level
, "dispatch %p: %s", disp
, msgbuf
);
387 request_log(dns_dispatch_t
*disp
, dns_dispentry_t
*resp
,
388 int level
, const char *fmt
, ...)
389 ISC_FORMAT_PRINTF(4, 5);
/*
 * Log a printf-style message about a request/response entry 'resp' that
 * belongs to dispatch 'disp'.
 *
 * When 'resp' passes VALID_RESPONSE(), resp->host is formatted into
 * 'peerbuf' and included in the log line ("dispatch %p response %p %s:
 * %s"); otherwise a shorter "dispatch %p req/resp %p: %s" form without
 * the peer address is used.  Both forms go to dns_lctx under
 * DNS_LOGCATEGORY_DISPATCH / DNS_LOGMODULE_DISPATCH at 'level'.
 */
392 request_log(dns_dispatch_t
*disp
, dns_dispentry_t
*resp
,
393 int level
, const char *fmt
, ...)
399 if (! isc_log_wouldlog(dns_lctx
, level
))
403 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
406 if (VALID_RESPONSE(resp
)) {
407 isc_sockaddr_format(&resp
->host
, peerbuf
, sizeof(peerbuf
));
408 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_DISPATCH
,
409 DNS_LOGMODULE_DISPATCH
, level
,
410 "dispatch %p response %p %s: %s", disp
, resp
,
413 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_DISPATCH
,
414 DNS_LOGMODULE_DISPATCH
, level
,
415 "dispatch %p req/resp %p: %s", disp
, resp
,
421 * ARC4 random number generator derived from OpenBSD.
422 * Only dispatch_random() and dispatch_uniformrandom() are expected
423 * to be called from general dispatch routines; the rest of them are subroutines
426 * The original copyright follows:
427 * Copyright (c) 1996, David Mazieres <dm@uun.org>
428 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
430 * Permission to use, copy, modify, and distribute this software for any
431 * purpose with or without fee is hereby granted, provided that the above
432 * copyright notice and this permission notice appear in all copies.
434 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
435 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
436 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
437 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
438 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
439 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
440 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/*
 * Initialize ARC4 context 'actx'.  A loop visits all 256 state indices
 * (presumably filling the s[] permutation -- the loop body is not
 * visible here, TODO confirm), and 'entropy' is stored in the context
 * as a plain pointer: no reference is taken on it.
 */
444 dispatch_initrandom(arc4ctx_t
*actx
, isc_entropy_t
*entropy
,
448 for (n
= 0; n
< 256; n
++)
453 actx
->entropy
= entropy
; /* don't have to attach */
/*
 * Mix 'datlen' bytes of seed material from 'dat' into the ARC4 state.
 *
 * Runs 256 rounds; each round advances i, adds s[i] and the next seed
 * byte to j, and swaps s[i] with s[j].  The seed is consumed cyclically
 * via dat[n % datlen], so 'datlen' must be non-zero.
 * NOTE(review): assumes actx->i and actx->j are 8-bit fields so that the
 * additions wrap to a valid index into s[] -- declarations are not
 * visible here, confirm.
 */
458 dispatch_arc4addrandom(arc4ctx_t
*actx
, unsigned char *dat
, int datlen
) {
463 for (n
= 0; n
< 256; n
++) {
464 actx
->i
= (actx
->i
+ 1);
465 si
= actx
->s
[actx
->i
];
466 actx
->j
= (actx
->j
+ si
+ dat
[n
% datlen
]);
467 actx
->s
[actx
->i
] = actx
->s
[actx
->j
];
468 actx
->s
[actx
->j
] = si
;
/*
 * Produce the next 8-bit value from the ARC4 stream of 'actx'.
 *
 * Advances i, adds s[i] to j, swaps s[i] and s[j], and returns
 * s[(si + sj) & 0xff] where si/sj are the two swapped values.
 * NOTE(review): relies on actx->i / actx->j wrapping as 8-bit values --
 * field declarations are not visible here, confirm.
 */
473 static inline isc_uint8_t
474 dispatch_arc4get8(arc4ctx_t
*actx
) {
477 actx
->i
= (actx
->i
+ 1);
478 si
= actx
->s
[actx
->i
];
479 actx
->j
= (actx
->j
+ si
);
480 sj
= actx
->s
[actx
->j
];
481 actx
->s
[actx
->i
] = sj
;
482 actx
->s
[actx
->j
] = si
;
484 return (actx
->s
[(si
+ sj
) & 0xff]);
/*
 * Produce a 16-bit value from two consecutive dispatch_arc4get8()
 * outputs: the first call supplies the high byte, the second the low
 * byte.
 */
487 static inline isc_uint16_t
488 dispatch_arc4get16(arc4ctx_t
*actx
) {
491 val
= dispatch_arc4get8(actx
) << 8;
492 val
|= dispatch_arc4get8(actx
);
/*
 * Re-seed the ARC4 state of 'actx'.
 *
 * 128 bytes of seed are gathered into 'rnd' (accessed both as bytes,
 * rnd.rnd, and as 32 words, rnd.rnd32): from actx->entropy via
 * isc_entropy_getdata() when an entropy source is set -- failure there
 * is fatal (RUNTIME_CHECK) -- and via isc_random_get(), one word at a
 * time, in the other visible path.  The seed is mixed in with
 * dispatch_arc4addrandom(), the first 256 output bytes are discarded,
 * and actx->count is reset to 1600000, the budget dispatch_random()
 * counts down before stirring again.
 */
498 dispatch_arc4stir(arc4ctx_t
*actx
) {
501 unsigned char rnd
[128];
502 isc_uint32_t rnd32
[32];
506 if (actx
->entropy
!= NULL
) {
508 * We accept any quality of random data to avoid blocking.
510 result
= isc_entropy_getdata(actx
->entropy
, rnd
.rnd
,
511 sizeof(rnd
), NULL
, 0);
512 RUNTIME_CHECK(result
== ISC_R_SUCCESS
);
514 for (i
= 0; i
< 32; i
++)
515 isc_random_get(&rnd
.rnd32
[i
]);
517 dispatch_arc4addrandom(actx
, rnd
.rnd
, sizeof(rnd
.rnd
));
520 * Discard early keystream, as per recommendations in:
521 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
523 for (i
= 0; i
< 256; i
++)
524 (void)dispatch_arc4get8(actx
);
527 * Derived from OpenBSD's implementation. The rationale is not clear,
528 * but should be conservative enough in safety, and reasonably large
531 actx
->count
= 1600000;
/*
 * Return a 16-bit random value from ARC4 context 'actx'.
 *
 * actx->count is reduced by the size of the value drawn; once it is
 * no longer positive the context is re-seeded with dispatch_arc4stir()
 * before the value is generated.  actx->lock is optional: the two
 * "lock != NULL" tests bracket the state update (presumably taking and
 * releasing the lock -- those statements are not visible here).
 */
535 dispatch_random(arc4ctx_t
*actx
) {
538 if (actx
->lock
!= NULL
)
541 actx
->count
-= sizeof(isc_uint16_t
);
542 if (actx
->count
<= 0)
543 dispatch_arc4stir(actx
);
544 result
= dispatch_arc4get16(actx
);
546 if (actx
->lock
!= NULL
)
553 * For general purpose library, we don't have to be too strict about the
554 * quality of random values. Performance doesn't matter much, either.
555 * So we simply use the isc_random module to keep the library as small as
560 dispatch_initrandom(arc4ctx_t
*actx
, isc_entropy_t
*entropy
,
571 dispatch_random(arc4ctx_t
*actx
) {
/*
 * Return a random value in [0, upper_bound) without modulo bias.
 *
 * 'min' is computed as 0x10000 mod upper_bound (directly as
 * 0x10000 - upper_bound when upper_bound > 0x8000); values drawn from
 * dispatch_random() are finally reduced with "r % upper_bound".
 * 'upper_bound' is used as a divisor, so it must not be 0 at that
 * point (any earlier guard is not visible here).
 */
582 dispatch_uniformrandom(arc4ctx_t
*actx
, isc_uint16_t upper_bound
) {
589 * Ensure the range of random numbers [min, 0xffff] be a multiple of
590 * upper_bound and contain at least a half of the 16 bit range.
593 if (upper_bound
> 0x8000)
594 min
= 1 + ~upper_bound
; /* 0x10000 - upper_bound */
596 min
= (isc_uint16_t
)(0x10000 % (isc_uint32_t
)upper_bound
);
599 * This could theoretically loop forever but each retry has
600 * p > 0.5 (worst case, usually far better) of selecting a
601 * number inside the range we need, so it should rarely need
605 r
= dispatch_random(actx
);
610 return (r
% upper_bound
);
614 * Return a hash of the destination and message id.
/*
 * Compute the bucket index in 'qid' for destination 'dest', message id
 * 'id' and a port number.
 *
 * The address hash from isc_sockaddr_hash() is XORed with
 * (id << 16) | port and reduced modulo qid->qid_nbuckets, so the result
 * is always a valid bucket index (asserted with INSIST).
 */
617 dns_hash(dns_qid_t
*qid
, isc_sockaddr_t
*dest
, dns_messageid_t id
,
622 ret
= isc_sockaddr_hash(dest
, ISC_TRUE
);
623 ret
^= (id
<< 16) | port
;
624 ret
%= qid
->qid_nbuckets
;
626 INSIST(ret
< qid
->qid_nbuckets
);
632 * Find the first entry in 'qid'. Returns NULL if there are no entries.
/*
 * Start a linear walk over every entry in 'qid': scan the buckets of
 * qid->qid_table in index order, looking at the head of each bucket's
 * list.
 */
634 static dns_dispentry_t
*
635 linear_first(dns_qid_t
*qid
) {
636 dns_dispentry_t
*ret
;
641 while (bucket
< qid
->qid_nbuckets
) {
642 ret
= ISC_LIST_HEAD(qid
->qid_table
[bucket
]);
652 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
/*
 * Continue a linear walk over 'qid' from entry 'resp': first try the
 * next entry on resp's own list; the bucket scan then starts from
 * resp->bucket and looks at the head of each bucket's list while the
 * index is below qid->qid_nbuckets.
 */
655 static dns_dispentry_t
*
656 linear_next(dns_qid_t
*qid
, dns_dispentry_t
*resp
) {
657 dns_dispentry_t
*ret
;
660 ret
= ISC_LIST_NEXT(resp
, link
);
664 bucket
= resp
->bucket
;
666 while (bucket
< qid
->qid_nbuckets
) {
667 ret
= ISC_LIST_HEAD(qid
->qid_table
[bucket
]);
677 * The dispatch must be locked.
/*
 * Decide whether dispatch 'disp' may be destroyed.  Four conditions are
 * examined in turn: outstanding references (refcount != 0), a pending
 * receive (recv_pending != 0), a non-empty active socket list, and the
 * dispatch not being in the shutting-down state (shutting_down == 0).
 * NOTE(review): the value returned for each case is not visible here;
 * presumably any of these conditions vetoes destruction.
 */
680 destroy_disp_ok(dns_dispatch_t
*disp
)
682 if (disp
->refcount
!= 0)
685 if (disp
->recv_pending
!= 0)
688 if (!ISC_LIST_EMPTY(disp
->activesockets
))
691 if (disp
->shutting_down
== 0)
698 * Called when refcount reaches 0 (and safe to destroy).
700 * The dispatcher must not be locked.
701 * The manager must be locked.
/*
 * Task action that tears down a dispatch.  'event' must be a
 * DNS_EVENT_DISPATCHCONTROL event whose ev_arg is the dispatch.
 *
 * The dispatch is unlinked from mgr->list, its main socket (if any) is
 * detached, every pooled inactive dispsocket is unlinked and destroyed,
 * all disp->ntasks internal tasks are detached, the event is freed and
 * the dispatch itself is released with dispatch_free().  Afterwards
 * destroy_mgr_ok() is consulted to see whether the manager can go too.
 */
704 destroy_disp(isc_task_t
*task
, isc_event_t
*event
) {
705 dns_dispatch_t
*disp
;
706 dns_dispatchmgr_t
*mgr
;
707 isc_boolean_t killmgr
;
708 dispsocket_t
*dispsocket
;
711 INSIST(event
->ev_type
== DNS_EVENT_DISPATCHCONTROL
);
715 disp
= event
->ev_arg
;
719 ISC_LIST_UNLINK(mgr
->list
, disp
, link
);
721 dispatch_log(disp
, LVL(90),
722 "shutting down; detaching from sock %p, task %p",
723 disp
->socket
, disp
->task
[0]); /* XXXX */
725 if (disp
->socket
!= NULL
)
726 isc_socket_detach(&disp
->socket
);
727 while ((dispsocket
= ISC_LIST_HEAD(disp
->inactivesockets
)) != NULL
) {
728 ISC_LIST_UNLINK(disp
->inactivesockets
, dispsocket
, link
);
729 destroy_dispsocket(disp
, &dispsocket
);
731 for (i
= 0; i
< disp
->ntasks
; i
++)
732 isc_task_detach(&disp
->task
[i
]);
733 isc_event_free(&event
);
735 dispatch_free(&disp
);
737 killmgr
= destroy_mgr_ok(mgr
);
744 * Manipulate port table per dispatch: find an entry for a given port number,
745 * create a new entry, and decrement a given entry with possible clean-up.
/*
 * Look up 'port' in the per-dispatch port table.  The table is hashed by
 * port % DNS_DISPATCH_PORTTABLESIZE; the selected bucket's list is
 * walked comparing each entry's port with 'port'.
 * Requires disp->port_table to be non-NULL.
 */
747 static dispportentry_t
*
748 port_search(dns_dispatch_t
*disp
, in_port_t port
) {
749 dispportentry_t
*portentry
;
751 REQUIRE(disp
->port_table
!= NULL
);
753 portentry
= ISC_LIST_HEAD(disp
->port_table
[port
%
754 DNS_DISPATCH_PORTTABLESIZE
]);
755 while (portentry
!= NULL
) {
756 if (portentry
->port
== port
)
758 portentry
= ISC_LIST_NEXT(portentry
, link
);
/*
 * Create a port table entry for 'port': take an entry from
 * disp->portpool, record the port number, and append it to the bucket
 * port % DNS_DISPATCH_PORTTABLESIZE of disp->port_table.
 * The pool allocation can fail (the NULL result is checked).
 * Requires disp->port_table to be non-NULL.
 */
764 static dispportentry_t
*
765 new_portentry(dns_dispatch_t
*disp
, in_port_t port
) {
766 dispportentry_t
*portentry
;
768 REQUIRE(disp
->port_table
!= NULL
);
770 portentry
= isc_mempool_get(disp
->portpool
);
771 if (portentry
== NULL
)
774 portentry
->port
= port
;
776 ISC_LINK_INIT(portentry
, link
);
777 ISC_LIST_APPEND(disp
->port_table
[port
% DNS_DISPATCH_PORTTABLESIZE
],
784 * The caller must not hold the qid->lock.
/*
 * Drop a reference on the port table entry '*portentryp'.  When the
 * reference count has reached 0 the entry is unlinked from its bucket in
 * disp->port_table and returned to disp->portpool.
 * Requires a non-NULL port table and an entry with refs > 0 on entry.
 * NOTE(review): the decrement itself and any locking are on lines not
 * visible here.
 */
787 deref_portentry(dns_dispatch_t
*disp
, dispportentry_t
**portentryp
) {
788 dispportentry_t
*portentry
= *portentryp
;
791 REQUIRE(disp
->port_table
!= NULL
);
792 REQUIRE(portentry
!= NULL
&& portentry
->refs
> 0);
797 if (portentry
->refs
== 0) {
798 ISC_LIST_UNLINK(disp
->port_table
[portentry
->port
%
799 DNS_DISPATCH_PORTTABLESIZE
],
801 isc_mempool_put(disp
->portpool
, portentry
);
809 * Find a dispsocket for socket address 'dest', and port number 'port'.
810 * Return NULL if no such entry exists.
/*
 * Search bucket 'bucket' of qid->sock_table (chained through 'blink')
 * for a dispsocket that has a port entry whose port equals 'port' and
 * whose host equals 'dest'.  Entries without a port entry never match.
 * Requires bucket < qid->qid_nbuckets.
 */
812 static dispsocket_t
*
813 socket_search(dns_qid_t
*qid
, isc_sockaddr_t
*dest
, in_port_t port
,
816 dispsocket_t
*dispsock
;
818 REQUIRE(bucket
< qid
->qid_nbuckets
);
820 dispsock
= ISC_LIST_HEAD(qid
->sock_table
[bucket
]);
822 while (dispsock
!= NULL
) {
823 if (dispsock
->portentry
!= NULL
&&
824 dispsock
->portentry
->port
== port
&&
825 isc_sockaddr_equal(dest
, &dispsock
->host
))
827 dispsock
= ISC_LIST_NEXT(dispsock
, blink
);
834 * Make a new socket for a single dispatch with a random port number.
835 * The caller must hold the disp->lock and qid->lock.
/*
 * Obtain a dedicated socket, bound to a randomly chosen local port, for
 * a transaction to 'dest' on dispatch 'disp'.
 *
 * Outline of the visible logic:
 *  - The candidate port array is the manager's v4ports or v6ports,
 *    selected by the address family of disp->local;
 *    ISC_R_ADDRNOTAVAIL is one of the early failure results.
 *  - A dispsocket structure is reused from disp->inactivesockets when
 *    one is pooled, otherwise taken from mgr->spool (ISC_R_NOMEMORY on
 *    failure), and is (re)initialized and attached to one of the
 *    dispatch's internal tasks (disp->task[r % disp->ntasks]).
 *  - Up to 64 attempts are made: pick a port with
 *    dispatch_uniformrandom(), skip it if socket_search() already finds
 *    a socket for the same destination and port, request
 *    ISC_SOCKET_REUSEADDRESS when the port is already in this
 *    dispatch's port table, and call open_socket().  On success a port
 *    table entry is created if there was none.
 *  - On success the dispsocket is filled in, appended to
 *    qid->sock_table[bucket] and returned through 'dispsockp'; on
 *    failure the socket is detached and the dispsocket destroyed.
 *
 * NOTE(review): several statements (loop exits, reference counting,
 * the final return) are on lines not visible here.
 */
838 get_dispsocket(dns_dispatch_t
*disp
, isc_sockaddr_t
*dest
,
839 isc_socketmgr_t
*sockmgr
, dns_qid_t
*qid
,
840 dispsocket_t
**dispsockp
, in_port_t
*portp
)
844 dns_dispatchmgr_t
*mgr
= disp
->mgr
;
845 isc_socket_t
*sock
= NULL
;
846 isc_result_t result
= ISC_R_FAILURE
;
848 isc_sockaddr_t localaddr
;
849 unsigned int bucket
= 0;
850 dispsocket_t
*dispsock
;
853 unsigned int bindoptions
;
854 dispportentry_t
*portentry
= NULL
;
856 if (isc_sockaddr_pf(&disp
->local
) == AF_INET
) {
857 nports
= disp
->mgr
->nv4ports
;
858 ports
= disp
->mgr
->v4ports
;
860 nports
= disp
->mgr
->nv6ports
;
861 ports
= disp
->mgr
->v6ports
;
864 return (ISC_R_ADDRNOTAVAIL
);
866 dispsock
= ISC_LIST_HEAD(disp
->inactivesockets
);
867 if (dispsock
!= NULL
) {
868 ISC_LIST_UNLINK(disp
->inactivesockets
, dispsock
, link
);
869 sock
= dispsock
->socket
;
870 dispsock
->socket
= NULL
;
872 dispsock
= isc_mempool_get(mgr
->spool
);
873 if (dispsock
== NULL
)
874 return (ISC_R_NOMEMORY
);
877 dispsock
->socket
= NULL
;
878 dispsock
->disp
= disp
;
879 dispsock
->resp
= NULL
;
880 dispsock
->portentry
= NULL
;
882 dispsock
->task
= NULL
;
883 isc_task_attach(disp
->task
[r
% disp
->ntasks
], &dispsock
->task
);
884 ISC_LINK_INIT(dispsock
, link
);
885 ISC_LINK_INIT(dispsock
, blink
);
886 dispsock
->magic
= DISPSOCK_MAGIC
;
890 * Pick up a random UDP port and open a new socket with it. Avoid
891 * choosing ports that share the same destination because it will be
892 * very likely to fail in bind(2) or connect(2).
894 localaddr
= disp
->local
;
895 for (i
= 0; i
< 64; i
++) {
896 port
= ports
[dispatch_uniformrandom(DISP_ARC4CTX(disp
),
898 isc_sockaddr_setport(&localaddr
, port
);
900 bucket
= dns_hash(qid
, dest
, 0, port
);
901 if (socket_search(qid
, dest
, port
, bucket
) != NULL
)
904 portentry
= port_search(disp
, port
);
905 if (portentry
!= NULL
)
906 bindoptions
|= ISC_SOCKET_REUSEADDRESS
;
907 result
= open_socket(sockmgr
, &localaddr
, bindoptions
, &sock
);
908 if (result
== ISC_R_SUCCESS
) {
909 if (portentry
== NULL
) {
910 portentry
= new_portentry(disp
, port
);
911 if (portentry
== NULL
) {
912 result
= ISC_R_NOMEMORY
;
918 } else if (result
!= ISC_R_ADDRINUSE
)
922 if (result
== ISC_R_SUCCESS
) {
923 dispsock
->socket
= sock
;
924 dispsock
->host
= *dest
;
925 dispsock
->portentry
= portentry
;
926 dispsock
->bucket
= bucket
;
927 ISC_LIST_APPEND(qid
->sock_table
[bucket
], dispsock
, blink
);
928 *dispsockp
= dispsock
;
932 * We could keep it in the inactive list, but since this should
933 * be an exceptional case and might be resource shortage, we'd
937 isc_socket_detach(&sock
);
938 destroy_dispsocket(disp
, &dispsock
);
945 * Destroy a dedicated dispatch socket.
/*
 * Release every resource held by the dispsocket '*dispsockp' and return
 * the structure to the manager's socket pool (disp->mgr->spool).
 *
 * The dispsocket must already be off its 'link' list (REQUIREd).  If
 * still present, its port entry reference is dropped, its socket is
 * detached, it is unlinked from qid->sock_table via 'blink', and its
 * task is detached.
 */
948 destroy_dispsocket(dns_dispatch_t
*disp
, dispsocket_t
**dispsockp
) {
949 dispsocket_t
*dispsock
;
953 * The dispatch must be locked.
956 REQUIRE(dispsockp
!= NULL
&& *dispsockp
!= NULL
);
957 dispsock
= *dispsockp
;
958 REQUIRE(!ISC_LINK_LINKED(dispsock
, link
));
962 if (dispsock
->portentry
!= NULL
)
963 deref_portentry(disp
, &dispsock
->portentry
);
964 if (dispsock
->socket
!= NULL
)
965 isc_socket_detach(&dispsock
->socket
);
966 if (ISC_LINK_LINKED(dispsock
, blink
)) {
969 ISC_LIST_UNLINK(qid
->sock_table
[dispsock
->bucket
], dispsock
,
973 if (dispsock
->task
!= NULL
)
974 isc_task_detach(&dispsock
->task
);
975 isc_mempool_put(disp
->mgr
->spool
, dispsock
);
981 * Deactivate a dedicated dispatch socket. Move it to the inactive list for
982 * future reuse unless the total number of sockets is exceeding the maximum.
/*
 * Take 'dispsock' out of active use on 'disp'.
 *
 * The socket is removed from disp->activesockets, any back-pointer from
 * its response entry is cleared, and its port entry reference is
 * dropped (a port entry must be present).  If the dispatch holds more
 * than DNS_DISPATCH_POOLSOCKS sockets the dispsocket is destroyed.
 * Otherwise the underlying socket is closed with isc_socket_close() and
 * the dispsocket is unlinked from the QID socket table; a successful
 * close puts it on disp->inactivesockets for reuse, while the only other
 * accepted result, ISC_R_NOTIMPLEMENTED, leads to destruction.
 * NOTE(review): the conditions selecting between the remaining
 * destroy_dispsocket() calls are on lines not visible here.
 */
985 deactivate_dispsocket(dns_dispatch_t
*disp
, dispsocket_t
*dispsock
) {
990 * The dispatch must be locked.
992 ISC_LIST_UNLINK(disp
->activesockets
, dispsock
, link
);
993 if (dispsock
->resp
!= NULL
) {
994 INSIST(dispsock
->resp
->dispsocket
== dispsock
);
995 dispsock
->resp
->dispsocket
= NULL
;
998 INSIST(dispsock
->portentry
!= NULL
);
999 deref_portentry(disp
, &dispsock
->portentry
);
1002 if (disp
->nsockets
> DNS_DISPATCH_POOLSOCKS
)
1003 destroy_dispsocket(disp
, &dispsock
);
1005 result
= isc_socket_close(dispsock
->socket
);
1007 qid
= DNS_QID(disp
);
1009 ISC_LIST_UNLINK(qid
->sock_table
[dispsock
->bucket
], dispsock
,
1013 if (result
== ISC_R_SUCCESS
)
1014 ISC_LIST_APPEND(disp
->inactivesockets
, dispsock
, link
);
1017 * If the underlying system does not allow this
1018 * optimization, destroy this temporary structure (and
1019 * create a new one for a new transaction).
1021 INSIST(result
== ISC_R_NOTIMPLEMENTED
);
1022 destroy_dispsocket(disp
, &dispsock
);
1026 /* This kind of optimization isn't necessary for normal use */
1030 destroy_dispsocket(disp
, &dispsock
);
1035 * Find an entry for query ID 'id', socket address 'dest', and port number
1037 * Return NULL if no such entry exists.
/*
 * Search bucket 'bucket' of qid->qid_table for a response entry whose
 * message id, destination address and port all match 'id', 'dest' and
 * 'port'.  Requires bucket < qid->qid_nbuckets.
 */
1039 static dns_dispentry_t
*
1040 entry_search(dns_qid_t
*qid
, isc_sockaddr_t
*dest
, dns_messageid_t id
,
1041 in_port_t port
, unsigned int bucket
)
1043 dns_dispentry_t
*res
;
1045 REQUIRE(bucket
< qid
->qid_nbuckets
);
1047 res
= ISC_LIST_HEAD(qid
->qid_table
[bucket
]);
1049 while (res
!= NULL
) {
1050 if (res
->id
== id
&& isc_sockaddr_equal(dest
, &res
->host
) &&
1051 res
->port
== port
) {
1054 res
= ISC_LIST_NEXT(res
, link
);
/*
 * Release receive buffer 'buf' of length 'len' that belongs to 'disp'.
 * 'buf' must be non-NULL and 'len' non-zero.
 *
 * TCP buffers were allocated from the manager's memory context and are
 * returned with isc_mem_put(); the dispatch must have at least one TCP
 * buffer outstanding.  UDP buffers come from the manager's buffer pool:
 * under mgr->buffer_lock the outstanding-buffer count is decremented and
 * the buffer is put back into mgr->bpool.  A UDP buffer must have
 * exactly the manager's configured buffersize.
 */
1061 free_buffer(dns_dispatch_t
*disp
, void *buf
, unsigned int len
) {
1062 INSIST(buf
!= NULL
&& len
!= 0);
1065 switch (disp
->socktype
) {
1066 case isc_sockettype_tcp
:
1067 INSIST(disp
->tcpbuffers
> 0);
1069 isc_mem_put(disp
->mgr
->mctx
, buf
, len
);
1071 case isc_sockettype_udp
:
1072 LOCK(&disp
->mgr
->buffer_lock
);
1073 INSIST(disp
->mgr
->buffers
> 0);
1074 INSIST(len
== disp
->mgr
->buffersize
);
1075 disp
->mgr
->buffers
--;
1076 isc_mempool_put(disp
->mgr
->bpool
, buf
);
1077 UNLOCK(&disp
->mgr
->buffer_lock
);
/*
 * Get a UDP receive buffer from the manager's buffer pool (mgr->bpool).
 * The pool access and the mgr->buffers counter update are both done
 * while holding mgr->buffer_lock.
 * NOTE(review): handling of a failed allocation is on lines not visible
 * here.
 */
1086 allocate_udp_buffer(dns_dispatch_t
*disp
) {
1089 LOCK(&disp
->mgr
->buffer_lock
);
1090 temp
= isc_mempool_get(disp
->mgr
->bpool
);
1093 disp
->mgr
->buffers
++;
1094 UNLOCK(&disp
->mgr
->buffer_lock
);
/*
 * Release dispatch event 'ev'.
 *
 * The dispatch's failsafe event is special-cased: when 'ev' is
 * disp->failsafe_ev, the shutdown_out flag (which must be set) is
 * cleared.  Ordinary events are returned to the manager's event pool
 * (mgr->epool).
 * NOTE(review): presumably the failsafe event is not returned to the
 * pool -- the statement between the two visible parts is missing here.
 */
1100 free_event(dns_dispatch_t
*disp
, dns_dispatchevent_t
*ev
) {
1101 if (disp
->failsafe_ev
== ev
) {
1102 INSIST(disp
->shutdown_out
== 1);
1103 disp
->shutdown_out
= 0;
1108 isc_mempool_put(disp
->mgr
->epool
, ev
);
/*
 * Get a dispatch event from the manager's event pool (mgr->epool) and
 * initialize its common event header with ISC_EVENT_INIT(): type 0, no
 * sender, action, argument or destructor set.
 * NOTE(review): handling of a failed allocation is on lines not visible
 * here.
 */
1111 static inline dns_dispatchevent_t
*
1112 allocate_event(dns_dispatch_t
*disp
) {
1113 dns_dispatchevent_t
*ev
;
1115 ev
= isc_mempool_get(disp
->mgr
->epool
);
1118 ISC_EVENT_INIT(ev
, sizeof(*ev
), 0, NULL
, 0,
1119 NULL
, NULL
, NULL
, NULL
, NULL
);
/*
 * Receive-event handler for an exclusive (per-transaction) socket.
 * The event argument is the dispsocket; after validating it the event
 * is passed to udp_recv() together with the owning dispatch and the
 * dispsocket itself.
 */
1125 udp_exrecv(isc_task_t
*task
, isc_event_t
*ev
) {
1126 dispsocket_t
*dispsock
= ev
->ev_arg
;
1130 REQUIRE(VALID_DISPSOCK(dispsock
));
1131 udp_recv(ev
, dispsock
->disp
, dispsock
);
/*
 * Receive-event handler for the dispatch's shared socket.
 * The event argument is the dispatch; after validating it the event is
 * passed to udp_recv() with a NULL dispsocket.
 */
1135 udp_shrecv(isc_task_t
*task
, isc_event_t
*ev
) {
1136 dns_dispatch_t
*disp
= ev
->ev_arg
;
1140 REQUIRE(VALID_DISPATCH(disp
));
1141 udp_recv(ev
, disp
, NULL
);
1147 * If I/O result == CANCELED or error, free the buffer.
1149 * If query, free the buffer, restart.
1152 * Allocate event, fill in details.
1153 * If cannot allocate, free buffer, restart.
1154 * find target. If not found, free buffer, restart.
1155 * if event queue is not empty, queue. else, send.
1159 udp_recv(isc_event_t
*ev_in
, dns_dispatch_t
*disp
, dispsocket_t
*dispsock
) {
1160 isc_socketevent_t
*ev
= (isc_socketevent_t
*)ev_in
;
1163 isc_buffer_t source
;
1165 dns_dispentry_t
*resp
= NULL
;
1166 dns_dispatchevent_t
*rev
;
1167 unsigned int bucket
;
1168 isc_boolean_t killit
;
1169 isc_boolean_t queue_response
;
1170 dns_dispatchmgr_t
*mgr
;
1172 isc_netaddr_t netaddr
;
1175 isc_boolean_t qidlocked
= ISC_FALSE
;
1182 dispatch_log(disp
, LVL(90),
1183 "got packet: requests %d, buffers %d, recvs %d",
1184 disp
->requests
, disp
->mgr
->buffers
, disp
->recv_pending
);
1186 if (dispsock
== NULL
&& ev
->ev_type
== ISC_SOCKEVENT_RECVDONE
) {
1188 * Unless the receive event was imported from a listening
1189 * interface, in which case the event type is
1190 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1192 INSIST(disp
->recv_pending
!= 0);
1193 disp
->recv_pending
= 0;
1196 if (dispsock
!= NULL
&&
1197 (ev
->result
== ISC_R_CANCELED
|| dispsock
->resp
== NULL
)) {
1199 * dispsock->resp can be NULL if this transaction was canceled
1200 * just after receiving a response. Since this socket is
1201 * exclusively used and there should be at most one receive
1202 * event the canceled event should have had no effect. So
1203 * we can (and should) deactivate the socket right now.
1205 deactivate_dispsocket(disp
, dispsock
);
1209 if (disp
->shutting_down
) {
1211 * This dispatcher is shutting down.
1213 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1215 isc_event_free(&ev_in
);
1218 killit
= destroy_disp_ok(disp
);
1219 UNLOCK(&disp
->lock
);
1221 isc_task_send(disp
->task
[0], &disp
->ctlevent
);
1226 if ((disp
->attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) != 0) {
1227 if (dispsock
!= NULL
) {
1228 resp
= dispsock
->resp
;
1230 if (ev
->result
!= ISC_R_SUCCESS
) {
1232 * This is most likely a network error on a
1233 * connected socket. It makes no sense to
1234 * check the address or parse the packet, but it
1235 * will help to return the error to the caller.
1240 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1242 UNLOCK(&disp
->lock
);
1243 isc_event_free(&ev_in
);
1246 } else if (ev
->result
!= ISC_R_SUCCESS
) {
1247 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1249 if (ev
->result
!= ISC_R_CANCELED
)
1250 dispatch_log(disp
, ISC_LOG_ERROR
,
1251 "odd socket result in udp_recv(): %s",
1252 isc_result_totext(ev
->result
));
1254 UNLOCK(&disp
->lock
);
1255 isc_event_free(&ev_in
);
1260 * If this is from a blackholed address, drop it.
1262 isc_netaddr_fromsockaddr(&netaddr
, &ev
->address
);
1263 if (disp
->mgr
->blackhole
!= NULL
&&
1264 dns_acl_match(&netaddr
, NULL
, disp
->mgr
->blackhole
,
1265 NULL
, &match
, NULL
) == ISC_R_SUCCESS
&&
1268 if (isc_log_wouldlog(dns_lctx
, LVL(10))) {
1269 char netaddrstr
[ISC_NETADDR_FORMATSIZE
];
1270 isc_netaddr_format(&netaddr
, netaddrstr
,
1271 sizeof(netaddrstr
));
1272 dispatch_log(disp
, LVL(10),
1273 "blackholed packet from %s",
1276 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1281 * Peek into the buffer to see what we can see.
1283 isc_buffer_init(&source
, ev
->region
.base
, ev
->region
.length
);
1284 isc_buffer_add(&source
, ev
->n
);
1285 dres
= dns_message_peekheader(&source
, &id
, &flags
);
1286 if (dres
!= ISC_R_SUCCESS
) {
1287 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1288 dispatch_log(disp
, LVL(10), "got garbage packet");
1292 dispatch_log(disp
, LVL(92),
1293 "got valid DNS message header, /QR %c, id %u",
1294 ((flags
& DNS_MESSAGEFLAG_QR
) ? '1' : '0'), id
);
1297 * Look at flags. If query, drop it. If response,
1298 * look to see where it goes.
1300 queue_response
= ISC_FALSE
;
1301 if ((flags
& DNS_MESSAGEFLAG_QR
) == 0) {
1303 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1308 * Search for the corresponding response. If we are using an exclusive
1309 * socket, we've already identified it and we can skip the search; but
1310 * the ID and the address must match the expected ones.
1313 bucket
= dns_hash(qid
, &ev
->address
, id
, disp
->localport
);
1315 qidlocked
= ISC_TRUE
;
1316 resp
= entry_search(qid
, &ev
->address
, id
, disp
->localport
,
1318 dispatch_log(disp
, LVL(90),
1319 "search for response in bucket %d: %s",
1320 bucket
, (resp
== NULL
? "not found" : "found"));
1323 inc_stats(mgr
, dns_resstatscounter_mismatch
);
1324 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1327 } else if (resp
->id
!= id
|| !isc_sockaddr_equal(&ev
->address
,
1329 dispatch_log(disp
, LVL(90),
1330 "response to an exclusive socket doesn't match");
1331 inc_stats(mgr
, dns_resstatscounter_mismatch
);
1332 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1337 * Now that we have the original dispatch the query was sent
1338 * from check that the address and port the response was
1339 * sent to make sense.
1341 if (disp
!= resp
->disp
) {
1346 * Check that the socket types and ports match.
1348 if (disp
->socktype
!= resp
->disp
->socktype
||
1349 isc_sockaddr_getport(&disp
->local
) !=
1350 isc_sockaddr_getport(&resp
->disp
->local
)) {
1351 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1356 * If both dispatches are bound to an address then fail as
1357 * the addresses can't be equal (enforced by the IP stack).
1359 * Note under Linux a packet can be sent out via IPv4 socket
1360 * and the response be received via an IPv6 socket.
1362 * Requests sent out via IPv6 should always come back in
1365 if (isc_sockaddr_pf(&resp
->disp
->local
) == PF_INET6
&&
1366 isc_sockaddr_pf(&disp
->local
) != PF_INET6
) {
1367 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1370 isc_sockaddr_anyofpf(&a1
, isc_sockaddr_pf(&resp
->disp
->local
));
1371 isc_sockaddr_anyofpf(&a2
, isc_sockaddr_pf(&disp
->local
));
1372 if (!isc_sockaddr_eqaddr(&a1
, &resp
->disp
->local
) &&
1373 !isc_sockaddr_eqaddr(&a2
, &disp
->local
)) {
1374 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1380 queue_response
= resp
->item_out
;
1381 rev
= allocate_event(resp
->disp
);
1383 free_buffer(disp
, ev
->region
.base
, ev
->region
.length
);
1388 * At this point, rev contains the event we want to fill in, and
1389 * resp contains the information on the place to send it to.
1390 * Send the event off.
1392 isc_buffer_init(&rev
->buffer
, ev
->region
.base
, ev
->region
.length
);
1393 isc_buffer_add(&rev
->buffer
, ev
->n
);
1394 rev
->result
= ev
->result
;
1396 rev
->addr
= ev
->address
;
1397 rev
->pktinfo
= ev
->pktinfo
;
1398 rev
->attributes
= ev
->attributes
;
1399 if (queue_response
) {
1400 ISC_LIST_APPEND(resp
->items
, rev
, ev_link
);
1402 ISC_EVENT_INIT(rev
, sizeof(*rev
), 0, NULL
,
1404 resp
->action
, resp
->arg
, resp
, NULL
, NULL
);
1405 request_log(disp
, resp
, LVL(90),
1406 "[a] Sent event %p buffer %p len %d to task %p",
1407 rev
, rev
->buffer
.base
, rev
->buffer
.length
,
1409 resp
->item_out
= ISC_TRUE
;
1410 isc_task_send(resp
->task
, ISC_EVENT_PTR(&rev
));
1417 * Restart recv() to get the next packet.
1420 result
= startrecv(disp
, dispsock
);
1421 if (result
!= ISC_R_SUCCESS
&& dispsock
!= NULL
) {
1423 * XXX: weird. There seems to be no recovery process other than
1424 * deactivate this socket anyway (since we cannot start
1425 * receiving, we won't be able to receive a cancel event
1428 deactivate_dispsocket(disp
, dispsock
);
1430 UNLOCK(&disp
->lock
);
1432 isc_event_free(&ev_in
);
1438 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1439 * various queues drain.
1441 * If query, restart.
1444 * Allocate event, fill in details.
1445 * If cannot allocate, restart.
1446 * find target. If not found, restart.
1447 * if event queue is not empty, queue. else, send.
1451 tcp_recv(isc_task_t
*task
, isc_event_t
*ev_in
) {
1452 dns_dispatch_t
*disp
= ev_in
->ev_arg
;
1453 dns_tcpmsg_t
*tcpmsg
= &disp
->tcpmsg
;
1457 dns_dispentry_t
*resp
;
1458 dns_dispatchevent_t
*rev
;
1459 unsigned int bucket
;
1460 isc_boolean_t killit
;
1461 isc_boolean_t queue_response
;
1464 char buf
[ISC_SOCKADDR_FORMATSIZE
];
1468 REQUIRE(VALID_DISPATCH(disp
));
1472 dispatch_log(disp
, LVL(90),
1473 "got TCP packet: requests %d, buffers %d, recvs %d",
1474 disp
->requests
, disp
->tcpbuffers
, disp
->recv_pending
);
1478 INSIST(disp
->recv_pending
!= 0);
1479 disp
->recv_pending
= 0;
1481 if (disp
->refcount
== 0) {
1483 * This dispatcher is shutting down. Force cancelation.
1485 tcpmsg
->result
= ISC_R_CANCELED
;
1488 if (tcpmsg
->result
!= ISC_R_SUCCESS
) {
1489 switch (tcpmsg
->result
) {
1490 case ISC_R_CANCELED
:
1494 dispatch_log(disp
, LVL(90), "shutting down on EOF");
1498 case ISC_R_CONNECTIONRESET
:
1499 level
= ISC_LOG_INFO
;
1503 level
= ISC_LOG_ERROR
;
1505 isc_sockaddr_format(&tcpmsg
->address
, buf
, sizeof(buf
));
1506 dispatch_log(disp
, level
, "shutting down due to TCP "
1507 "receive error: %s: %s", buf
,
1508 isc_result_totext(tcpmsg
->result
));
1514 * The event is statically allocated in the tcpmsg
1515 * structure, and destroy_disp() frees the tcpmsg, so we must
1516 * free the event *before* calling destroy_disp().
1518 isc_event_free(&ev_in
);
1520 disp
->shutting_down
= 1;
1521 disp
->shutdown_why
= tcpmsg
->result
;
1524 * If the recv() was canceled pass the word on.
1526 killit
= destroy_disp_ok(disp
);
1527 UNLOCK(&disp
->lock
);
1529 isc_task_send(disp
->task
[0], &disp
->ctlevent
);
1533 dispatch_log(disp
, LVL(90), "result %d, length == %d, addr = %p",
1535 tcpmsg
->buffer
.length
, tcpmsg
->buffer
.base
);
1538 * Peek into the buffer to see what we can see.
1540 dres
= dns_message_peekheader(&tcpmsg
->buffer
, &id
, &flags
);
1541 if (dres
!= ISC_R_SUCCESS
) {
1542 dispatch_log(disp
, LVL(10), "got garbage packet");
1546 dispatch_log(disp
, LVL(92),
1547 "got valid DNS message header, /QR %c, id %u",
1548 ((flags
& DNS_MESSAGEFLAG_QR
) ? '1' : '0'), id
);
1551 * Allocate an event to send to the query or response client, and
1552 * allocate a new buffer for our use.
1556 * Look at flags. If query, drop it. If response,
1557 * look to see where it goes.
1559 queue_response
= ISC_FALSE
;
1560 if ((flags
& DNS_MESSAGEFLAG_QR
) == 0) {
1570 bucket
= dns_hash(qid
, &tcpmsg
->address
, id
, disp
->localport
);
1572 resp
= entry_search(qid
, &tcpmsg
->address
, id
, disp
->localport
, bucket
);
1573 dispatch_log(disp
, LVL(90),
1574 "search for response in bucket %d: %s",
1575 bucket
, (resp
== NULL
? "not found" : "found"));
1579 queue_response
= resp
->item_out
;
1580 rev
= allocate_event(disp
);
1585 * At this point, rev contains the event we want to fill in, and
1586 * resp contains the information on the place to send it to.
1587 * Send the event off.
1589 dns_tcpmsg_keepbuffer(tcpmsg
, &rev
->buffer
);
1591 rev
->result
= ISC_R_SUCCESS
;
1593 rev
->addr
= tcpmsg
->address
;
1594 if (queue_response
) {
1595 ISC_LIST_APPEND(resp
->items
, rev
, ev_link
);
1597 ISC_EVENT_INIT(rev
, sizeof(*rev
), 0, NULL
, DNS_EVENT_DISPATCH
,
1598 resp
->action
, resp
->arg
, resp
, NULL
, NULL
);
1599 request_log(disp
, resp
, LVL(90),
1600 "[b] Sent event %p buffer %p len %d to task %p",
1601 rev
, rev
->buffer
.base
, rev
->buffer
.length
,
1603 resp
->item_out
= ISC_TRUE
;
1604 isc_task_send(resp
->task
, ISC_EVENT_PTR(&rev
));
1610 * Restart recv() to get the next packet.
1613 (void)startrecv(disp
, NULL
);
1615 UNLOCK(&disp
->lock
);
1617 isc_event_free(&ev_in
);
1621 * disp must be locked.
1624 startrecv(dns_dispatch_t
*disp
, dispsocket_t
*dispsock
) {
1626 isc_region_t region
;
1627 isc_socket_t
*socket
;
1629 if (disp
->shutting_down
== 1)
1630 return (ISC_R_SUCCESS
);
1632 if ((disp
->attributes
& DNS_DISPATCHATTR_NOLISTEN
) != 0)
1633 return (ISC_R_SUCCESS
);
1635 if (disp
->recv_pending
!= 0 && dispsock
== NULL
)
1636 return (ISC_R_SUCCESS
);
1638 if (disp
->mgr
->buffers
>= disp
->mgr
->maxbuffers
)
1639 return (ISC_R_NOMEMORY
);
1641 if ((disp
->attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) != 0 &&
1643 return (ISC_R_SUCCESS
);
1645 if (dispsock
!= NULL
)
1646 socket
= dispsock
->socket
;
1648 socket
= disp
->socket
;
1649 INSIST(socket
!= NULL
);
1651 switch (disp
->socktype
) {
1653 * UDP reads are always maximal.
1655 case isc_sockettype_udp
:
1656 region
.length
= disp
->mgr
->buffersize
;
1657 region
.base
= allocate_udp_buffer(disp
);
1658 if (region
.base
== NULL
)
1659 return (ISC_R_NOMEMORY
);
1660 if (dispsock
!= NULL
) {
1661 res
= isc_socket_recv(socket
, ®ion
, 1,
1662 dispsock
->task
, udp_exrecv
,
1664 if (res
!= ISC_R_SUCCESS
) {
1665 free_buffer(disp
, region
.base
, region
.length
);
1669 res
= isc_socket_recv(socket
, ®ion
, 1,
1670 disp
->task
[0], udp_shrecv
, disp
);
1671 if (res
!= ISC_R_SUCCESS
) {
1672 free_buffer(disp
, region
.base
, region
.length
);
1673 disp
->shutdown_why
= res
;
1674 disp
->shutting_down
= 1;
1676 return (ISC_R_SUCCESS
); /* recover by cancel */
1678 INSIST(disp
->recv_pending
== 0);
1679 disp
->recv_pending
= 1;
1683 case isc_sockettype_tcp
:
1684 res
= dns_tcpmsg_readmessage(&disp
->tcpmsg
, disp
->task
[0],
1686 if (res
!= ISC_R_SUCCESS
) {
1687 disp
->shutdown_why
= res
;
1688 disp
->shutting_down
= 1;
1690 return (ISC_R_SUCCESS
); /* recover by cancel */
1692 INSIST(disp
->recv_pending
== 0);
1693 disp
->recv_pending
= 1;
1700 return (ISC_R_SUCCESS
);
1704 * Mgr must be locked when calling this function.
1706 static isc_boolean_t
1707 destroy_mgr_ok(dns_dispatchmgr_t
*mgr
) {
1708 mgr_log(mgr
, LVL(90),
1709 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1710 "epool=%d, rpool=%d, dpool=%d",
1711 MGR_IS_SHUTTINGDOWN(mgr
), !ISC_LIST_EMPTY(mgr
->list
),
1712 isc_mempool_getallocated(mgr
->epool
),
1713 isc_mempool_getallocated(mgr
->rpool
),
1714 isc_mempool_getallocated(mgr
->dpool
));
1715 if (!MGR_IS_SHUTTINGDOWN(mgr
))
1717 if (!ISC_LIST_EMPTY(mgr
->list
))
1719 if (isc_mempool_getallocated(mgr
->epool
) != 0)
1721 if (isc_mempool_getallocated(mgr
->rpool
) != 0)
1723 if (isc_mempool_getallocated(mgr
->dpool
) != 0)
1730 * Mgr must be unlocked when calling this function.
1733 destroy_mgr(dns_dispatchmgr_t
**mgrp
) {
1735 dns_dispatchmgr_t
*mgr
;
1744 DESTROYLOCK(&mgr
->lock
);
1747 DESTROYLOCK(&mgr
->arc4_lock
);
1749 isc_mempool_destroy(&mgr
->epool
);
1750 isc_mempool_destroy(&mgr
->rpool
);
1751 isc_mempool_destroy(&mgr
->dpool
);
1752 isc_mempool_destroy(&mgr
->bpool
);
1753 isc_mempool_destroy(&mgr
->spool
);
1755 DESTROYLOCK(&mgr
->pool_lock
);
1758 if (mgr
->entropy
!= NULL
)
1759 isc_entropy_detach(&mgr
->entropy
);
1761 if (mgr
->qid
!= NULL
)
1762 qid_destroy(mctx
, &mgr
->qid
);
1764 DESTROYLOCK(&mgr
->buffer_lock
);
1766 if (mgr
->blackhole
!= NULL
)
1767 dns_acl_detach(&mgr
->blackhole
);
1769 if (mgr
->stats
!= NULL
)
1770 isc_stats_detach(&mgr
->stats
);
1772 if (mgr
->v4ports
!= NULL
) {
1773 isc_mem_put(mctx
, mgr
->v4ports
,
1774 mgr
->nv4ports
* sizeof(in_port_t
));
1776 if (mgr
->v6ports
!= NULL
) {
1777 isc_mem_put(mctx
, mgr
->v6ports
,
1778 mgr
->nv6ports
* sizeof(in_port_t
));
1780 isc_mem_put(mctx
, mgr
, sizeof(dns_dispatchmgr_t
));
1781 isc_mem_detach(&mctx
);
1785 open_socket(isc_socketmgr_t
*mgr
, isc_sockaddr_t
*local
,
1786 unsigned int options
, isc_socket_t
**sockp
)
1789 isc_result_t result
;
1793 result
= isc_socket_create(mgr
, isc_sockaddr_pf(local
),
1794 isc_sockettype_udp
, &sock
);
1795 if (result
!= ISC_R_SUCCESS
)
1797 isc_socket_setname(sock
, "dispatcher", NULL
);
1800 result
= isc_socket_open(sock
);
1801 if (result
!= ISC_R_SUCCESS
)
1808 #ifndef ISC_ALLOW_MAPPED
1809 isc_socket_ipv6only(sock
, ISC_TRUE
);
1811 result
= isc_socket_bind(sock
, local
, options
);
1812 if (result
!= ISC_R_SUCCESS
) {
1814 isc_socket_detach(&sock
);
1817 isc_socket_close(sock
);
1826 return (ISC_R_SUCCESS
);
1830 * Create a temporary port list to set the initial default set of dispatch
1831 * ports: [1024, 65535]. This is almost meaningless as the application will
1832 * normally set the ports explicitly, but is provided to fill some minor corner
1836 create_default_portset(isc_mem_t
*mctx
, isc_portset_t
**portsetp
) {
1837 isc_result_t result
;
1839 result
= isc_portset_create(mctx
, portsetp
);
1840 if (result
!= ISC_R_SUCCESS
)
1842 isc_portset_addrange(*portsetp
, 1024, 65535);
1844 return (ISC_R_SUCCESS
);
1852 dns_dispatchmgr_create(isc_mem_t
*mctx
, isc_entropy_t
*entropy
,
1853 dns_dispatchmgr_t
**mgrp
)
1855 dns_dispatchmgr_t
*mgr
;
1856 isc_result_t result
;
1857 isc_portset_t
*v4portset
= NULL
;
1858 isc_portset_t
*v6portset
= NULL
;
1860 REQUIRE(mctx
!= NULL
);
1861 REQUIRE(mgrp
!= NULL
&& *mgrp
== NULL
);
1863 mgr
= isc_mem_get(mctx
, sizeof(dns_dispatchmgr_t
));
1865 return (ISC_R_NOMEMORY
);
1868 isc_mem_attach(mctx
, &mgr
->mctx
);
1870 mgr
->blackhole
= NULL
;
1873 result
= isc_mutex_init(&mgr
->lock
);
1874 if (result
!= ISC_R_SUCCESS
)
1877 result
= isc_mutex_init(&mgr
->arc4_lock
);
1878 if (result
!= ISC_R_SUCCESS
)
1881 result
= isc_mutex_init(&mgr
->buffer_lock
);
1882 if (result
!= ISC_R_SUCCESS
)
1883 goto kill_arc4_lock
;
1885 result
= isc_mutex_init(&mgr
->pool_lock
);
1886 if (result
!= ISC_R_SUCCESS
)
1887 goto kill_buffer_lock
;
1890 if (isc_mempool_create(mgr
->mctx
, sizeof(dns_dispatchevent_t
),
1891 &mgr
->epool
) != ISC_R_SUCCESS
) {
1892 result
= ISC_R_NOMEMORY
;
1893 goto kill_pool_lock
;
1897 if (isc_mempool_create(mgr
->mctx
, sizeof(dns_dispentry_t
),
1898 &mgr
->rpool
) != ISC_R_SUCCESS
) {
1899 result
= ISC_R_NOMEMORY
;
1904 if (isc_mempool_create(mgr
->mctx
, sizeof(dns_dispatch_t
),
1905 &mgr
->dpool
) != ISC_R_SUCCESS
) {
1906 result
= ISC_R_NOMEMORY
;
1910 isc_mempool_setname(mgr
->epool
, "dispmgr_epool");
1911 isc_mempool_setfreemax(mgr
->epool
, 1024);
1912 isc_mempool_associatelock(mgr
->epool
, &mgr
->pool_lock
);
1914 isc_mempool_setname(mgr
->rpool
, "dispmgr_rpool");
1915 isc_mempool_setfreemax(mgr
->rpool
, 1024);
1916 isc_mempool_associatelock(mgr
->rpool
, &mgr
->pool_lock
);
1918 isc_mempool_setname(mgr
->dpool
, "dispmgr_dpool");
1919 isc_mempool_setfreemax(mgr
->dpool
, 1024);
1920 isc_mempool_associatelock(mgr
->dpool
, &mgr
->pool_lock
);
1923 mgr
->buffersize
= 0;
1924 mgr
->maxbuffers
= 0;
1927 mgr
->entropy
= NULL
;
1930 ISC_LIST_INIT(mgr
->list
);
1931 mgr
->v4ports
= NULL
;
1932 mgr
->v6ports
= NULL
;
1935 mgr
->magic
= DNS_DISPATCHMGR_MAGIC
;
1937 result
= create_default_portset(mctx
, &v4portset
);
1938 if (result
== ISC_R_SUCCESS
) {
1939 result
= create_default_portset(mctx
, &v6portset
);
1940 if (result
== ISC_R_SUCCESS
) {
1941 result
= dns_dispatchmgr_setavailports(mgr
,
1946 if (v4portset
!= NULL
)
1947 isc_portset_destroy(mctx
, &v4portset
);
1948 if (v6portset
!= NULL
)
1949 isc_portset_destroy(mctx
, &v6portset
);
1950 if (result
!= ISC_R_SUCCESS
)
1954 if (entropy
!= NULL
)
1955 isc_entropy_attach(entropy
, &mgr
->entropy
);
1960 dispatch_initrandom(&mgr
->arc4ctx
, mgr
->entropy
, &mgr
->arc4_lock
);
1963 return (ISC_R_SUCCESS
);
1966 isc_mempool_destroy(&mgr
->dpool
);
1968 isc_mempool_destroy(&mgr
->rpool
);
1970 isc_mempool_destroy(&mgr
->epool
);
1972 DESTROYLOCK(&mgr
->pool_lock
);
1974 DESTROYLOCK(&mgr
->buffer_lock
);
1976 DESTROYLOCK(&mgr
->arc4_lock
);
1978 DESTROYLOCK(&mgr
->lock
);
1980 isc_mem_put(mctx
, mgr
, sizeof(dns_dispatchmgr_t
));
1981 isc_mem_detach(&mctx
);
1987 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t
*mgr
, dns_acl_t
*blackhole
) {
1988 REQUIRE(VALID_DISPATCHMGR(mgr
));
1989 if (mgr
->blackhole
!= NULL
)
1990 dns_acl_detach(&mgr
->blackhole
);
1991 dns_acl_attach(blackhole
, &mgr
->blackhole
);
1995 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t
*mgr
) {
1996 REQUIRE(VALID_DISPATCHMGR(mgr
));
1997 return (mgr
->blackhole
);
2001 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t
*mgr
,
2002 dns_portlist_t
*portlist
)
2004 REQUIRE(VALID_DISPATCHMGR(mgr
));
2007 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
2012 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t
*mgr
) {
2013 REQUIRE(VALID_DISPATCHMGR(mgr
));
2014 return (NULL
); /* this function is deprecated */
2018 dns_dispatchmgr_setavailports(dns_dispatchmgr_t
*mgr
, isc_portset_t
*v4portset
,
2019 isc_portset_t
*v6portset
)
2021 in_port_t
*v4ports
, *v6ports
, p
;
2022 unsigned int nv4ports
, nv6ports
, i4
, i6
;
2024 REQUIRE(VALID_DISPATCHMGR(mgr
));
2026 nv4ports
= isc_portset_nports(v4portset
);
2027 nv6ports
= isc_portset_nports(v6portset
);
2030 if (nv4ports
!= 0) {
2031 v4ports
= isc_mem_get(mgr
->mctx
, sizeof(in_port_t
) * nv4ports
);
2032 if (v4ports
== NULL
)
2033 return (ISC_R_NOMEMORY
);
2036 if (nv6ports
!= 0) {
2037 v6ports
= isc_mem_get(mgr
->mctx
, sizeof(in_port_t
) * nv6ports
);
2038 if (v6ports
== NULL
) {
2039 if (v4ports
!= NULL
) {
2040 isc_mem_put(mgr
->mctx
, v4ports
,
2042 isc_portset_nports(v4portset
));
2044 return (ISC_R_NOMEMORY
);
2052 if (isc_portset_isset(v4portset
, p
)) {
2053 INSIST(i4
< nv4ports
);
2056 if (isc_portset_isset(v6portset
, p
)) {
2057 INSIST(i6
< nv6ports
);
2060 } while (p
++ < 65535);
2061 INSIST(i4
== nv4ports
&& i6
== nv6ports
);
2064 if (mgr
->v4ports
!= NULL
) {
2065 isc_mem_put(mgr
->mctx
, mgr
->v4ports
,
2066 mgr
->nv4ports
* sizeof(in_port_t
));
2068 mgr
->v4ports
= v4ports
;
2069 mgr
->nv4ports
= nv4ports
;
2071 if (mgr
->v6ports
!= NULL
) {
2072 isc_mem_put(mgr
->mctx
, mgr
->v6ports
,
2073 mgr
->nv6ports
* sizeof(in_port_t
));
2075 mgr
->v6ports
= v6ports
;
2076 mgr
->nv6ports
= nv6ports
;
2079 return (ISC_R_SUCCESS
);
2083 dns_dispatchmgr_setudp(dns_dispatchmgr_t
*mgr
,
2084 unsigned int buffersize
, unsigned int maxbuffers
,
2085 unsigned int maxrequests
, unsigned int buckets
,
2086 unsigned int increment
)
2088 isc_result_t result
;
2090 REQUIRE(VALID_DISPATCHMGR(mgr
));
2091 REQUIRE(buffersize
>= 512 && buffersize
< (64 * 1024));
2092 REQUIRE(maxbuffers
> 0);
2093 REQUIRE(buckets
< 2097169); /* next prime > 65536 * 32 */
2094 REQUIRE(increment
> buckets
);
2097 * Keep some number of items around. This should be a config
2098 * option. For now, keep 8, but later keep at least two even
2099 * if the caller wants less. This allows us to ensure certain
2100 * things, like an event can be "freed" and the next allocation
2101 * will always succeed.
2103 * Note that if limits are placed on anything here, we use one
2104 * event internally, so the actual limit should be "wanted + 1."
2112 LOCK(&mgr
->buffer_lock
);
2114 /* Create or adjust buffer pool */
2115 if (mgr
->bpool
!= NULL
) {
2117 * We only increase the maxbuffers to avoid accidental buffer
2118 * shortage. Ideally we'd separate the manager-wide maximum
2119 * from per-dispatch limits and respect the latter within the
2120 * global limit. But at this moment that's deemed to be
2121 * overkill and isn't worth additional implementation
2124 if (maxbuffers
> mgr
->maxbuffers
) {
2125 isc_mempool_setmaxalloc(mgr
->bpool
, maxbuffers
);
2126 mgr
->maxbuffers
= maxbuffers
;
2129 result
= isc_mempool_create(mgr
->mctx
, buffersize
, &mgr
->bpool
);
2130 if (result
!= ISC_R_SUCCESS
) {
2131 UNLOCK(&mgr
->buffer_lock
);
2134 isc_mempool_setname(mgr
->bpool
, "dispmgr_bpool");
2135 isc_mempool_setmaxalloc(mgr
->bpool
, maxbuffers
);
2136 isc_mempool_associatelock(mgr
->bpool
, &mgr
->pool_lock
);
2139 /* Create or adjust socket pool */
2140 if (mgr
->spool
!= NULL
) {
2141 isc_mempool_setmaxalloc(mgr
->spool
, DNS_DISPATCH_POOLSOCKS
* 2);
2142 UNLOCK(&mgr
->buffer_lock
);
2143 return (ISC_R_SUCCESS
);
2145 result
= isc_mempool_create(mgr
->mctx
, sizeof(dispsocket_t
),
2147 if (result
!= ISC_R_SUCCESS
) {
2148 UNLOCK(&mgr
->buffer_lock
);
2151 isc_mempool_setname(mgr
->spool
, "dispmgr_spool");
2152 isc_mempool_setmaxalloc(mgr
->spool
, maxrequests
);
2153 isc_mempool_associatelock(mgr
->spool
, &mgr
->pool_lock
);
2155 result
= qid_allocate(mgr
, buckets
, increment
, &mgr
->qid
, ISC_TRUE
);
2156 if (result
!= ISC_R_SUCCESS
)
2159 mgr
->buffersize
= buffersize
;
2160 mgr
->maxbuffers
= maxbuffers
;
2161 UNLOCK(&mgr
->buffer_lock
);
2162 return (ISC_R_SUCCESS
);
2165 isc_mempool_destroy(&mgr
->bpool
);
2166 if (mgr
->spool
!= NULL
)
2167 isc_mempool_destroy(&mgr
->spool
);
2168 UNLOCK(&mgr
->buffer_lock
);
2173 dns_dispatchmgr_destroy(dns_dispatchmgr_t
**mgrp
) {
2174 dns_dispatchmgr_t
*mgr
;
2175 isc_boolean_t killit
;
2177 REQUIRE(mgrp
!= NULL
);
2178 REQUIRE(VALID_DISPATCHMGR(*mgrp
));
2184 mgr
->state
|= MGR_SHUTTINGDOWN
;
2186 killit
= destroy_mgr_ok(mgr
);
2189 mgr_log(mgr
, LVL(90), "destroy: killit=%d", killit
);
2196 dns_dispatchmgr_setstats(dns_dispatchmgr_t
*mgr
, isc_stats_t
*stats
) {
2197 REQUIRE(VALID_DISPATCHMGR(mgr
));
2198 REQUIRE(ISC_LIST_EMPTY(mgr
->list
));
2199 REQUIRE(mgr
->stats
== NULL
);
2201 isc_stats_attach(stats
, &mgr
->stats
);
/*
 * bsearch() comparator for in_port_t values.
 *
 * 'key' points at the port being looked up and 'ent' at an element of
 * the port table.  Returns a negative value, zero, or a positive value
 * when the key is respectively less than, equal to, or greater than the
 * table entry, which is the ordering bsearch() needs for a table sorted
 * in ascending port order.
 */
static int
port_cmp(const void *key, const void *ent) {
	in_port_t p1 = *(const in_port_t *)key;
	in_port_t p2 = *(const in_port_t *)ent;

	if (p1 < p2)
		return (-1);
	if (p1 > p2)
		return (1);
	return (0);
}
2217 static isc_boolean_t
2218 portavailable(dns_dispatchmgr_t
*mgr
, isc_socket_t
*sock
,
2219 isc_sockaddr_t
*sockaddrp
)
2221 isc_sockaddr_t sockaddr
;
2222 isc_result_t result
;
2223 in_port_t
*ports
, port
;
2224 unsigned int nports
;
2225 isc_boolean_t available
= ISC_FALSE
;
2227 REQUIRE(sock
!= NULL
|| sockaddrp
!= NULL
);
2231 sockaddrp
= &sockaddr
;
2232 result
= isc_socket_getsockname(sock
, sockaddrp
);
2233 if (result
!= ISC_R_SUCCESS
)
2237 if (isc_sockaddr_pf(sockaddrp
) == AF_INET
) {
2238 ports
= mgr
->v4ports
;
2239 nports
= mgr
->nv4ports
;
2241 ports
= mgr
->v6ports
;
2242 nports
= mgr
->nv6ports
;
2247 port
= isc_sockaddr_getport(sockaddrp
);
2248 if (bsearch(&port
, ports
, nports
, sizeof(in_port_t
), port_cmp
) != NULL
)
2249 available
= ISC_TRUE
;
2256 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
2258 static isc_boolean_t
2259 local_addr_match(dns_dispatch_t
*disp
, isc_sockaddr_t
*addr
) {
2260 isc_sockaddr_t sockaddr
;
2261 isc_result_t result
;
2263 REQUIRE(disp
->socket
!= NULL
);
2269 * Don't match wildcard ports unless the port is available in the
2270 * current configuration.
2272 if (isc_sockaddr_getport(addr
) == 0 &&
2273 isc_sockaddr_getport(&disp
->local
) == 0 &&
2274 !portavailable(disp
->mgr
, disp
->socket
, NULL
)) {
2279 * Check if we match the binding <address,port>.
2280 * Wildcard ports match/fail here.
2282 if (isc_sockaddr_equal(&disp
->local
, addr
))
2284 if (isc_sockaddr_getport(addr
) == 0)
2288 * Check if we match a bound wildcard port <address,port>.
2290 if (!isc_sockaddr_eqaddr(&disp
->local
, addr
))
2292 result
= isc_socket_getsockname(disp
->socket
, &sockaddr
);
2293 if (result
!= ISC_R_SUCCESS
)
2296 return (isc_sockaddr_equal(&sockaddr
, addr
));
2300 * Requires mgr be locked.
2302 * No dispatcher can be locked by this thread when calling this function.
2306 * If a matching dispatcher is found, it is locked after this function
2307 * returns, and must be unlocked by the caller.
2310 dispatch_find(dns_dispatchmgr_t
*mgr
, isc_sockaddr_t
*local
,
2311 unsigned int attributes
, unsigned int mask
,
2312 dns_dispatch_t
**dispp
)
2314 dns_dispatch_t
*disp
;
2315 isc_result_t result
;
2318 * Make certain that we will not match a private or exclusive dispatch.
2320 attributes
&= ~(DNS_DISPATCHATTR_PRIVATE
|DNS_DISPATCHATTR_EXCLUSIVE
);
2321 mask
|= (DNS_DISPATCHATTR_PRIVATE
|DNS_DISPATCHATTR_EXCLUSIVE
);
2323 disp
= ISC_LIST_HEAD(mgr
->list
);
2324 while (disp
!= NULL
) {
2326 if ((disp
->shutting_down
== 0)
2327 && ATTRMATCH(disp
->attributes
, attributes
, mask
)
2328 && local_addr_match(disp
, local
))
2330 UNLOCK(&disp
->lock
);
2331 disp
= ISC_LIST_NEXT(disp
, link
);
2335 result
= ISC_R_NOTFOUND
;
2340 result
= ISC_R_SUCCESS
;
2347 qid_allocate(dns_dispatchmgr_t
*mgr
, unsigned int buckets
,
2348 unsigned int increment
, dns_qid_t
**qidp
,
2349 isc_boolean_t needsocktable
)
2353 isc_result_t result
;
2355 REQUIRE(VALID_DISPATCHMGR(mgr
));
2356 REQUIRE(buckets
< 2097169); /* next prime > 65536 * 32 */
2357 REQUIRE(increment
> buckets
);
2358 REQUIRE(qidp
!= NULL
&& *qidp
== NULL
);
2360 qid
= isc_mem_get(mgr
->mctx
, sizeof(*qid
));
2362 return (ISC_R_NOMEMORY
);
2364 qid
->qid_table
= isc_mem_get(mgr
->mctx
,
2365 buckets
* sizeof(dns_displist_t
));
2366 if (qid
->qid_table
== NULL
) {
2367 isc_mem_put(mgr
->mctx
, qid
, sizeof(*qid
));
2368 return (ISC_R_NOMEMORY
);
2371 qid
->sock_table
= NULL
;
2372 if (needsocktable
) {
2373 qid
->sock_table
= isc_mem_get(mgr
->mctx
, buckets
*
2374 sizeof(dispsocketlist_t
));
2375 if (qid
->sock_table
== NULL
) {
2376 isc_mem_put(mgr
->mctx
, qid
, sizeof(*qid
));
2377 isc_mem_put(mgr
->mctx
, qid
->qid_table
,
2378 buckets
* sizeof(dns_displist_t
));
2379 return (ISC_R_NOMEMORY
);
2383 result
= isc_mutex_init(&qid
->lock
);
2384 if (result
!= ISC_R_SUCCESS
) {
2385 if (qid
->sock_table
!= NULL
) {
2386 isc_mem_put(mgr
->mctx
, qid
->sock_table
,
2387 buckets
* sizeof(dispsocketlist_t
));
2389 isc_mem_put(mgr
->mctx
, qid
->qid_table
,
2390 buckets
* sizeof(dns_displist_t
));
2391 isc_mem_put(mgr
->mctx
, qid
, sizeof(*qid
));
2395 for (i
= 0; i
< buckets
; i
++) {
2396 ISC_LIST_INIT(qid
->qid_table
[i
]);
2397 if (qid
->sock_table
!= NULL
)
2398 ISC_LIST_INIT(qid
->sock_table
[i
]);
2401 qid
->qid_nbuckets
= buckets
;
2402 qid
->qid_increment
= increment
;
2403 qid
->magic
= QID_MAGIC
;
2405 return (ISC_R_SUCCESS
);
2409 qid_destroy(isc_mem_t
*mctx
, dns_qid_t
**qidp
) {
2412 REQUIRE(qidp
!= NULL
);
2415 REQUIRE(VALID_QID(qid
));
2419 isc_mem_put(mctx
, qid
->qid_table
,
2420 qid
->qid_nbuckets
* sizeof(dns_displist_t
));
2421 if (qid
->sock_table
!= NULL
) {
2422 isc_mem_put(mctx
, qid
->sock_table
,
2423 qid
->qid_nbuckets
* sizeof(dispsocketlist_t
));
2425 DESTROYLOCK(&qid
->lock
);
2426 isc_mem_put(mctx
, qid
, sizeof(*qid
));
2430 * Allocate and set important limits.
2433 dispatch_allocate(dns_dispatchmgr_t
*mgr
, unsigned int maxrequests
,
2434 dns_dispatch_t
**dispp
)
2436 dns_dispatch_t
*disp
;
2437 isc_result_t result
;
2439 REQUIRE(VALID_DISPATCHMGR(mgr
));
2440 REQUIRE(dispp
!= NULL
&& *dispp
== NULL
);
2443 * Set up the dispatcher, mostly. Don't bother setting some of
2444 * the options that are controlled by tcp vs. udp, etc.
2447 disp
= isc_mempool_get(mgr
->dpool
);
2449 return (ISC_R_NOMEMORY
);
2453 disp
->maxrequests
= maxrequests
;
2454 disp
->attributes
= 0;
2455 ISC_LINK_INIT(disp
, link
);
2457 disp
->recv_pending
= 0;
2458 memset(&disp
->local
, 0, sizeof(disp
->local
));
2459 disp
->localport
= 0;
2460 disp
->shutting_down
= 0;
2461 disp
->shutdown_out
= 0;
2462 disp
->connected
= 0;
2463 disp
->tcpmsg_valid
= 0;
2464 disp
->shutdown_why
= ISC_R_UNEXPECTED
;
2466 disp
->tcpbuffers
= 0;
2468 ISC_LIST_INIT(disp
->activesockets
);
2469 ISC_LIST_INIT(disp
->inactivesockets
);
2471 dispatch_initrandom(&disp
->arc4ctx
, mgr
->entropy
, NULL
);
2472 disp
->port_table
= NULL
;
2473 disp
->portpool
= NULL
;
2475 result
= isc_mutex_init(&disp
->lock
);
2476 if (result
!= ISC_R_SUCCESS
)
2479 disp
->failsafe_ev
= allocate_event(disp
);
2480 if (disp
->failsafe_ev
== NULL
) {
2481 result
= ISC_R_NOMEMORY
;
2485 disp
->magic
= DISPATCH_MAGIC
;
2488 return (ISC_R_SUCCESS
);
2494 DESTROYLOCK(&disp
->lock
);
2496 isc_mempool_put(mgr
->dpool
, disp
);
2503 * MUST be unlocked, and not used by anything.
2506 dispatch_free(dns_dispatch_t
**dispp
)
2508 dns_dispatch_t
*disp
;
2509 dns_dispatchmgr_t
*mgr
;
2512 REQUIRE(VALID_DISPATCH(*dispp
));
2517 REQUIRE(VALID_DISPATCHMGR(mgr
));
2519 if (disp
->tcpmsg_valid
) {
2520 dns_tcpmsg_invalidate(&disp
->tcpmsg
);
2521 disp
->tcpmsg_valid
= 0;
2524 INSIST(disp
->tcpbuffers
== 0);
2525 INSIST(disp
->requests
== 0);
2526 INSIST(disp
->recv_pending
== 0);
2527 INSIST(ISC_LIST_EMPTY(disp
->activesockets
));
2528 INSIST(ISC_LIST_EMPTY(disp
->inactivesockets
));
2530 isc_mempool_put(mgr
->epool
, disp
->failsafe_ev
);
2531 disp
->failsafe_ev
= NULL
;
2533 if (disp
->qid
!= NULL
)
2534 qid_destroy(mgr
->mctx
, &disp
->qid
);
2536 if (disp
->port_table
!= NULL
) {
2537 for (i
= 0; i
< DNS_DISPATCH_PORTTABLESIZE
; i
++)
2538 INSIST(ISC_LIST_EMPTY(disp
->port_table
[i
]));
2539 isc_mem_put(mgr
->mctx
, disp
->port_table
,
2540 sizeof(disp
->port_table
[0]) *
2541 DNS_DISPATCH_PORTTABLESIZE
);
2544 if (disp
->portpool
!= NULL
)
2545 isc_mempool_destroy(&disp
->portpool
);
2548 DESTROYLOCK(&disp
->lock
);
2550 isc_mempool_put(mgr
->dpool
, disp
);
2554 dns_dispatch_createtcp(dns_dispatchmgr_t
*mgr
, isc_socket_t
*sock
,
2555 isc_taskmgr_t
*taskmgr
, unsigned int buffersize
,
2556 unsigned int maxbuffers
, unsigned int maxrequests
,
2557 unsigned int buckets
, unsigned int increment
,
2558 unsigned int attributes
, dns_dispatch_t
**dispp
)
2560 isc_result_t result
;
2561 dns_dispatch_t
*disp
;
2566 REQUIRE(VALID_DISPATCHMGR(mgr
));
2567 REQUIRE(isc_socket_gettype(sock
) == isc_sockettype_tcp
);
2568 REQUIRE((attributes
& DNS_DISPATCHATTR_TCP
) != 0);
2569 REQUIRE((attributes
& DNS_DISPATCHATTR_UDP
) == 0);
2571 attributes
|= DNS_DISPATCHATTR_PRIVATE
; /* XXXMLG */
2576 * dispatch_allocate() checks mgr for us.
2577 * qid_allocate() checks buckets and increment for us.
2580 result
= dispatch_allocate(mgr
, maxrequests
, &disp
);
2581 if (result
!= ISC_R_SUCCESS
) {
2586 result
= qid_allocate(mgr
, buckets
, increment
, &disp
->qid
, ISC_FALSE
);
2587 if (result
!= ISC_R_SUCCESS
)
2588 goto deallocate_dispatch
;
2590 disp
->socktype
= isc_sockettype_tcp
;
2591 disp
->socket
= NULL
;
2592 isc_socket_attach(sock
, &disp
->socket
);
2595 disp
->task
[0] = NULL
;
2596 result
= isc_task_create(taskmgr
, 0, &disp
->task
[0]);
2597 if (result
!= ISC_R_SUCCESS
)
2600 disp
->ctlevent
= isc_event_allocate(mgr
->mctx
, disp
,
2601 DNS_EVENT_DISPATCHCONTROL
,
2603 sizeof(isc_event_t
));
2604 if (disp
->ctlevent
== NULL
) {
2605 result
= ISC_R_NOMEMORY
;
2609 isc_task_setname(disp
->task
[0], "tcpdispatch", disp
);
2611 dns_tcpmsg_init(mgr
->mctx
, disp
->socket
, &disp
->tcpmsg
);
2612 disp
->tcpmsg_valid
= 1;
2614 disp
->attributes
= attributes
;
2617 * Append it to the dispatcher list.
2619 ISC_LIST_APPEND(mgr
->list
, disp
, link
);
2622 mgr_log(mgr
, LVL(90), "created TCP dispatcher %p", disp
);
2623 dispatch_log(disp
, LVL(90), "created task %p", disp
->task
[0]);
2627 return (ISC_R_SUCCESS
);
2633 isc_task_detach(&disp
->task
[0]);
2635 isc_socket_detach(&disp
->socket
);
2636 deallocate_dispatch
:
2637 dispatch_free(&disp
);
2645 dns_dispatch_getudp(dns_dispatchmgr_t
*mgr
, isc_socketmgr_t
*sockmgr
,
2646 isc_taskmgr_t
*taskmgr
, isc_sockaddr_t
*localaddr
,
2647 unsigned int buffersize
,
2648 unsigned int maxbuffers
, unsigned int maxrequests
,
2649 unsigned int buckets
, unsigned int increment
,
2650 unsigned int attributes
, unsigned int mask
,
2651 dns_dispatch_t
**dispp
)
2653 isc_result_t result
;
2654 dns_dispatch_t
*disp
= NULL
;
2656 REQUIRE(VALID_DISPATCHMGR(mgr
));
2657 REQUIRE(sockmgr
!= NULL
);
2658 REQUIRE(localaddr
!= NULL
);
2659 REQUIRE(taskmgr
!= NULL
);
2660 REQUIRE(buffersize
>= 512 && buffersize
< (64 * 1024));
2661 REQUIRE(maxbuffers
> 0);
2662 REQUIRE(buckets
< 2097169); /* next prime > 65536 * 32 */
2663 REQUIRE(increment
> buckets
);
2664 REQUIRE(dispp
!= NULL
&& *dispp
== NULL
);
2665 REQUIRE((attributes
& DNS_DISPATCHATTR_TCP
) == 0);
2667 result
= dns_dispatchmgr_setudp(mgr
, buffersize
, maxbuffers
,
2668 maxrequests
, buckets
, increment
);
2669 if (result
!= ISC_R_SUCCESS
)
2674 if ((attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) != 0) {
2675 REQUIRE(isc_sockaddr_getport(localaddr
) == 0);
2680 * See if we have a dispatcher that matches.
2682 result
= dispatch_find(mgr
, localaddr
, attributes
, mask
, &disp
);
2683 if (result
== ISC_R_SUCCESS
) {
2686 if (disp
->maxrequests
< maxrequests
)
2687 disp
->maxrequests
= maxrequests
;
2689 if ((disp
->attributes
& DNS_DISPATCHATTR_NOLISTEN
) == 0 &&
2690 (attributes
& DNS_DISPATCHATTR_NOLISTEN
) != 0)
2692 disp
->attributes
|= DNS_DISPATCHATTR_NOLISTEN
;
2693 if (disp
->recv_pending
!= 0)
2694 isc_socket_cancel(disp
->socket
, disp
->task
[0],
2695 ISC_SOCKCANCEL_RECV
);
2698 UNLOCK(&disp
->lock
);
2703 return (ISC_R_SUCCESS
);
2710 result
= dispatch_createudp(mgr
, sockmgr
, taskmgr
, localaddr
,
2711 maxrequests
, attributes
, &disp
);
2712 if (result
!= ISC_R_SUCCESS
) {
2719 return (ISC_R_SUCCESS
);
2723 * mgr should be locked.
2726 #ifndef DNS_DISPATCH_HELD
2727 #define DNS_DISPATCH_HELD 20U
2731 get_udpsocket(dns_dispatchmgr_t
*mgr
, dns_dispatch_t
*disp
,
2732 isc_socketmgr_t
*sockmgr
, isc_sockaddr_t
*localaddr
,
2733 isc_socket_t
**sockp
)
2736 isc_socket_t
*held
[DNS_DISPATCH_HELD
];
2737 isc_sockaddr_t localaddr_bound
;
2738 isc_socket_t
*sock
= NULL
;
2739 isc_result_t result
= ISC_R_SUCCESS
;
2740 isc_boolean_t anyport
;
2742 INSIST(sockp
!= NULL
&& *sockp
== NULL
);
2744 localaddr_bound
= *localaddr
;
2745 anyport
= ISC_TF(isc_sockaddr_getport(localaddr
) == 0);
2748 unsigned int nports
;
2752 * If no port is specified, we first try to pick up a random
2753 * port by ourselves.
2755 if (isc_sockaddr_pf(&disp
->local
) == AF_INET
) {
2756 nports
= disp
->mgr
->nv4ports
;
2757 ports
= disp
->mgr
->v4ports
;
2759 nports
= disp
->mgr
->nv6ports
;
2760 ports
= disp
->mgr
->v6ports
;
2763 return (ISC_R_ADDRNOTAVAIL
);
2765 for (i
= 0; i
< 1024; i
++) {
2768 prt
= ports
[dispatch_uniformrandom(
2771 isc_sockaddr_setport(&localaddr_bound
, prt
);
2772 result
= open_socket(sockmgr
, &localaddr_bound
,
2774 if (result
== ISC_R_SUCCESS
||
2775 result
!= ISC_R_ADDRINUSE
) {
2776 disp
->localport
= prt
;
2783 * If this fails 1024 times, we then ask the kernel for
2787 /* Allow address reuse for non-random ports. */
2788 result
= open_socket(sockmgr
, localaddr
,
2789 ISC_SOCKET_REUSEADDRESS
, &sock
);
2791 if (result
== ISC_R_SUCCESS
)
2797 memset(held
, 0, sizeof(held
));
2800 for (j
= 0; j
< 0xffffU
; j
++) {
2801 result
= open_socket(sockmgr
, localaddr
, 0, &sock
);
2802 if (result
!= ISC_R_SUCCESS
)
2806 else if (portavailable(mgr
, sock
, NULL
))
2808 if (held
[i
] != NULL
)
2809 isc_socket_detach(&held
[i
]);
2812 if (i
== DNS_DISPATCH_HELD
)
2816 mgr_log(mgr
, ISC_LOG_ERROR
,
2817 "avoid-v%s-udp-ports: unable to allocate "
2818 "an available port",
2819 isc_sockaddr_pf(localaddr
) == AF_INET
? "4" : "6");
2820 result
= ISC_R_FAILURE
;
2826 for (i
= 0; i
< DNS_DISPATCH_HELD
; i
++) {
2827 if (held
[i
] != NULL
)
2828 isc_socket_detach(&held
[i
]);
2835 dispatch_createudp(dns_dispatchmgr_t
*mgr
, isc_socketmgr_t
*sockmgr
,
2836 isc_taskmgr_t
*taskmgr
,
2837 isc_sockaddr_t
*localaddr
,
2838 unsigned int maxrequests
,
2839 unsigned int attributes
,
2840 dns_dispatch_t
**dispp
)
2842 isc_result_t result
;
2843 dns_dispatch_t
*disp
;
2844 isc_socket_t
*sock
= NULL
;
2848 * dispatch_allocate() checks mgr for us.
2851 result
= dispatch_allocate(mgr
, maxrequests
, &disp
);
2852 if (result
!= ISC_R_SUCCESS
)
2855 if ((attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) == 0) {
2856 result
= get_udpsocket(mgr
, disp
, sockmgr
, localaddr
, &sock
);
2857 if (result
!= ISC_R_SUCCESS
)
2858 goto deallocate_dispatch
;
2860 isc_sockaddr_t sa_any
;
2863 * For dispatches using exclusive sockets with a specific
2864 * source address, we only check if the specified address is
2865 * available on the system. Query sockets will be created later
2868 isc_sockaddr_anyofpf(&sa_any
, isc_sockaddr_pf(localaddr
));
2869 if (!isc_sockaddr_eqaddr(&sa_any
, localaddr
)) {
2870 result
= open_socket(sockmgr
, localaddr
, 0, &sock
);
2872 isc_socket_detach(&sock
);
2873 if (result
!= ISC_R_SUCCESS
)
2874 goto deallocate_dispatch
;
2877 disp
->port_table
= isc_mem_get(mgr
->mctx
,
2878 sizeof(disp
->port_table
[0]) *
2879 DNS_DISPATCH_PORTTABLESIZE
);
2880 if (disp
->port_table
== NULL
)
2881 goto deallocate_dispatch
;
2882 for (i
= 0; i
< DNS_DISPATCH_PORTTABLESIZE
; i
++)
2883 ISC_LIST_INIT(disp
->port_table
[i
]);
2885 result
= isc_mempool_create(mgr
->mctx
, sizeof(dispportentry_t
),
2887 if (result
!= ISC_R_SUCCESS
)
2888 goto deallocate_dispatch
;
2889 isc_mempool_setname(disp
->portpool
, "disp_portpool");
2890 isc_mempool_setfreemax(disp
->portpool
, 128);
2892 disp
->socktype
= isc_sockettype_udp
;
2893 disp
->socket
= sock
;
2894 disp
->local
= *localaddr
;
2896 if ((attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) != 0)
2897 disp
->ntasks
= MAX_INTERNAL_TASKS
;
2900 for (i
= 0; i
< disp
->ntasks
; i
++) {
2901 disp
->task
[i
] = NULL
;
2902 result
= isc_task_create(taskmgr
, 0, &disp
->task
[i
]);
2903 if (result
!= ISC_R_SUCCESS
) {
2905 isc_task_shutdown(disp
->task
[i
]);
2906 isc_task_detach(&disp
->task
[i
]);
2910 isc_task_setname(disp
->task
[i
], "udpdispatch", disp
);
2913 disp
->ctlevent
= isc_event_allocate(mgr
->mctx
, disp
,
2914 DNS_EVENT_DISPATCHCONTROL
,
2916 sizeof(isc_event_t
));
2917 if (disp
->ctlevent
== NULL
) {
2918 result
= ISC_R_NOMEMORY
;
2922 attributes
&= ~DNS_DISPATCHATTR_TCP
;
2923 attributes
|= DNS_DISPATCHATTR_UDP
;
2924 disp
->attributes
= attributes
;
2927 * Append it to the dispatcher list.
2929 ISC_LIST_APPEND(mgr
->list
, disp
, link
);
2931 mgr_log(mgr
, LVL(90), "created UDP dispatcher %p", disp
);
2932 dispatch_log(disp
, LVL(90), "created task %p", disp
->task
[0]); /* XXX */
2933 if (disp
->socket
!= NULL
)
2934 dispatch_log(disp
, LVL(90), "created socket %p", disp
->socket
);
2943 for (i
= 0; i
< disp
->ntasks
; i
++)
2944 isc_task_detach(&disp
->task
[i
]);
2946 if (disp
->socket
!= NULL
)
2947 isc_socket_detach(&disp
->socket
);
2948 deallocate_dispatch
:
2949 dispatch_free(&disp
);
2955 dns_dispatch_attach(dns_dispatch_t
*disp
, dns_dispatch_t
**dispp
) {
2956 REQUIRE(VALID_DISPATCH(disp
));
2957 REQUIRE(dispp
!= NULL
&& *dispp
== NULL
);
2961 UNLOCK(&disp
->lock
);
2967 * It is important to lock the manager while we are deleting the dispatch,
2968 * since dns_dispatch_getudp will call dispatch_find, which returns to
2969 * the caller a dispatch but does not attach to it until later. _getudp
2970 * locks the manager, however, so locking it here will keep us from attaching
2971 * to a dispatcher that is in the process of going away.
2974 dns_dispatch_detach(dns_dispatch_t
**dispp
) {
2975 dns_dispatch_t
*disp
;
2976 dispsocket_t
*dispsock
;
2977 isc_boolean_t killit
;
2979 REQUIRE(dispp
!= NULL
&& VALID_DISPATCH(*dispp
));
2986 INSIST(disp
->refcount
> 0);
2989 if (disp
->refcount
== 0) {
2990 if (disp
->recv_pending
> 0)
2991 isc_socket_cancel(disp
->socket
, disp
->task
[0],
2992 ISC_SOCKCANCEL_RECV
);
2993 for (dispsock
= ISC_LIST_HEAD(disp
->activesockets
);
2995 dispsock
= ISC_LIST_NEXT(dispsock
, link
)) {
2996 isc_socket_cancel(dispsock
->socket
, dispsock
->task
,
2997 ISC_SOCKCANCEL_RECV
);
2999 disp
->shutting_down
= 1;
3002 dispatch_log(disp
, LVL(90), "detach: refcount %d", disp
->refcount
);
3004 killit
= destroy_disp_ok(disp
);
3005 UNLOCK(&disp
->lock
);
3007 isc_task_send(disp
->task
[0], &disp
->ctlevent
);
3011 dns_dispatch_addresponse2(dns_dispatch_t
*disp
, isc_sockaddr_t
*dest
,
3012 isc_task_t
*task
, isc_taskaction_t action
, void *arg
,
3013 dns_messageid_t
*idp
, dns_dispentry_t
**resp
,
3014 isc_socketmgr_t
*sockmgr
)
3016 dns_dispentry_t
*res
;
3017 unsigned int bucket
;
3018 in_port_t localport
= 0;
3023 dispsocket_t
*dispsocket
= NULL
;
3024 isc_result_t result
;
3026 REQUIRE(VALID_DISPATCH(disp
));
3027 REQUIRE(task
!= NULL
);
3028 REQUIRE(dest
!= NULL
);
3029 REQUIRE(resp
!= NULL
&& *resp
== NULL
);
3030 REQUIRE(idp
!= NULL
);
3031 if ((disp
->attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) != 0)
3032 REQUIRE(sockmgr
!= NULL
);
3036 if (disp
->shutting_down
== 1) {
3037 UNLOCK(&disp
->lock
);
3038 return (ISC_R_SHUTTINGDOWN
);
3041 if (disp
->requests
>= disp
->maxrequests
) {
3042 UNLOCK(&disp
->lock
);
3043 return (ISC_R_QUOTA
);
3046 if ((disp
->attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) != 0 &&
3047 disp
->nsockets
> DNS_DISPATCH_SOCKSQUOTA
) {
3048 dispsocket_t
*oldestsocket
;
3049 dns_dispentry_t
*oldestresp
;
3050 dns_dispatchevent_t
*rev
;
3053 * Kill the oldest outstanding query if the number of sockets
3054 * exceeds the quota, to keep room for new queries.
3056 oldestsocket
= ISC_LIST_HEAD(disp
->activesockets
);
3057 oldestresp
= oldestsocket
->resp
;
3058 if (oldestresp
!= NULL
&& !oldestresp
->item_out
) {
3059 rev
= allocate_event(oldestresp
->disp
);
3061 rev
->buffer
.base
= NULL
;
3062 rev
->result
= ISC_R_CANCELED
;
3063 rev
->id
= oldestresp
->id
;
3064 ISC_EVENT_INIT(rev
, sizeof(*rev
), 0,
3065 NULL
, DNS_EVENT_DISPATCH
,
3067 oldestresp
->arg
, oldestresp
,
3069 oldestresp
->item_out
= ISC_TRUE
;
3070 isc_task_send(oldestresp
->task
,
3071 ISC_EVENT_PTR(&rev
));
3072 inc_stats(disp
->mgr
,
3073 dns_resstatscounter_dispabort
);
3078 * Move this entry to the tail so that it won't (easily) be
3079 * examined before actually being canceled.
3081 ISC_LIST_UNLINK(disp
->activesockets
, oldestsocket
, link
);
3082 ISC_LIST_APPEND(disp
->activesockets
, oldestsocket
, link
);
3085 qid
= DNS_QID(disp
);
3088 if ((disp
->attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) != 0) {
3090 * Get a separate UDP socket with a random port number.
3092 result
= get_dispsocket(disp
, dest
, sockmgr
, qid
, &dispsocket
,
3094 if (result
!= ISC_R_SUCCESS
) {
3096 UNLOCK(&disp
->lock
);
3097 inc_stats(disp
->mgr
, dns_resstatscounter_dispsockfail
);
3101 localport
= disp
->localport
;
3105 * Try somewhat hard to find a unique ID.
3107 id
= (dns_messageid_t
)dispatch_random(DISP_ARC4CTX(disp
));
3108 bucket
= dns_hash(qid
, dest
, id
, localport
);
3110 for (i
= 0; i
< 64; i
++) {
3111 if (entry_search(qid
, dest
, id
, localport
, bucket
) == NULL
) {
3115 id
+= qid
->qid_increment
;
3117 bucket
= dns_hash(qid
, dest
, id
, localport
);
3122 UNLOCK(&disp
->lock
);
3123 return (ISC_R_NOMORE
);
3126 res
= isc_mempool_get(disp
->mgr
->rpool
);
3129 UNLOCK(&disp
->lock
);
3130 if (dispsocket
!= NULL
)
3131 destroy_dispsocket(disp
, &dispsocket
);
3132 return (ISC_R_NOMEMORY
);
3138 isc_task_attach(task
, &res
->task
);
3141 res
->port
= localport
;
3142 res
->bucket
= bucket
;
3144 res
->action
= action
;
3146 res
->dispsocket
= dispsocket
;
3147 if (dispsocket
!= NULL
)
3148 dispsocket
->resp
= res
;
3149 res
->item_out
= ISC_FALSE
;
3150 ISC_LIST_INIT(res
->items
);
3151 ISC_LINK_INIT(res
, link
);
3152 res
->magic
= RESPONSE_MAGIC
;
3153 ISC_LIST_APPEND(qid
->qid_table
[bucket
], res
, link
);
3156 request_log(disp
, res
, LVL(90),
3157 "attached to task %p", res
->task
);
3159 if (((disp
->attributes
& DNS_DISPATCHATTR_UDP
) != 0) ||
3160 ((disp
->attributes
& DNS_DISPATCHATTR_CONNECTED
) != 0)) {
3161 result
= startrecv(disp
, dispsocket
);
3162 if (result
!= ISC_R_SUCCESS
) {
3164 ISC_LIST_UNLINK(qid
->qid_table
[bucket
], res
, link
);
3167 if (dispsocket
!= NULL
)
3168 destroy_dispsocket(disp
, &dispsocket
);
3173 UNLOCK(&disp
->lock
);
3174 isc_task_detach(&res
->task
);
3175 isc_mempool_put(disp
->mgr
->rpool
, res
);
3180 if (dispsocket
!= NULL
)
3181 ISC_LIST_APPEND(disp
->activesockets
, dispsocket
, link
);
3183 UNLOCK(&disp
->lock
);
3188 if ((disp
->attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) != 0)
3189 INSIST(res
->dispsocket
!= NULL
);
3191 return (ISC_R_SUCCESS
);
3195 dns_dispatch_addresponse(dns_dispatch_t
*disp
, isc_sockaddr_t
*dest
,
3196 isc_task_t
*task
, isc_taskaction_t action
, void *arg
,
3197 dns_messageid_t
*idp
, dns_dispentry_t
**resp
)
3199 REQUIRE(VALID_DISPATCH(disp
));
3200 REQUIRE((disp
->attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) == 0);
3202 return (dns_dispatch_addresponse2(disp
, dest
, task
, action
, arg
,
3207 dns_dispatch_starttcp(dns_dispatch_t
*disp
) {
3209 REQUIRE(VALID_DISPATCH(disp
));
3211 dispatch_log(disp
, LVL(90), "starttcp %p", disp
->task
[0]);
3214 disp
->attributes
|= DNS_DISPATCHATTR_CONNECTED
;
3215 (void)startrecv(disp
, NULL
);
3216 UNLOCK(&disp
->lock
);
3220 dns_dispatch_removeresponse(dns_dispentry_t
**resp
,
3221 dns_dispatchevent_t
**sockevent
)
3223 dns_dispatchmgr_t
*mgr
;
3224 dns_dispatch_t
*disp
;
3225 dns_dispentry_t
*res
;
3226 dispsocket_t
*dispsock
;
3227 dns_dispatchevent_t
*ev
;
3228 unsigned int bucket
;
3229 isc_boolean_t killit
;
3231 isc_eventlist_t events
;
3234 REQUIRE(resp
!= NULL
);
3235 REQUIRE(VALID_RESPONSE(*resp
));
3241 REQUIRE(VALID_DISPATCH(disp
));
3243 REQUIRE(VALID_DISPATCHMGR(mgr
));
3245 qid
= DNS_QID(disp
);
3247 if (sockevent
!= NULL
) {
3248 REQUIRE(*sockevent
!= NULL
);
3257 INSIST(disp
->requests
> 0);
3259 INSIST(disp
->refcount
> 0);
3262 if (disp
->refcount
== 0) {
3263 if (disp
->recv_pending
> 0)
3264 isc_socket_cancel(disp
->socket
, disp
->task
[0],
3265 ISC_SOCKCANCEL_RECV
);
3266 for (dispsock
= ISC_LIST_HEAD(disp
->activesockets
);
3268 dispsock
= ISC_LIST_NEXT(dispsock
, link
)) {
3269 isc_socket_cancel(dispsock
->socket
, dispsock
->task
,
3270 ISC_SOCKCANCEL_RECV
);
3272 disp
->shutting_down
= 1;
3275 bucket
= res
->bucket
;
3278 ISC_LIST_UNLINK(qid
->qid_table
[bucket
], res
, link
);
3281 if (ev
== NULL
&& res
->item_out
) {
3283 * We've posted our event, but the caller hasn't gotten it
3284 * yet. Take it back.
3286 ISC_LIST_INIT(events
);
3287 n
= isc_task_unsend(res
->task
, res
, DNS_EVENT_DISPATCH
,
3290 * We had better have gotten it back.
3293 ev
= (dns_dispatchevent_t
*)ISC_LIST_HEAD(events
);
3297 REQUIRE(res
->item_out
== ISC_TRUE
);
3298 res
->item_out
= ISC_FALSE
;
3299 if (ev
->buffer
.base
!= NULL
)
3300 free_buffer(disp
, ev
->buffer
.base
, ev
->buffer
.length
);
3301 free_event(disp
, ev
);
3304 request_log(disp
, res
, LVL(90), "detaching from task %p", res
->task
);
3305 isc_task_detach(&res
->task
);
3307 if (res
->dispsocket
!= NULL
) {
3308 isc_socket_cancel(res
->dispsocket
->socket
,
3309 res
->dispsocket
->task
, ISC_SOCKCANCEL_RECV
);
3310 res
->dispsocket
->resp
= NULL
;
3314 * Free any buffered requests as well
3316 ev
= ISC_LIST_HEAD(res
->items
);
3317 while (ev
!= NULL
) {
3318 ISC_LIST_UNLINK(res
->items
, ev
, ev_link
);
3319 if (ev
->buffer
.base
!= NULL
)
3320 free_buffer(disp
, ev
->buffer
.base
, ev
->buffer
.length
);
3321 free_event(disp
, ev
);
3322 ev
= ISC_LIST_HEAD(res
->items
);
3325 isc_mempool_put(disp
->mgr
->rpool
, res
);
3326 if (disp
->shutting_down
== 1)
3329 (void)startrecv(disp
, NULL
);
3331 killit
= destroy_disp_ok(disp
);
3332 UNLOCK(&disp
->lock
);
3334 isc_task_send(disp
->task
[0], &disp
->ctlevent
);
3338 do_cancel(dns_dispatch_t
*disp
) {
3339 dns_dispatchevent_t
*ev
;
3340 dns_dispentry_t
*resp
;
3343 if (disp
->shutdown_out
== 1)
3346 qid
= DNS_QID(disp
);
3349 * Search for the first response handler without packets outstanding
3350 * unless a specific handler is given.
3353 for (resp
= linear_first(qid
);
3354 resp
!= NULL
&& resp
->item_out
;
3356 resp
= linear_next(qid
, resp
);
3359 * No one to send the cancel event to, so nothing to do.
3365 * Send the shutdown failsafe event to this resp.
3367 ev
= disp
->failsafe_ev
;
3368 ISC_EVENT_INIT(ev
, sizeof(*ev
), 0, NULL
, DNS_EVENT_DISPATCH
,
3369 resp
->action
, resp
->arg
, resp
, NULL
, NULL
);
3370 ev
->result
= disp
->shutdown_why
;
3371 ev
->buffer
.base
= NULL
;
3372 ev
->buffer
.length
= 0;
3373 disp
->shutdown_out
= 1;
3374 request_log(disp
, resp
, LVL(10),
3375 "cancel: failsafe event %p -> task %p",
3377 resp
->item_out
= ISC_TRUE
;
3378 isc_task_send(resp
->task
, ISC_EVENT_PTR(&ev
));
3384 dns_dispatch_getsocket(dns_dispatch_t
*disp
) {
3385 REQUIRE(VALID_DISPATCH(disp
));
3387 return (disp
->socket
);
3391 dns_dispatch_getentrysocket(dns_dispentry_t
*resp
) {
3392 REQUIRE(VALID_RESPONSE(resp
));
3394 if (resp
->dispsocket
!= NULL
)
3395 return (resp
->dispsocket
->socket
);
3401 dns_dispatch_getlocaladdress(dns_dispatch_t
*disp
, isc_sockaddr_t
*addrp
) {
3403 REQUIRE(VALID_DISPATCH(disp
));
3404 REQUIRE(addrp
!= NULL
);
3406 if (disp
->socktype
== isc_sockettype_udp
) {
3407 *addrp
= disp
->local
;
3408 return (ISC_R_SUCCESS
);
3410 return (ISC_R_NOTIMPLEMENTED
);
3414 dns_dispatch_cancel(dns_dispatch_t
*disp
) {
3415 REQUIRE(VALID_DISPATCH(disp
));
3419 if (disp
->shutting_down
== 1) {
3420 UNLOCK(&disp
->lock
);
3424 disp
->shutdown_why
= ISC_R_CANCELED
;
3425 disp
->shutting_down
= 1;
3428 UNLOCK(&disp
->lock
);
3434 dns_dispatch_getattributes(dns_dispatch_t
*disp
) {
3435 REQUIRE(VALID_DISPATCH(disp
));
3438 * We don't bother locking disp here; it's the caller's responsibility
3439 * to use only non-volatile flags.
3441 return (disp
->attributes
);
3445 dns_dispatch_changeattributes(dns_dispatch_t
*disp
,
3446 unsigned int attributes
, unsigned int mask
)
3448 REQUIRE(VALID_DISPATCH(disp
));
3449 /* Exclusive attribute can only be set on creation */
3450 REQUIRE((attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) == 0);
3451 /* Also, a dispatch with randomport specified cannot start listening */
3452 REQUIRE((disp
->attributes
& DNS_DISPATCHATTR_EXCLUSIVE
) == 0 ||
3453 (attributes
& DNS_DISPATCHATTR_NOLISTEN
) == 0);
3456 * Should check for valid attributes here!
3461 if ((mask
& DNS_DISPATCHATTR_NOLISTEN
) != 0) {
3462 if ((disp
->attributes
& DNS_DISPATCHATTR_NOLISTEN
) != 0 &&
3463 (attributes
& DNS_DISPATCHATTR_NOLISTEN
) == 0) {
3464 disp
->attributes
&= ~DNS_DISPATCHATTR_NOLISTEN
;
3465 (void)startrecv(disp
, NULL
);
3466 } else if ((disp
->attributes
& DNS_DISPATCHATTR_NOLISTEN
)
3468 (attributes
& DNS_DISPATCHATTR_NOLISTEN
) != 0) {
3469 disp
->attributes
|= DNS_DISPATCHATTR_NOLISTEN
;
3470 if (disp
->recv_pending
!= 0)
3471 isc_socket_cancel(disp
->socket
, disp
->task
[0],
3472 ISC_SOCKCANCEL_RECV
);
3476 disp
->attributes
&= ~mask
;
3477 disp
->attributes
|= (attributes
& mask
);
3478 UNLOCK(&disp
->lock
);
3482 dns_dispatch_importrecv(dns_dispatch_t
*disp
, isc_event_t
*event
) {
3484 isc_socketevent_t
*sevent
, *newsevent
;
3486 REQUIRE(VALID_DISPATCH(disp
));
3487 REQUIRE((disp
->attributes
& DNS_DISPATCHATTR_NOLISTEN
) != 0);
3488 REQUIRE(event
!= NULL
);
3490 sevent
= (isc_socketevent_t
*)event
;
3492 INSIST(sevent
->n
<= disp
->mgr
->buffersize
);
3493 newsevent
= (isc_socketevent_t
*)
3494 isc_event_allocate(disp
->mgr
->mctx
, NULL
,
3495 DNS_EVENT_IMPORTRECVDONE
, udp_shrecv
,
3496 disp
, sizeof(isc_socketevent_t
));
3497 if (newsevent
== NULL
)
3500 buf
= allocate_udp_buffer(disp
);
3502 isc_event_free(ISC_EVENT_PTR(&newsevent
));
3505 memcpy(buf
, sevent
->region
.base
, sevent
->n
);
3506 newsevent
->region
.base
= buf
;
3507 newsevent
->region
.length
= disp
->mgr
->buffersize
;
3508 newsevent
->n
= sevent
->n
;
3509 newsevent
->result
= sevent
->result
;
3510 newsevent
->address
= sevent
->address
;
3511 newsevent
->timestamp
= sevent
->timestamp
;
3512 newsevent
->pktinfo
= sevent
->pktinfo
;
3513 newsevent
->attributes
= sevent
->attributes
;
3515 isc_task_send(disp
->task
[0], ISC_EVENT_PTR(&newsevent
));
3520 dns_dispatchmgr_dump(dns_dispatchmgr_t
*mgr
) {
3521 dns_dispatch_t
*disp
;
3524 disp
= ISC_LIST_HEAD(mgr
->list
);
3525 while (disp
!= NULL
) {
3526 isc_sockaddr_format(&disp
->local
, foo
, sizeof(foo
));
3527 printf("\tdispatch %p, addr %s\n", disp
, foo
);
3528 disp
= ISC_LIST_NEXT(disp
, link
);