Sync usage with man page.
[netbsd-mini2440.git] / external / bsd / bind / dist / lib / dns / rbtdb.c
blob009de37a68d58583dd106215d15b95877966c588
1 /* $NetBSD: rbtdb.c,v 1.1.1.4 2009/12/26 22:24:50 christos Exp $ */
3 /*
4 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
5 * Copyright (C) 1999-2003 Internet Software Consortium.
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
20 /* Id: rbtdb.c,v 1.292 2009/11/26 23:48:14 tbox Exp */
22 /*! \file */
25 * Principal Author: Bob Halley
28 #include <config.h>
30 /* #define inline */
32 #include <isc/event.h>
33 #include <isc/heap.h>
34 #include <isc/mem.h>
35 #include <isc/mutex.h>
36 #include <isc/platform.h>
37 #include <isc/print.h>
38 #include <isc/random.h>
39 #include <isc/refcount.h>
40 #include <isc/rwlock.h>
41 #include <isc/serial.h>
42 #include <isc/string.h>
43 #include <isc/task.h>
44 #include <isc/time.h>
45 #include <isc/util.h>
47 #include <dns/acache.h>
48 #include <dns/db.h>
49 #include <dns/dbiterator.h>
50 #include <dns/events.h>
51 #include <dns/fixedname.h>
52 #include <dns/lib.h>
53 #include <dns/log.h>
54 #include <dns/masterdump.h>
55 #include <dns/nsec.h>
56 #include <dns/nsec3.h>
57 #include <dns/rbt.h>
58 #include <dns/rdata.h>
59 #include <dns/rdataset.h>
60 #include <dns/rdatasetiter.h>
61 #include <dns/rdataslab.h>
62 #include <dns/rdatastruct.h>
63 #include <dns/result.h>
64 #include <dns/stats.h>
65 #include <dns/view.h>
66 #include <dns/zone.h>
67 #include <dns/zonekey.h>
69 #ifdef DNS_RBTDB_VERSION64
70 #include "rbtdb64.h"
71 #else
72 #include "rbtdb.h"
73 #endif
75 #ifdef DNS_RBTDB_VERSION64
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
77 #else
78 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
79 #endif
81 /*%
82 * Note that "impmagic" is not the first four bytes of the struct, so
83 * ISC_MAGIC_VALID cannot be used.
85 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
86 (rbtdb)->common.impmagic == RBTDB_MAGIC)
88 #ifdef DNS_RBTDB_VERSION64
89 typedef isc_uint64_t rbtdb_serial_t;
90 /*%
91 * Make casting easier in symbolic debuggers by using different names
92 * for the 64 bit version.
94 #define dns_rbtdb_t dns_rbtdb64_t
95 #define rdatasetheader_t rdatasetheader64_t
96 #define rbtdb_version_t rbtdb_version64_t
97 #else
98 typedef isc_uint32_t rbtdb_serial_t;
99 #endif
101 typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into one 32-bit value:
 * the "base" type lives in the low half and the "extended" type in the
 * high half.
 *
 * RBTDB_RDATATYPE_VALUE() widens both operands to rbtdb_rdatatype_t before
 * combining them, so an extended type of 0x8000 or above is shifted as an
 * unsigned 32-bit quantity rather than as a promoted (signed) int.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | \
			     (rbtdb_rdatatype_t)(b)))
107 #define RBTDB_RDATATYPE_SIGNSEC \
108 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
109 #define RBTDB_RDATATYPE_SIGNSEC3 \
110 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
111 #define RBTDB_RDATATYPE_SIGNS \
112 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
113 #define RBTDB_RDATATYPE_SIGCNAME \
114 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
115 #define RBTDB_RDATATYPE_SIGDNAME \
116 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
117 #define RBTDB_RDATATYPE_NCACHEANY \
118 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
121 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
122 * Using rwlock is effective with regard to lookup performance only when
123 * it is implemented in an efficient way.
124 * Otherwise, it is generally wise to stick to the simple locking since rwlock
125 * would require more memory or can even make lookups slower due to its own
126 * overhead (when it internally calls mutex locks).
128 #ifdef ISC_RWLOCK_USEATOMIC
129 #define DNS_RBTDB_USERWLOCK 1
130 #else
131 #define DNS_RBTDB_USERWLOCK 0
132 #endif
134 #if DNS_RBTDB_USERWLOCK
135 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
136 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
137 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
138 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
139 #else
140 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
141 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
142 #define RBTDB_LOCK(l, t) LOCK(l)
143 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
144 #endif
147 * Since node locking is sensitive to both performance and memory footprint,
148 * we need some trick here. If we have both high-performance rwlock and
149 * high performance and small-memory reference counters, we use rwlock for
150 * node lock and isc_refcount for node references. In this case, we don't have
151 * to protect the access to the counters by locks.
152 * Otherwise, we simply use ordinary mutex lock for node locking, and use
153 * simple integers as reference counters which is protected by the lock.
154 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
155 * NODE_UNLOCK. In some other cases, however, we need to protect reference
156 * counters first and then protect other parts of a node as read-only data.
157 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
158 * provided for these special cases. When we can use the efficient backend
159 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
160 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
161 * section including the access to the reference counter.
162 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
163 * section is also protected by NODE_STRONGLOCK().
165 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
166 typedef isc_rwlock_t nodelock_t;
168 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
169 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
170 #define NODE_LOCK(l, t) RWLOCK((l), (t))
171 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
172 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
174 #define NODE_STRONGLOCK(l) ((void)0)
175 #define NODE_STRONGUNLOCK(l) ((void)0)
176 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
177 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
178 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
179 #else
180 typedef isc_mutex_t nodelock_t;
182 #define NODE_INITLOCK(l) isc_mutex_init(l)
183 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
184 #define NODE_LOCK(l, t) LOCK(l)
185 #define NODE_UNLOCK(l, t) UNLOCK(l)
186 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
188 #define NODE_STRONGLOCK(l) LOCK(l)
189 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
190 #define NODE_WEAKLOCK(l, t) ((void)0)
191 #define NODE_WEAKUNLOCK(l, t) ((void)0)
192 #define NODE_WEAKDOWNGRADE(l) ((void)0)
193 #endif
196 * Whether to rate-limit updating the LRU to avoid possible thread contention.
197 * Our performance measurement has shown the cost is marginal, so it's defined
198 * to be 0 by default either with or without threads.
200 #ifndef DNS_RBTDB_LIMITLRUUPDATE
201 #define DNS_RBTDB_LIMITLRUUPDATE 0
202 #endif
/*
 * Allow clients with a virtual time of up to 5 minutes in the past to see
 * records that would otherwise have expired.
 */
208 #define RBTDB_VIRTUAL 300
210 struct noqname {
211 dns_name_t name;
212 void * neg;
213 void * negsig;
214 dns_rdatatype_t type;
217 typedef struct acachectl acachectl_t;
219 typedef struct rdatasetheader {
221 * Locked by the owning node's lock.
223 rbtdb_serial_t serial;
224 dns_ttl_t rdh_ttl;
225 rbtdb_rdatatype_t type;
226 isc_uint16_t attributes;
227 dns_trust_t trust;
228 struct noqname *noqname;
229 struct noqname *closest;
230 /*%<
231 * We don't use the LIST macros, because the LIST structure has
232 * both head and tail pointers, and is doubly linked.
235 struct rdatasetheader *next;
236 /*%<
237 * If this is the top header for an rdataset, 'next' points
238 * to the top header for the next rdataset (i.e., the next type).
239 * Otherwise, it points up to the header whose down pointer points
240 * at this header.
243 struct rdatasetheader *down;
244 /*%<
245 * Points to the header for the next older version of
246 * this rdataset.
249 isc_uint32_t count;
250 /*%<
251 * Monotonously increased every time this rdataset is bound so that
252 * it is used as the base of the starting point in DNS responses
253 * when the "cyclic" rrset-order is required. Since the ordering
254 * should not be so crucial, no lock is set for the counter for
255 * performance reasons.
258 acachectl_t *additional_auth;
259 acachectl_t *additional_glue;
261 dns_rbtnode_t *node;
262 isc_stdtime_t last_used;
263 ISC_LINK(struct rdatasetheader) link;
265 unsigned int heap_index;
266 /*%<
267 * Used for TTL-based cache cleaning.
269 isc_stdtime_t resign;
270 } rdatasetheader_t;
272 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
273 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
275 #define RDATASET_ATTR_NONEXISTENT 0x0001
276 #define RDATASET_ATTR_STALE 0x0002
277 #define RDATASET_ATTR_IGNORE 0x0004
278 #define RDATASET_ATTR_RETAIN 0x0008
279 #define RDATASET_ATTR_NXDOMAIN 0x0010
280 #define RDATASET_ATTR_RESIGN 0x0020
281 #define RDATASET_ATTR_STATCOUNT 0x0040
282 #define RDATASET_ATTR_OPTOUT 0x0080
284 typedef struct acache_cbarg {
285 dns_rdatasetadditional_t type;
286 unsigned int count;
287 dns_db_t *db;
288 dns_dbnode_t *node;
289 rdatasetheader_t *header;
290 } acache_cbarg_t;
292 struct acachectl {
293 dns_acacheentry_t *entry;
294 acache_cbarg_t *cbarg;
298 * XXX
299 * When the cache will pre-expire data (due to memory low or other
300 * situations) before the rdataset's TTL has expired, it MUST
301 * respect the RETAIN bit and not expire the data until its TTL is
302 * expired.
305 #undef IGNORE /* WIN32 winbase.h defines this. */
307 #define EXISTS(header) \
308 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
309 #define NONEXISTENT(header) \
310 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
311 #define IGNORE(header) \
312 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
313 #define RETAIN(header) \
314 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
315 #define NXDOMAIN(header) \
316 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
317 #define RESIGN(header) \
318 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
319 #define OPTOUT(header) \
320 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
322 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
325 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
326 * There is a tradeoff issue about configuring this value: if this is too
327 * small, it may cause heavier contention between threads; if this is too large,
328 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
329 * The default value should work well for most environments, but this can
330 * also be configurable at compilation time via the
331 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
332 * 1 due to the assumption of overmem_purge().
334 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
335 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
336 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
337 #else
338 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
339 #endif
340 #else
341 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
342 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
344 typedef struct {
345 nodelock_t lock;
346 /* Protected in the refcount routines. */
347 isc_refcount_t references;
348 /* Locked by lock. */
349 isc_boolean_t exiting;
350 } rbtdb_nodelock_t;
352 typedef struct rbtdb_changed {
353 dns_rbtnode_t * node;
354 isc_boolean_t dirty;
355 ISC_LINK(struct rbtdb_changed) link;
356 } rbtdb_changed_t;
358 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
/*
 * Values stored in the 'secure' member of rbtdb_version_t.
 */
typedef enum {
	dns_db_insecure,
	dns_db_partial,
	dns_db_secure
} dns_db_secure_t;
366 typedef struct rbtdb_version {
367 /* Not locked */
368 rbtdb_serial_t serial;
370 * Protected in the refcount routines.
371 * XXXJT: should we change the lock policy based on the refcount
372 * performance?
374 isc_refcount_t references;
375 /* Locked by database lock. */
376 isc_boolean_t writer;
377 isc_boolean_t commit_ok;
378 rbtdb_changedlist_t changed_list;
379 rdatasetheaderlist_t resigned_list;
380 ISC_LINK(struct rbtdb_version) link;
381 dns_db_secure_t secure;
382 isc_boolean_t havensec3;
383 /* NSEC3 parameters */
384 dns_hash_t hash;
385 isc_uint8_t flags;
386 isc_uint16_t iterations;
387 isc_uint8_t salt_length;
388 unsigned char salt[DNS_NSEC3_SALTSIZE];
389 } rbtdb_version_t;
391 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
393 typedef struct {
394 /* Unlocked. */
395 dns_db_t common;
396 #if DNS_RBTDB_USERWLOCK
397 isc_rwlock_t lock;
398 #else
399 isc_mutex_t lock;
400 #endif
401 isc_rwlock_t tree_lock;
402 unsigned int node_lock_count;
403 rbtdb_nodelock_t * node_locks;
404 dns_rbtnode_t * origin_node;
405 dns_stats_t * rrsetstats; /* cache DB only */
406 /* Locked by lock. */
407 unsigned int active;
408 isc_refcount_t references;
409 unsigned int attributes;
410 rbtdb_serial_t current_serial;
411 rbtdb_serial_t least_serial;
412 rbtdb_serial_t next_serial;
413 rbtdb_version_t * current_version;
414 rbtdb_version_t * future_version;
415 rbtdb_versionlist_t open_versions;
416 isc_boolean_t overmem;
417 isc_task_t * task;
418 dns_dbnode_t *soanode;
419 dns_dbnode_t *nsnode;
422 * This is a linked list used to implement the LRU cache. There will
423 * be node_lock_count linked lists here. Nodes in bucket 1 will be
424 * placed on the linked list rdatasets[1].
426 rdatasetheaderlist_t *rdatasets;
429 * Temporary storage for stale cache nodes and dynamically deleted
430 * nodes that await being cleaned up.
432 rbtnodelist_t *deadnodes;
435 * Heaps. Each of these is used for TTL based expiry.
437 isc_heap_t **heaps;
439 /* Locked by tree_lock. */
440 dns_rbt_t * tree;
441 dns_rbt_t * nsec;
442 dns_rbt_t * nsec3;
444 /* Unlocked */
445 unsigned int quantum;
446 } dns_rbtdb_t;
448 #define RBTDB_ATTR_LOADED 0x01
449 #define RBTDB_ATTR_LOADING 0x02
452 * Search Context
454 typedef struct {
455 dns_rbtdb_t * rbtdb;
456 rbtdb_version_t * rbtversion;
457 rbtdb_serial_t serial;
458 unsigned int options;
459 dns_rbtnodechain_t chain;
460 isc_boolean_t copy_name;
461 isc_boolean_t need_cleanup;
462 isc_boolean_t wild;
463 dns_rbtnode_t * zonecut;
464 rdatasetheader_t * zonecut_rdataset;
465 rdatasetheader_t * zonecut_sigrdataset;
466 dns_fixedname_t zonecut_name;
467 isc_stdtime_t now;
468 } rbtdb_search_t;
471 * Load Context
473 typedef struct {
474 dns_rbtdb_t * rbtdb;
475 isc_stdtime_t now;
476 } rbtdb_load_t;
478 static void rdataset_disassociate(dns_rdataset_t *rdataset);
479 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
480 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
481 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
482 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
483 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
484 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
485 dns_name_t *name,
486 dns_rdataset_t *neg,
487 dns_rdataset_t *negsig);
488 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
489 dns_name_t *name,
490 dns_rdataset_t *neg,
491 dns_rdataset_t *negsig);
492 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
493 dns_rdatasetadditional_t type,
494 dns_rdatatype_t qtype,
495 dns_acache_t *acache,
496 dns_zone_t **zonep,
497 dns_db_t **dbp,
498 dns_dbversion_t **versionp,
499 dns_dbnode_t **nodep,
500 dns_name_t *fname,
501 dns_message_t *msg,
502 isc_stdtime_t now);
503 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
504 dns_rdatasetadditional_t type,
505 dns_rdatatype_t qtype,
506 dns_acache_t *acache,
507 dns_zone_t *zone,
508 dns_db_t *db,
509 dns_dbversion_t *version,
510 dns_dbnode_t *node,
511 dns_name_t *fname);
512 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
513 dns_rdataset_t *rdataset,
514 dns_rdatasetadditional_t type,
515 dns_rdatatype_t qtype);
516 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
517 isc_stdtime_t now);
518 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
519 isc_stdtime_t now);
520 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
521 isc_boolean_t tree_locked);
522 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
523 isc_stdtime_t now, isc_boolean_t tree_locked);
524 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
525 rdatasetheader_t *newheader);
526 static void prune_tree(isc_task_t *task, isc_event_t *event);
528 static dns_rdatasetmethods_t rdataset_methods = {
529 rdataset_disassociate,
530 rdataset_first,
531 rdataset_next,
532 rdataset_current,
533 rdataset_clone,
534 rdataset_count,
535 NULL,
536 rdataset_getnoqname,
537 NULL,
538 rdataset_getclosest,
539 rdataset_getadditional,
540 rdataset_setadditional,
541 rdataset_putadditional
544 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
545 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
546 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
547 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
548 dns_rdataset_t *rdataset);
550 static dns_rdatasetitermethods_t rdatasetiter_methods = {
551 rdatasetiter_destroy,
552 rdatasetiter_first,
553 rdatasetiter_next,
554 rdatasetiter_current
557 typedef struct rbtdb_rdatasetiter {
558 dns_rdatasetiter_t common;
559 rdatasetheader_t * current;
560 } rbtdb_rdatasetiter_t;
562 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
563 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
564 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
565 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
566 dns_name_t *name);
567 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
568 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
569 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
570 dns_dbnode_t **nodep,
571 dns_name_t *name);
572 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
573 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
574 dns_name_t *name);
576 static dns_dbiteratormethods_t dbiterator_methods = {
577 dbiterator_destroy,
578 dbiterator_first,
579 dbiterator_last,
580 dbiterator_seek,
581 dbiterator_prev,
582 dbiterator_next,
583 dbiterator_current,
584 dbiterator_pause,
585 dbiterator_origin
588 #define DELETION_BATCH_MAX 64
591 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
593 typedef struct rbtdb_dbiterator {
594 dns_dbiterator_t common;
595 isc_boolean_t paused;
596 isc_boolean_t new_origin;
597 isc_rwlocktype_t tree_locked;
598 isc_result_t result;
599 dns_fixedname_t name;
600 dns_fixedname_t origin;
601 dns_rbtnodechain_t chain;
602 dns_rbtnodechain_t nsec3chain;
603 dns_rbtnodechain_t *current;
604 dns_rbtnode_t *node;
605 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
606 int delete;
607 isc_boolean_t nsec3only;
608 isc_boolean_t nonsec3;
609 } rbtdb_dbiterator_t;
612 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
613 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
615 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
616 isc_event_t *event);
617 static void overmem(dns_db_t *db, isc_boolean_t overmem);
618 #ifdef BIND9
619 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
620 #endif
/*%
 * 'init_count' is used to initialize 'newheader->count' which in turn
 * is used to determine where in the cycle rrset-order cyclic starts.
 * We don't lock this as we don't care about simultaneous updates.
 *
 * Note:
 *	Both init_count and header->count can be ISC_UINT32_MAX.
 *	The count on the returned rdataset however can't be as
 *	that indicates that the database does not implement cyclic
 *	processing.
 */
633 static unsigned int init_count;
636 * Locking
638 * If a routine is going to lock more than one lock in this module, then
639 * the locking must be done in the following order:
641 * Tree Lock
643 * Node Lock (Only one from the set may be locked at one time by
644 * any caller)
646 * Database Lock
648 * Failure to follow this hierarchy can result in deadlock.
652 * Deleting Nodes
654 * For zone databases the node for the origin of the zone MUST NOT be deleted.
659 * DB Routines
662 static void
663 attach(dns_db_t *source, dns_db_t **targetp) {
664 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
666 REQUIRE(VALID_RBTDB(rbtdb));
668 isc_refcount_increment(&rbtdb->references, NULL);
670 *targetp = source;
673 static void
674 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
675 dns_rbtdb_t *rbtdb = event->ev_arg;
677 UNUSED(task);
679 free_rbtdb(rbtdb, ISC_TRUE, event);
682 static void
683 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
684 isc_boolean_t increment)
686 dns_rdatastatstype_t statattributes = 0;
687 dns_rdatastatstype_t base = 0;
688 dns_rdatastatstype_t type;
690 /* At the moment we count statistics only for cache DB */
691 INSIST(IS_CACHE(rbtdb));
693 if (NXDOMAIN(header))
694 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
695 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
696 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
697 base = RBTDB_RDATATYPE_EXT(header->type);
698 } else
699 base = RBTDB_RDATATYPE_BASE(header->type);
701 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
702 if (increment)
703 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
704 else
705 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
708 static void
709 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
710 int idx;
711 isc_heap_t *heap;
712 dns_ttl_t oldttl;
714 oldttl = header->rdh_ttl;
715 header->rdh_ttl = newttl;
717 if (!IS_CACHE(rbtdb))
718 return;
721 * It's possible the rbtdb is not a cache. If this is the case,
722 * we will not have a heap, and we move on. If we do, though,
723 * we might need to adjust things.
725 if (header->heap_index == 0 || newttl == oldttl)
726 return;
727 idx = header->node->locknum;
728 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
729 return;
730 heap = rbtdb->heaps[idx];
732 if (newttl < oldttl)
733 isc_heap_increased(heap, header->heap_index);
734 else
735 isc_heap_decreased(heap, header->heap_index);
739 * These functions allow the heap code to rank the priority of each
740 * element. It returns ISC_TRUE if v1 happens "sooner" than v2.
742 static isc_boolean_t
743 ttl_sooner(void *v1, void *v2) {
744 rdatasetheader_t *h1 = v1;
745 rdatasetheader_t *h2 = v2;
747 if (h1->rdh_ttl < h2->rdh_ttl)
748 return (ISC_TRUE);
749 return (ISC_FALSE);
752 static isc_boolean_t
753 resign_sooner(void *v1, void *v2) {
754 rdatasetheader_t *h1 = v1;
755 rdatasetheader_t *h2 = v2;
757 if (h1->resign < h2->resign)
758 return (ISC_TRUE);
759 return (ISC_FALSE);
763 * This function sets the heap index into the header.
765 static void
766 set_index(void *what, unsigned int index) {
767 rdatasetheader_t *h = what;
769 h->heap_index = index;
773 * Work out how many nodes can be deleted in the time between two
774 * requests to the nameserver. Smooth the resulting number and use it
775 * as a estimate for the number of nodes to be deleted in the next
776 * iteration.
778 static unsigned int
779 adjust_quantum(unsigned int old, isc_time_t *start) {
780 unsigned int pps = dns_pps; /* packets per second */
781 unsigned int interval;
782 isc_uint64_t usecs;
783 isc_time_t end;
784 unsigned int new;
786 if (pps < 100)
787 pps = 100;
788 isc_time_now(&end);
790 interval = 1000000 / pps; /* interval in usec */
791 if (interval == 0)
792 interval = 1;
793 usecs = isc_time_microdiff(&end, start);
794 if (usecs == 0) {
796 * We were unable to measure the amount of time taken.
797 * Double the nodes deleted next time.
799 old *= 2;
800 if (old > 1000)
801 old = 1000;
802 return (old);
804 new = old * interval;
805 new /= (unsigned int)usecs;
806 if (new == 0)
807 new = 1;
808 else if (new > 1000)
809 new = 1000;
811 /* Smooth */
812 new = (new + old * 3) / 4;
814 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
815 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
817 return (new);
820 static void
821 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
822 unsigned int i;
823 isc_ondestroy_t ondest;
824 isc_result_t result;
825 char buf[DNS_NAME_FORMATSIZE];
826 dns_rbt_t **treep;
827 isc_time_t start;
829 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
830 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
832 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
833 REQUIRE(rbtdb->future_version == NULL);
835 if (rbtdb->current_version != NULL) {
836 unsigned int refs;
838 isc_refcount_decrement(&rbtdb->current_version->references,
839 &refs);
840 INSIST(refs == 0);
841 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
842 isc_refcount_destroy(&rbtdb->current_version->references);
843 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
844 sizeof(rbtdb_version_t));
848 * We assume the number of remaining dead nodes is reasonably small;
849 * the overhead of unlinking all nodes here should be negligible.
851 for (i = 0; i < rbtdb->node_lock_count; i++) {
852 dns_rbtnode_t *node;
854 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
855 while (node != NULL) {
856 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
857 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
861 if (event == NULL)
862 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
864 for (;;) {
866 * pick the next tree to (start to) destroy
868 treep = &rbtdb->tree;
869 if (*treep == NULL) {
870 treep = &rbtdb->nsec;
871 if (*treep == NULL) {
872 treep = &rbtdb->nsec3;
874 * we're finished after clear cutting
876 if (*treep == NULL)
877 break;
881 isc_time_now(&start);
882 result = dns_rbt_destroy2(treep, rbtdb->quantum);
883 if (result == ISC_R_QUOTA) {
884 INSIST(rbtdb->task != NULL);
885 if (rbtdb->quantum != 0)
886 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
887 &start);
888 if (event == NULL)
889 event = isc_event_allocate(rbtdb->common.mctx,
890 NULL,
891 DNS_EVENT_FREESTORAGE,
892 free_rbtdb_callback,
893 rbtdb,
894 sizeof(isc_event_t));
895 if (event == NULL)
896 continue;
897 isc_task_send(rbtdb->task, &event);
898 return;
900 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
903 if (event != NULL)
904 isc_event_free(&event);
905 if (log) {
906 if (dns_name_dynamic(&rbtdb->common.origin))
907 dns_name_format(&rbtdb->common.origin, buf,
908 sizeof(buf));
909 else
910 strcpy(buf, "<UNKNOWN>");
911 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
912 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
913 "done free_rbtdb(%s)", buf);
915 if (dns_name_dynamic(&rbtdb->common.origin))
916 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
917 for (i = 0; i < rbtdb->node_lock_count; i++) {
918 isc_refcount_destroy(&rbtdb->node_locks[i].references);
919 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
923 * Clean up LRU / re-signing order lists.
925 if (rbtdb->rdatasets != NULL) {
926 for (i = 0; i < rbtdb->node_lock_count; i++)
927 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
928 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
929 rbtdb->node_lock_count *
930 sizeof(rdatasetheaderlist_t));
933 * Clean up dead node buckets.
935 if (rbtdb->deadnodes != NULL) {
936 for (i = 0; i < rbtdb->node_lock_count; i++)
937 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
938 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
939 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
942 * Clean up heap objects.
944 if (rbtdb->heaps != NULL) {
945 for (i = 0; i < rbtdb->node_lock_count; i++)
946 isc_heap_destroy(&rbtdb->heaps[i]);
947 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
948 rbtdb->node_lock_count *
949 sizeof(isc_heap_t *));
952 if (rbtdb->rrsetstats != NULL)
953 dns_stats_detach(&rbtdb->rrsetstats);
955 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
956 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
957 isc_rwlock_destroy(&rbtdb->tree_lock);
958 isc_refcount_destroy(&rbtdb->references);
959 if (rbtdb->task != NULL)
960 isc_task_detach(&rbtdb->task);
962 RBTDB_DESTROYLOCK(&rbtdb->lock);
963 rbtdb->common.magic = 0;
964 rbtdb->common.impmagic = 0;
965 ondest = rbtdb->common.ondest;
966 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
967 isc_ondestroy_notify(&ondest, rbtdb);
970 static inline void
971 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
972 isc_boolean_t want_free = ISC_FALSE;
973 unsigned int i;
974 unsigned int inactive = 0;
976 /* XXX check for open versions here */
978 if (rbtdb->soanode != NULL)
979 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
980 if (rbtdb->nsnode != NULL)
981 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
984 * Even though there are no external direct references, there still
985 * may be nodes in use.
987 for (i = 0; i < rbtdb->node_lock_count; i++) {
988 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
989 rbtdb->node_locks[i].exiting = ISC_TRUE;
990 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
991 if (isc_refcount_current(&rbtdb->node_locks[i].references)
992 == 0) {
993 inactive++;
997 if (inactive != 0) {
998 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
999 rbtdb->active -= inactive;
1000 if (rbtdb->active == 0)
1001 want_free = ISC_TRUE;
1002 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1003 if (want_free) {
1004 char buf[DNS_NAME_FORMATSIZE];
1005 if (dns_name_dynamic(&rbtdb->common.origin))
1006 dns_name_format(&rbtdb->common.origin, buf,
1007 sizeof(buf));
1008 else
1009 strcpy(buf, "<UNKNOWN>");
1010 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1011 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1012 "calling free_rbtdb(%s)", buf);
1013 free_rbtdb(rbtdb, ISC_TRUE, NULL);
1018 static void
1019 detach(dns_db_t **dbp) {
1020 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1021 unsigned int refs;
1023 REQUIRE(VALID_RBTDB(rbtdb));
1025 isc_refcount_decrement(&rbtdb->references, &refs);
1027 if (refs == 0)
1028 maybe_free_rbtdb(rbtdb);
1030 *dbp = NULL;
1033 static void
1034 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1035 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1036 rbtdb_version_t *version;
1037 unsigned int refs;
1039 REQUIRE(VALID_RBTDB(rbtdb));
1041 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1042 version = rbtdb->current_version;
1043 isc_refcount_increment(&version->references, &refs);
1044 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1046 *versionp = (dns_dbversion_t *)version;
1049 static inline rbtdb_version_t *
1050 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1051 unsigned int references, isc_boolean_t writer)
1053 isc_result_t result;
1054 rbtdb_version_t *version;
1056 version = isc_mem_get(mctx, sizeof(*version));
1057 if (version == NULL)
1058 return (NULL);
1059 version->serial = serial;
1060 result = isc_refcount_init(&version->references, references);
1061 if (result != ISC_R_SUCCESS) {
1062 isc_mem_put(mctx, version, sizeof(*version));
1063 return (NULL);
1065 version->writer = writer;
1066 version->commit_ok = ISC_FALSE;
1067 ISC_LIST_INIT(version->changed_list);
1068 ISC_LIST_INIT(version->resigned_list);
1069 ISC_LINK_INIT(version, link);
1071 return (version);
1074 static isc_result_t
1075 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1076 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1077 rbtdb_version_t *version;
1079 REQUIRE(VALID_RBTDB(rbtdb));
1080 REQUIRE(versionp != NULL && *versionp == NULL);
1081 REQUIRE(rbtdb->future_version == NULL);
1083 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1084 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1085 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1086 ISC_TRUE);
1087 if (version != NULL) {
1088 version->commit_ok = ISC_TRUE;
1089 version->secure = rbtdb->current_version->secure;
1090 version->havensec3 = rbtdb->current_version->havensec3;
1091 if (version->havensec3) {
1092 version->flags = rbtdb->current_version->flags;
1093 version->iterations =
1094 rbtdb->current_version->iterations;
1095 version->hash = rbtdb->current_version->hash;
1096 version->salt_length =
1097 rbtdb->current_version->salt_length;
1098 memcpy(version->salt, rbtdb->current_version->salt,
1099 version->salt_length);
1100 } else {
1101 version->flags = 0;
1102 version->iterations = 0;
1103 version->hash = 0;
1104 version->salt_length = 0;
1105 memset(version->salt, 0, sizeof(version->salt));
1107 rbtdb->next_serial++;
1108 rbtdb->future_version = version;
1110 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1112 if (version == NULL)
1113 return (ISC_R_NOMEMORY);
1115 *versionp = version;
1117 return (ISC_R_SUCCESS);
1120 static void
1121 attachversion(dns_db_t *db, dns_dbversion_t *source,
1122 dns_dbversion_t **targetp)
1124 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1125 rbtdb_version_t *rbtversion = source;
1126 unsigned int refs;
1128 REQUIRE(VALID_RBTDB(rbtdb));
1130 isc_refcount_increment(&rbtversion->references, &refs);
1131 INSIST(refs > 1);
1133 *targetp = rbtversion;
1136 static rbtdb_changed_t *
1137 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1138 dns_rbtnode_t *node)
1140 rbtdb_changed_t *changed;
1141 unsigned int refs;
1144 * Caller must be holding the node lock if its reference must be
1145 * protected by the lock.
1148 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1150 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1152 REQUIRE(version->writer);
1154 if (changed != NULL) {
1155 dns_rbtnode_refincrement(node, &refs);
1156 INSIST(refs != 0);
1157 changed->node = node;
1158 changed->dirty = ISC_FALSE;
1159 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1160 } else
1161 version->commit_ok = ISC_FALSE;
1163 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1165 return (changed);
1168 static void
1169 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1170 acachectl_t *array)
1172 unsigned int count;
1173 unsigned int i;
1174 unsigned char *raw; /* RDATASLAB */
1177 * The caller must be holding the corresponding node lock.
1180 if (array == NULL)
1181 return;
1183 raw = (unsigned char *)header + sizeof(*header);
1184 count = raw[0] * 256 + raw[1];
1187 * Sanity check: since an additional cache entry has a reference to
1188 * the original DB node (in the callback arg), there should be no
1189 * acache entries when the node can be freed.
1191 for (i = 0; i < count; i++)
1192 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1194 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1197 static inline void
1198 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1200 if (dns_name_dynamic(&(*noqname)->name))
1201 dns_name_free(&(*noqname)->name, mctx);
1202 if ((*noqname)->neg != NULL)
1203 isc_mem_put(mctx, (*noqname)->neg,
1204 dns_rdataslab_size((*noqname)->neg, 0));
1205 if ((*noqname)->negsig != NULL)
1206 isc_mem_put(mctx, (*noqname)->negsig,
1207 dns_rdataslab_size((*noqname)->negsig, 0));
1208 isc_mem_put(mctx, *noqname, sizeof(**noqname));
1209 *noqname = NULL;
1212 static inline void
1213 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1215 ISC_LINK_INIT(h, link);
1216 h->heap_index = 0;
1218 #if TRACE_HEADER
1219 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1220 fprintf(stderr, "initialized header: %p\n", h);
1221 #else
1222 UNUSED(rbtdb);
1223 #endif
1226 static inline rdatasetheader_t *
1227 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1229 rdatasetheader_t *h;
1231 h = isc_mem_get(mctx, sizeof(*h));
1232 if (h == NULL)
1233 return (NULL);
1235 #if TRACE_HEADER
1236 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1237 fprintf(stderr, "allocated header: %p\n", h);
1238 #endif
1239 init_rdataset(rbtdb, h);
1240 return (h);
1243 static inline void
1244 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1246 unsigned int size;
1247 int idx;
1249 if (EXISTS(rdataset) &&
1250 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1251 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1254 idx = rdataset->node->locknum;
1255 if (ISC_LINK_LINKED(rdataset, link)) {
1256 INSIST(IS_CACHE(rbtdb));
1257 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1259 if (rdataset->heap_index != 0)
1260 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1261 rdataset->heap_index = 0;
1263 if (rdataset->noqname != NULL)
1264 free_noqname(mctx, &rdataset->noqname);
1265 if (rdataset->closest != NULL)
1266 free_noqname(mctx, &rdataset->closest);
1268 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1269 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1271 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1272 size = sizeof(*rdataset);
1273 else
1274 size = dns_rdataslab_size((unsigned char *)rdataset,
1275 sizeof(*rdataset));
1276 isc_mem_put(mctx, rdataset, size);
1279 static inline void
1280 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1281 rdatasetheader_t *header, *dcurrent;
1282 isc_boolean_t make_dirty = ISC_FALSE;
1285 * Caller must hold the node lock.
1289 * We set the IGNORE attribute on rdatasets with serial number
1290 * 'serial'. When the reference count goes to zero, these rdatasets
1291 * will be cleaned up; until that time, they will be ignored.
1293 for (header = node->data; header != NULL; header = header->next) {
1294 if (header->serial == serial) {
1295 header->attributes |= RDATASET_ATTR_IGNORE;
1296 make_dirty = ISC_TRUE;
1298 for (dcurrent = header->down;
1299 dcurrent != NULL;
1300 dcurrent = dcurrent->down) {
1301 if (dcurrent->serial == serial) {
1302 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1303 make_dirty = ISC_TRUE;
1307 if (make_dirty)
1308 node->dirty = 1;
1311 static inline void
1312 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1314 rdatasetheader_t *d, *down_next;
1316 for (d = top->down; d != NULL; d = down_next) {
1317 down_next = d->down;
1318 free_rdataset(rbtdb, mctx, d);
1320 top->down = NULL;
1323 static inline void
1324 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1325 rdatasetheader_t *current, *top_prev, *top_next;
1326 isc_mem_t *mctx = rbtdb->common.mctx;
1329 * Caller must be holding the node lock.
1332 top_prev = NULL;
1333 for (current = node->data; current != NULL; current = top_next) {
1334 top_next = current->next;
1335 clean_stale_headers(rbtdb, mctx, current);
1337 * If current is nonexistent or stale, we can clean it up.
1339 if ((current->attributes &
1340 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1341 if (top_prev != NULL)
1342 top_prev->next = current->next;
1343 else
1344 node->data = current->next;
1345 free_rdataset(rbtdb, mctx, current);
1346 } else
1347 top_prev = current;
1349 node->dirty = 0;
/*
 * clean_zone_node: clean up the rdataset headers of a zone node.
 *
 * For each top-level header at 'node' (the 'next' list starting at
 * node->data) the function:
 *   1. frees entries of the 'down' chain that carry the same serial as
 *      the entry above them or that have the IGNORE attribute;
 *   2. if the top header itself has IGNORE, either removes it (empty
 *      'down' chain) or replaces it with its first 'down' entry;
 *   3. frees the first 'down' entry whose serial is below 'least_serial'
 *      together with everything beneath it;
 *   4. frees the top header if it ended up with no 'down' chain and is
 *      NONEXISTENT.
 * node->dirty is cleared only when no top header retained a 'down'
 * chain.  'least_serial' must be non-zero.
 */
1353 static inline void
1354 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1355 rbtdb_serial_t least_serial)
1357 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1358 rdatasetheader_t *top_prev, *top_next;
1359 isc_mem_t *mctx = rbtdb->common.mctx;
1360 isc_boolean_t still_dirty = ISC_FALSE;
1363 * Caller must be holding the node lock.
1365 REQUIRE(least_serial != 0);
1367 top_prev = NULL;
1368 for (current = node->data; current != NULL; current = top_next) {
/* Saved up front: 'current' may be freed or replaced below. */
1369 top_next = current->next;
1372 * First, we clean up any instances of multiple rdatasets
1373 * with the same serial number, or that have the IGNORE
1374 * attribute.
1376 dparent = current;
1377 for (dcurrent = current->down;
1378 dcurrent != NULL;
1379 dcurrent = down_next) {
1380 down_next = dcurrent->down;
1381 INSIST(dcurrent->serial <= dparent->serial);
1382 if (dcurrent->serial == dparent->serial ||
1383 IGNORE(dcurrent)) {
/* Splice dcurrent out of the chain between dparent and down_next. */
1384 if (down_next != NULL)
1385 down_next->next = dparent;
1386 dparent->down = down_next;
1387 free_rdataset(rbtdb, mctx, dcurrent);
1388 } else
1389 dparent = dcurrent;
1393 * We've now eliminated all IGNORE datasets with the possible
1394 * exception of current, which we now check.
1396 if (IGNORE(current)) {
1397 down_next = current->down;
1398 if (down_next == NULL) {
1399 if (top_prev != NULL)
1400 top_prev->next = current->next;
1401 else
1402 node->data = current->next;
1403 free_rdataset(rbtdb, mctx, current);
1405 * current no longer exists, so we can
1406 * just continue with the loop.
1408 continue;
1409 } else {
1411 * Pull up current->down, making it the new
1412 * current.
1414 if (top_prev != NULL)
1415 top_prev->next = down_next;
1416 else
1417 node->data = down_next;
1418 down_next->next = top_next;
1419 free_rdataset(rbtdb, mctx, current);
1420 current = down_next;
1425 * We now try to find the first down node less than the
1426 * least serial.
1428 dparent = current;
1429 for (dcurrent = current->down;
1430 dcurrent != NULL;
1431 dcurrent = down_next) {
1432 down_next = dcurrent->down;
1433 if (dcurrent->serial < least_serial)
1434 break;
1435 dparent = dcurrent;
1439 * If there is a such an rdataset, delete it and any older
1440 * versions.
1442 if (dcurrent != NULL) {
1443 do {
1444 down_next = dcurrent->down;
1445 INSIST(dcurrent->serial <= least_serial);
1446 free_rdataset(rbtdb, mctx, dcurrent);
1447 dcurrent = down_next;
1448 } while (dcurrent != NULL);
/* dparent is the last surviving entry; terminate the chain there. */
1449 dparent->down = NULL;
1453 * Note. The serial number of 'current' might be less than
1454 * least_serial too, but we cannot delete it because it is
1455 * the most recent version, unless it is a NONEXISTENT
1456 * rdataset.
1458 if (current->down != NULL) {
1459 still_dirty = ISC_TRUE;
1460 top_prev = current;
1461 } else {
1463 * If this is a NONEXISTENT rdataset, we can delete it.
1465 if (NONEXISTENT(current)) {
1466 if (top_prev != NULL)
1467 top_prev->next = current->next;
1468 else
1469 node->data = current->next;
1470 free_rdataset(rbtdb, mctx, current);
1471 } else
1472 top_prev = current;
1475 if (!still_dirty)
1476 node->dirty = 0;
1478 static void
1479 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1481 dns_rbtnode_t *nsecnode;
1482 dns_fixedname_t fname;
1483 dns_name_t *name;
1484 isc_result_t result = ISC_R_UNEXPECTED;
1486 INSIST(!ISC_LINK_LINKED(node, deadlink));
1488 switch (node->nsec) {
1489 case DNS_RBT_NSEC_NORMAL:
1490 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1491 break;
1492 case DNS_RBT_NSEC_HAS_NSEC:
1493 dns_fixedname_init(&fname);
1494 name = dns_fixedname_name(&fname);
1495 dns_rbt_fullnamefromnode(node, name);
1497 * Delete the corresponding node from the auxiliary NSEC
1498 * tree before deleting from the main tree.
1500 nsecnode = NULL;
1501 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1502 NULL, DNS_RBTFIND_EMPTYDATA,
1503 NULL, NULL);
1504 if (result != ISC_R_SUCCESS) {
1505 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1506 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1507 "delete_node: "
1508 "dns_rbt_findnode(nsec): %s",
1509 isc_result_totext(result));
1510 } else {
1511 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1512 ISC_FALSE);
1513 if (result != ISC_R_SUCCESS) {
1514 isc_log_write(dns_lctx,
1515 DNS_LOGCATEGORY_DATABASE,
1516 DNS_LOGMODULE_CACHE,
1517 ISC_LOG_WARNING,
1518 "delete_nsecnode(): "
1519 "dns_rbt_deletenode(nsecnode): %s",
1520 isc_result_totext(result));
1523 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1524 break;
1525 case DNS_RBT_NSEC_NSEC:
1526 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1527 break;
1528 case DNS_RBT_NSEC_NSEC3:
1529 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1530 break;
1532 if (result != ISC_R_SUCCESS) {
1533 isc_log_write(dns_lctx,
1534 DNS_LOGCATEGORY_DATABASE,
1535 DNS_LOGMODULE_CACHE,
1536 ISC_LOG_WARNING,
1537 "delete_nsecnode(): "
1538 "dns_rbt_deletenode: %s",
1539 isc_result_totext(result));
1544 * Clean up dead nodes. These are nodes which have no references, and
1545 * have no data. They are dead but we could not or chose not to delete
1546 * them when we deleted all the data at that node because we did not want
1547 * to wait for the tree write lock.
1549 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1551 static void
1552 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1553 dns_rbtnode_t *node;
1554 int count = 10; /* XXXJT: should be adjustable */
1556 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1557 while (node != NULL && count > 0) {
1558 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1561 * Since we're holding a tree write lock, it should be
1562 * impossible for this node to be referenced by others.
1564 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1565 node->data == NULL);
1567 delete_node(rbtdb, node);
1569 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1570 count--;
1575 * Caller must be holding the node lock if its reference must be protected
1576 * by the lock.
1578 static inline void
1579 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1580 unsigned int lockrefs, noderefs;
1581 isc_refcount_t *lockref;
1583 dns_rbtnode_refincrement0(node, &noderefs);
1584 if (noderefs == 1) { /* this is the first reference to the node */
1585 lockref = &rbtdb->node_locks[node->locknum].references;
1586 isc_refcount_increment0(lockref, &lockrefs);
1587 INSIST(lockrefs != 0);
1589 INSIST(noderefs != 0);
1593 * This function is assumed to be called when a node is newly referenced
1594 * and can be in the deadnode list. In that case the node must be retrieved
1595 * from the list because it is going to be used. In addition, if the caller
1596 * happens to hold a write lock on the tree, it's a good chance to purge dead
1597 * nodes.
1598 * Note: while a new reference is gained in multiple places, there are only very
1599 * few cases where the node can be in the deadnode list (only empty nodes can
1600 * have been added to the list).
1602 static inline void
1603 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1604 isc_rwlocktype_t treelocktype)
1606 isc_boolean_t need_relock = ISC_FALSE;
1608 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1609 new_reference(rbtdb, node);
1611 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1612 isc_rwlocktype_read);
1613 if (ISC_LINK_LINKED(node, deadlink))
1614 need_relock = ISC_TRUE;
1615 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1616 treelocktype == isc_rwlocktype_write)
1617 need_relock = ISC_TRUE;
1618 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1619 isc_rwlocktype_read);
1620 if (need_relock) {
1621 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1622 isc_rwlocktype_write);
1623 if (ISC_LINK_LINKED(node, deadlink))
1624 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1625 node, deadlink);
1626 if (treelocktype == isc_rwlocktype_write)
1627 cleanup_dead_nodes(rbtdb, node->locknum);
1628 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1629 isc_rwlocktype_write);
1632 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1636 * Caller must be holding the node lock; either the "strong", read or write
1637 * lock. Note that the lock must be held even when node references are
1638 * atomically modified; in that case the decrement operation itself does not
1639 * have to be protected, but we must avoid a race condition where multiple
1640 * threads are decreasing the reference to zero simultaneously and at least
1641 * one of them is going to free the node.
1642 * This function returns ISC_TRUE if and only if the node reference decreases
1643 * to zero.
/*
 * decrement_reference: drop one reference to 'node' and, when it was the
 * last one, clean up or dispose of the node.
 *
 * Parameters:
 *   least_serial  least open serial for zone cleanup; 0 means the caller
 *                 does not know it and it is read from the database.
 *   nlock         type of node lock held by the caller; a read lock is
 *                 swapped for a write lock on the slow path and
 *                 downgraded again before returning.
 *   tlock         type of tree lock held by the caller; when it is not
 *                 a write lock, a non-blocking upgrade/trylock is tried.
 *   pruning       true when called from prune_tree(); suppresses the
 *                 dispatch of a further prune event.
 *
 * Outline:
 *   - fast path: a clean node that still has data or a 'down' pointer
 *     is only decremented;
 *   - otherwise the node is decremented under the node write lock, a
 *     dirty node with no references left is cleaned, and a node that
 *     still has data or a 'down' pointer is kept;
 *   - an empty, unreferenced node is deleted from the tree (or a prune
 *     event is sent for it) when the tree write lock is available, and
 *     is queued on the bucket's dead-nodes list otherwise.
 *
 * Returns ISC_TRUE when the node's reference count dropped to zero and
 * no new reference was taken on the node before returning.
 */
1645 static isc_boolean_t
1646 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1647 rbtdb_serial_t least_serial,
1648 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1649 isc_boolean_t pruning)
1651 isc_result_t result;
1652 isc_boolean_t write_locked;
1653 rbtdb_nodelock_t *nodelock;
1654 unsigned int refs, nrefs;
1655 int bucket = node->locknum;
1656 isc_boolean_t no_reference;
1658 nodelock = &rbtdb->node_locks[bucket];
1660 /* Handle easy and typical case first. */
1661 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1662 dns_rbtnode_refdecrement(node, &nrefs);
1663 INSIST((int)nrefs >= 0);
1664 if (nrefs == 0) {
/* Last reference to the node: release its count on the lock bucket. */
1665 isc_refcount_decrement(&nodelock->references, &refs);
1666 INSIST((int)refs >= 0);
1668 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1671 /* Upgrade the lock? */
1672 if (nlock == isc_rwlocktype_read) {
1673 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1674 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1676 dns_rbtnode_refdecrement(node, &nrefs);
1677 INSIST((int)nrefs >= 0);
1678 if (nrefs > 0) {
1679 /* Restore the lock? */
1680 if (nlock == isc_rwlocktype_read)
1681 NODE_WEAKDOWNGRADE(&nodelock->lock);
1682 return (ISC_FALSE);
1685 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1686 if (IS_CACHE(rbtdb))
1687 clean_cache_node(rbtdb, node);
1688 else {
1689 if (least_serial == 0) {
1691 * Caller doesn't know the least serial.
1692 * Get it.
1694 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1695 least_serial = rbtdb->least_serial;
1696 RBTDB_UNLOCK(&rbtdb->lock,
1697 isc_rwlocktype_read);
1699 clean_zone_node(rbtdb, node, least_serial);
1703 isc_refcount_decrement(&nodelock->references, &refs);
1704 INSIST((int)refs >= 0);
1707 * XXXDCL should this only be done for cache zones?
1709 if (node->data != NULL || node->down != NULL) {
1710 /* Restore the lock? */
1711 if (nlock == isc_rwlocktype_read)
1712 NODE_WEAKDOWNGRADE(&nodelock->lock);
1713 return (ISC_TRUE);
1717 * Attempt to switch to a write lock on the tree. If this fails,
1718 * we will add this node to a linked list of nodes in this locking
1719 * bucket which we will free later.
1721 if (tlock != isc_rwlocktype_write) {
1723 * Locking hierarchy notwithstanding, we don't need to free
1724 * the node lock before acquiring the tree write lock because
1725 * we only do a trylock.
1727 if (tlock == isc_rwlocktype_read)
1728 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1729 else
1730 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1731 isc_rwlocktype_write);
1732 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1733 result == ISC_R_LOCKBUSY);
1735 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1736 } else
1737 write_locked = ISC_TRUE;
1739 no_reference = ISC_TRUE;
1740 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1742 * We can now delete the node if the reference counter is
1743 * zero. This should be typically the case, but a different
1744 * thread may still gain a (new) reference just before the
1745 * current thread locks the tree (e.g., in findnode()).
1749 * If this node is the only one in the level it's in, deleting
1750 * this node may recursively make its parent the only node in
1751 * the parent level; if so, and if no one is currently using
1752 * the parent node, this is almost the only opportunity to
1753 * clean it up. But the recursive cleanup is not that trivial
1754 * since the child and parent may be in different lock buckets,
1755 * which would cause a lock order reversal problem. To avoid
1756 * the trouble, we'll dispatch a separate event for batch
1757 * cleaning. We need to check whether we're deleting the node
1758 * as a result of pruning to avoid infinite dispatching.
1759 * Note: pruning happens only when a task has been set for the
1760 * rbtdb. If the user of the rbtdb chooses not to set a task,
1761 * it's their responsibility to purge stale leaves (e.g. by
1762 * periodic walk-through).
1764 if (!pruning && node->parent != NULL &&
1765 node->parent->down == node && node->left == NULL &&
1766 node->right == NULL && rbtdb->task != NULL) {
1767 isc_event_t *ev;
1768 dns_db_t *db;
1770 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1771 DNS_EVENT_RBTPRUNE,
1772 prune_tree, node,
1773 sizeof(isc_event_t));
1774 if (ev != NULL) {
/*
 * The event carries a node reference and a database reference;
 * prune_tree() releases both.
 */
1775 new_reference(rbtdb, node);
1776 db = NULL;
1777 attach((dns_db_t *)rbtdb, &db);
1778 ev->ev_sender = db;
1779 isc_task_send(rbtdb->task, &ev);
1780 no_reference = ISC_FALSE;
1781 } else {
1783 * XXX: this is a weird situation. We could
1784 * ignore this error case, but then the stale
1785 * node will unlikely be purged except via a
1786 * rare condition such as manual cleanup. So
1787 * we queue it in the deadnodes list, hoping
1788 * the memory shortage is temporary and the node
1789 * will be deleted later.
1791 isc_log_write(dns_lctx,
1792 DNS_LOGCATEGORY_DATABASE,
1793 DNS_LOGMODULE_CACHE,
1794 ISC_LOG_INFO,
1795 "decrement_reference: failed to "
1796 "allocate pruning event");
1797 INSIST(!ISC_LINK_LINKED(node, deadlink));
1798 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1799 deadlink);
1801 } else {
1802 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1803 char printname[DNS_NAME_FORMATSIZE];
1805 isc_log_write(dns_lctx,
1806 DNS_LOGCATEGORY_DATABASE,
1807 DNS_LOGMODULE_CACHE,
1808 ISC_LOG_DEBUG(1),
1809 "decrement_reference: "
1810 "delete from rbt: %p %s",
1811 node,
1812 dns_rbt_formatnodename(node,
1813 printname,
1814 sizeof(printname)));
1817 delete_node(rbtdb, node);
1819 } else if (dns_rbtnode_refcurrent(node) == 0) {
/* Tree write lock not available: defer deletion via the dead-nodes list. */
1820 INSIST(!ISC_LINK_LINKED(node, deadlink));
1821 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1822 } else
1823 no_reference = ISC_FALSE;
1825 /* Restore the lock? */
1826 if (nlock == isc_rwlocktype_read)
1827 NODE_WEAKDOWNGRADE(&nodelock->lock);
1830 * Relock a read lock, or unlock the write lock if no lock was held.
1832 if (tlock == isc_rwlocktype_none)
1833 if (write_locked)
1834 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1836 if (tlock == isc_rwlocktype_read)
1837 if (write_locked)
1838 isc_rwlock_downgrade(&rbtdb->tree_lock);
1840 return (no_reference);
1844 * Prune the tree by recursively cleaning-up single leaves. In the worst
1845 * case, the number of iteration is the number of tree levels, which is at
1846 * most the maximum number of domain name labels, i.e, 127. In practice, this
1847 * should be much smaller (only a few times), and even the worst case would be
1848 * acceptable for a single event.
1850 static void
1851 prune_tree(isc_task_t *task, isc_event_t *event) {
1852 dns_rbtdb_t *rbtdb = event->ev_sender;
1853 dns_rbtnode_t *node = event->ev_arg;
1854 dns_rbtnode_t *parent;
1855 unsigned int locknum;
1857 UNUSED(task);
1859 isc_event_free(&event);
1861 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1862 locknum = node->locknum;
1863 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1864 do {
1865 parent = node->parent;
1866 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1867 isc_rwlocktype_write, ISC_TRUE);
1869 if (parent != NULL && parent->down == NULL) {
1871 * node was the only down child of the parent and has
1872 * just been removed. We'll then need to examine the
1873 * parent. Keep the lock if possible; otherwise,
1874 * release the old lock and acquire one for the parent.
1876 if (parent->locknum != locknum) {
1877 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1878 isc_rwlocktype_write);
1879 locknum = parent->locknum;
1880 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1881 isc_rwlocktype_write);
1885 * We need to gain a reference to the node before
1886 * decrementing it in the next iteration. In addition,
1887 * if the node is in the dead-nodes list, extract it
1888 * from the list beforehand as we do in
1889 * reactivate_node().
1891 new_reference(rbtdb, parent);
1892 if (ISC_LINK_LINKED(parent, deadlink)) {
1893 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1894 parent, deadlink);
1896 } else
1897 parent = NULL;
1899 node = parent;
1900 } while (node != NULL);
1901 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1902 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1904 detach((dns_db_t **)(void *)&rbtdb);
1907 static inline void
1908 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1909 rbtdb_changedlist_t *cleanup_list)
1912 * Caller must be holding the database lock.
1915 rbtdb->least_serial = version->serial;
1916 *cleanup_list = version->changed_list;
1917 ISC_LIST_INIT(version->changed_list);
1920 static inline void
1921 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1922 rbtdb_changed_t *changed, *next_changed;
1925 * If the changed record is dirty, then
1926 * an update created multiple versions of
1927 * a given rdataset. We keep this list
1928 * until we're the least open version, at
1929 * which point it's safe to get rid of any
1930 * older versions.
1932 * If the changed record isn't dirty, then
1933 * we don't need it anymore since we're
1934 * committing and not rolling back.
1936 * The caller must be holding the database lock.
1938 for (changed = HEAD(version->changed_list);
1939 changed != NULL;
1940 changed = next_changed) {
1941 next_changed = NEXT(changed, link);
1942 if (!changed->dirty) {
1943 UNLINK(version->changed_list,
1944 changed, link);
1945 APPEND(*cleanup_list,
1946 changed, link);
1951 static void
1952 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1953 #ifndef BIND9
1954 UNUSED(db);
1955 UNUSED(version);
1956 UNUSED(origin);
1958 return;
1959 #else
1960 dns_rdataset_t keyset;
1961 dns_rdataset_t nsecset, signsecset;
1962 isc_boolean_t haszonekey = ISC_FALSE;
1963 isc_boolean_t hasnsec = ISC_FALSE;
1964 isc_result_t result;
1966 dns_rdataset_init(&keyset);
1967 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1968 0, 0, &keyset, NULL);
1969 if (result == ISC_R_SUCCESS) {
1970 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1971 result = dns_rdataset_first(&keyset);
1972 while (result == ISC_R_SUCCESS) {
1973 dns_rdataset_current(&keyset, &keyrdata);
1974 if (dns_zonekey_iszonekey(&keyrdata)) {
1975 haszonekey = ISC_TRUE;
1976 break;
1978 result = dns_rdataset_next(&keyset);
1980 dns_rdataset_disassociate(&keyset);
1982 if (!haszonekey) {
1983 version->secure = dns_db_insecure;
1984 version->havensec3 = ISC_FALSE;
1985 return;
1988 dns_rdataset_init(&nsecset);
1989 dns_rdataset_init(&signsecset);
1990 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1991 0, 0, &nsecset, &signsecset);
1992 if (result == ISC_R_SUCCESS) {
1993 if (dns_rdataset_isassociated(&signsecset)) {
1994 hasnsec = ISC_TRUE;
1995 dns_rdataset_disassociate(&signsecset);
1997 dns_rdataset_disassociate(&nsecset);
2000 setnsec3parameters(db, version);
2003 * Do we have a valid NSEC/NSEC3 chain?
2005 if (version->havensec3 || hasnsec)
2006 version->secure = dns_db_secure;
2007 else
2008 version->secure = dns_db_insecure;
2009 #endif
2012 /*%<
2013 * Walk the origin node looking for NSEC3PARAM records.
2014 * Cache the nsec3 parameters.
/*
 * setnsec3parameters: cache the NSEC3 parameters of 'version'.
 *
 * Compiled only when BIND9 is defined.  With the tree lock and the
 * origin node's lock held for reading, every rdataset header at the
 * origin node is examined.  For each one the 'down' chain is followed
 * to the first entry that is visible in 'version' (serial not greater
 * than the version's and not IGNOREd); a NONEXISTENT entry is skipped.
 * When the visible entry is an NSEC3PARAM rdataset, its records are
 * decoded straight from the raw slab that follows the header.
 *
 * A record is used only if its flags are zero and its hash algorithm
 * is either supported or DNS_NSEC3_UNKNOWNALG.  The record's salt,
 * hash, iterations and flags are copied into 'version' and
 * version->havensec3 is set.  The search stops at the first usable
 * record whose algorithm is not DNS_NSEC3_UNKNOWNALG.
 *
 * version->havensec3 is false on return when no usable record exists.
 */
2016 #ifdef BIND9
2017 static void
2018 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
2019 dns_rbtnode_t *node;
2020 dns_rdata_nsec3param_t nsec3param;
2021 dns_rdata_t rdata = DNS_RDATA_INIT;
2022 isc_region_t region;
2023 isc_result_t result;
2024 rdatasetheader_t *header, *header_next;
2025 unsigned char *raw; /* RDATASLAB */
2026 unsigned int count, length;
2027 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2029 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2030 version->havensec3 = ISC_FALSE;
2031 node = rbtdb->origin_node;
2032 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2033 isc_rwlocktype_read);
2034 for (header = node->data;
2035 header != NULL;
2036 header = header_next) {
/* Saved now because 'header' is re-pointed down the chain below. */
2037 header_next = header->next;
2038 do {
2039 if (header->serial <= version->serial &&
2040 !IGNORE(header)) {
2041 if (NONEXISTENT(header))
2042 header = NULL;
2043 break;
2044 } else
2045 header = header->down;
2046 } while (header != NULL);
2048 if (header != NULL &&
2049 (header->type == dns_rdatatype_nsec3param)) {
2051 * Find A NSEC3PARAM with a supported algorithm.
/* The slab begins with a two-byte big-endian record count. */
2053 raw = (unsigned char *)header + sizeof(*header);
2054 count = raw[0] * 256 + raw[1]; /* count */
2055 #if DNS_RDATASET_FIXED
2056 raw += count * 4 + 2;
2057 #else
2058 raw += 2;
2059 #endif
2060 while (count-- > 0U) {
/* Each record is preceded by a two-byte big-endian length. */
2061 length = raw[0] * 256 + raw[1];
2062 #if DNS_RDATASET_FIXED
2063 raw += 4;
2064 #else
2065 raw += 2;
2066 #endif
2067 region.base = raw;
2068 region.length = length;
2069 raw += length;
2070 dns_rdata_fromregion(&rdata,
2071 rbtdb->common.rdclass,
2072 dns_rdatatype_nsec3param,
2073 &region);
2074 result = dns_rdata_tostruct(&rdata,
2075 &nsec3param,
2076 NULL);
2077 INSIST(result == ISC_R_SUCCESS);
/* Make 'rdata' reusable for the next record. */
2078 dns_rdata_reset(&rdata);
2080 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2081 !dns_nsec3_supportedhash(nsec3param.hash))
2082 continue;
2084 if (nsec3param.flags != 0)
2085 continue;
2087 memcpy(version->salt, nsec3param.salt,
2088 nsec3param.salt_length);
2089 version->hash = nsec3param.hash;
2090 version->salt_length = nsec3param.salt_length;
2091 version->iterations = nsec3param.iterations;
2092 version->flags = nsec3param.flags;
2093 version->havensec3 = ISC_TRUE;
2095 * Look for a better algorithm than the
2096 * unknown test algorithm.
2098 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2099 goto unlock;
2103 unlock:
2104 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2105 isc_rwlocktype_read);
2106 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2108 #endif
2110 static void
2111 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2112 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2113 rbtdb_version_t *version, *cleanup_version, *least_greater;
2114 isc_boolean_t rollback = ISC_FALSE;
2115 rbtdb_changedlist_t cleanup_list;
2116 rdatasetheaderlist_t resigned_list;
2117 rbtdb_changed_t *changed, *next_changed;
2118 rbtdb_serial_t serial, least_serial;
2119 dns_rbtnode_t *rbtnode;
2120 unsigned int refs;
2121 rdatasetheader_t *header;
2122 isc_boolean_t writer;
2124 REQUIRE(VALID_RBTDB(rbtdb));
2125 version = (rbtdb_version_t *)*versionp;
2127 cleanup_version = NULL;
2128 ISC_LIST_INIT(cleanup_list);
2129 ISC_LIST_INIT(resigned_list);
2131 isc_refcount_decrement(&version->references, &refs);
2132 if (refs > 0) { /* typical and easy case first */
2133 if (commit) {
2134 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2135 INSIST(!version->writer);
2136 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2138 goto end;
2141 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2142 serial = version->serial;
2143 writer = version->writer;
2144 if (version->writer) {
2145 if (commit) {
2146 unsigned cur_ref;
2147 rbtdb_version_t *cur_version;
2149 INSIST(version->commit_ok);
2150 INSIST(version == rbtdb->future_version);
2152 * The current version is going to be replaced.
2153 * Release the (likely last) reference to it from the
2154 * DB itself and unlink it from the open list.
2156 cur_version = rbtdb->current_version;
2157 isc_refcount_decrement(&cur_version->references,
2158 &cur_ref);
2159 if (cur_ref == 0) {
2160 if (cur_version->serial == rbtdb->least_serial)
2161 INSIST(EMPTY(cur_version->changed_list));
2162 UNLINK(rbtdb->open_versions,
2163 cur_version, link);
2165 if (EMPTY(rbtdb->open_versions)) {
2167 * We're going to become the least open
2168 * version.
2170 make_least_version(rbtdb, version,
2171 &cleanup_list);
2172 } else {
2174 * Some other open version is the
2175 * least version. We can't cleanup
2176 * records that were changed in this
2177 * version because the older versions
2178 * may still be in use by an open
2179 * version.
2181 * We can, however, discard the
2182 * changed records for things that
2183 * we've added that didn't exist in
2184 * prior versions.
2186 cleanup_nondirty(version, &cleanup_list);
2189 * If the (soon to be former) current version
2190 * isn't being used by anyone, we can clean
2191 * it up.
2193 if (cur_ref == 0) {
2194 cleanup_version = cur_version;
2195 APPENDLIST(version->changed_list,
2196 cleanup_version->changed_list,
2197 link);
2200 * Become the current version.
2202 version->writer = ISC_FALSE;
2203 rbtdb->current_version = version;
2204 rbtdb->current_serial = version->serial;
2205 rbtdb->future_version = NULL;
2208 * Keep the current version in the open list, and
2209 * gain a reference for the DB itself (see the DB
2210 * creation function below). This must be the only
2211 * case where we need to increment the counter from
2212 * zero and need to use isc_refcount_increment0().
2214 isc_refcount_increment0(&version->references,
2215 &cur_ref);
2216 INSIST(cur_ref == 1);
2217 PREPEND(rbtdb->open_versions,
2218 rbtdb->current_version, link);
2219 resigned_list = version->resigned_list;
2220 ISC_LIST_INIT(version->resigned_list);
2221 } else {
2223 * We're rolling back this transaction.
2225 cleanup_list = version->changed_list;
2226 ISC_LIST_INIT(version->changed_list);
2227 resigned_list = version->resigned_list;
2228 ISC_LIST_INIT(version->resigned_list);
2229 rollback = ISC_TRUE;
2230 cleanup_version = version;
2231 rbtdb->future_version = NULL;
2233 } else {
2234 if (version != rbtdb->current_version) {
2236 * There are no external or internal references
2237 * to this version and it can be cleaned up.
2239 cleanup_version = version;
2242 * Find the version with the least serial
2243 * number greater than ours.
2245 least_greater = PREV(version, link);
2246 if (least_greater == NULL)
2247 least_greater = rbtdb->current_version;
2249 INSIST(version->serial < least_greater->serial);
2251 * Is this the least open version?
2253 if (version->serial == rbtdb->least_serial) {
2255 * Yes. Install the new least open
2256 * version.
2258 make_least_version(rbtdb,
2259 least_greater,
2260 &cleanup_list);
2261 } else {
2263 * Add any unexecuted cleanups to
2264 * those of the least greater version.
2266 APPENDLIST(least_greater->changed_list,
2267 version->changed_list,
2268 link);
2270 } else if (version->serial == rbtdb->least_serial)
2271 INSIST(EMPTY(version->changed_list));
2272 UNLINK(rbtdb->open_versions, version, link);
2274 least_serial = rbtdb->least_serial;
2275 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2278 * Update the zone's secure status.
2280 if (writer && commit && !IS_CACHE(rbtdb))
2281 iszonesecure(db, version, rbtdb->origin_node);
2283 if (cleanup_version != NULL) {
2284 INSIST(EMPTY(cleanup_version->changed_list));
2285 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2286 sizeof(*cleanup_version));
2290 * Commit/rollback re-signed headers.
2292 for (header = HEAD(resigned_list);
2293 header != NULL;
2294 header = HEAD(resigned_list)) {
2295 nodelock_t *lock;
2297 ISC_LIST_UNLINK(resigned_list, header, link);
2299 lock = &rbtdb->node_locks[header->node->locknum].lock;
2300 NODE_LOCK(lock, isc_rwlocktype_write);
2301 if (rollback)
2302 resign_insert(rbtdb, header->node->locknum, header);
2303 decrement_reference(rbtdb, header->node, least_serial,
2304 isc_rwlocktype_write, isc_rwlocktype_none,
2305 ISC_FALSE);
2306 NODE_UNLOCK(lock, isc_rwlocktype_write);
2309 if (!EMPTY(cleanup_list)) {
2311 * We acquire a tree write lock here in order to make sure
2312 * that stale nodes will be removed in decrement_reference().
2313 * If we didn't have the lock, those nodes could miss the
2314 * chance to be removed until the server stops. The write lock
2315 * is expensive, but this event should be rare enough to justify
2316 * the cost.
2318 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2319 for (changed = HEAD(cleanup_list);
2320 changed != NULL;
2321 changed = next_changed) {
2322 nodelock_t *lock;
2324 next_changed = NEXT(changed, link);
2325 rbtnode = changed->node;
2326 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2328 NODE_LOCK(lock, isc_rwlocktype_write);
2330 * This is a good opportunity to purge any dead nodes,
2331 * so use it.
2333 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2335 if (rollback)
2336 rollback_node(rbtnode, serial);
2337 decrement_reference(rbtdb, rbtnode, least_serial,
2338 isc_rwlocktype_write,
2339 isc_rwlocktype_write, ISC_FALSE);
2341 NODE_UNLOCK(lock, isc_rwlocktype_write);
2343 isc_mem_put(rbtdb->common.mctx, changed,
2344 sizeof(*changed));
2346 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2349 end:
2350 *versionp = NULL;
2354 * Add the necessary magic for the wildcard name 'name'
2355 * to be found in 'rbtdb'.
2357 * In order for wildcard matching to work correctly in
2358 * zone_find(), we must ensure that a node for the wildcarding
2359 * level exists in the database, and has its 'find_callback'
2360 * and 'wild' bits set.
2362 * E.g. if the wildcard name is "*.sub.example." then we
2363 * must ensure that "sub.example." exists and is marked as
2364 * a wildcard level.
2366 static isc_result_t
2367 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2368 isc_result_t result;
2369 dns_name_t foundname;
2370 dns_offsets_t offsets;
2371 unsigned int n;
2372 dns_rbtnode_t *node = NULL;
2374 dns_name_init(&foundname, offsets);
2375 n = dns_name_countlabels(name);
2376 INSIST(n >= 2);
2377 n--;
2378 dns_name_getlabelsequence(name, 1, n, &foundname);
2379 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2380 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2381 return (result);
2382 node->nsec = DNS_RBT_NSEC_NORMAL;
2383 node->find_callback = 1;
2384 node->wild = 1;
2385 return (ISC_R_SUCCESS);
2388 static isc_result_t
2389 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2390 isc_result_t result;
2391 dns_name_t foundname;
2392 dns_offsets_t offsets;
2393 unsigned int n, l, i;
2395 dns_name_init(&foundname, offsets);
2396 n = dns_name_countlabels(name);
2397 l = dns_name_countlabels(&rbtdb->common.origin);
2398 i = l + 1;
2399 while (i < n) {
2400 dns_rbtnode_t *node = NULL; /* dummy */
2401 dns_name_getlabelsequence(name, n - i, i, &foundname);
2402 if (dns_name_iswildcard(&foundname)) {
2403 result = add_wildcard_magic(rbtdb, &foundname);
2404 if (result != ISC_R_SUCCESS)
2405 return (result);
2406 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2407 &node);
2408 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2409 return (result);
2410 node->nsec = DNS_RBT_NSEC_NORMAL;
2412 i++;
2414 return (ISC_R_SUCCESS);
2417 static isc_result_t
2418 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2419 dns_dbnode_t **nodep)
2421 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2422 dns_rbtnode_t *node = NULL;
2423 dns_name_t nodename;
2424 isc_result_t result;
2425 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2427 REQUIRE(VALID_RBTDB(rbtdb));
2429 dns_name_init(&nodename, NULL);
2430 RWLOCK(&rbtdb->tree_lock, locktype);
2431 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2432 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2433 if (result != ISC_R_SUCCESS) {
2434 RWUNLOCK(&rbtdb->tree_lock, locktype);
2435 if (!create) {
2436 if (result == DNS_R_PARTIALMATCH)
2437 result = ISC_R_NOTFOUND;
2438 return (result);
2441 * It would be nice to try to upgrade the lock instead of
2442 * unlocking then relocking.
2444 locktype = isc_rwlocktype_write;
2445 RWLOCK(&rbtdb->tree_lock, locktype);
2446 node = NULL;
2447 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2448 if (result == ISC_R_SUCCESS) {
2449 dns_rbt_namefromnode(node, &nodename);
2450 #ifdef DNS_RBT_USEHASH
2451 node->locknum = node->hashval % rbtdb->node_lock_count;
2452 #else
2453 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2454 rbtdb->node_lock_count;
2455 #endif
2456 add_empty_wildcards(rbtdb, name);
2458 if (dns_name_iswildcard(name)) {
2459 result = add_wildcard_magic(rbtdb, name);
2460 if (result != ISC_R_SUCCESS) {
2461 RWUNLOCK(&rbtdb->tree_lock, locktype);
2462 return (result);
2465 } else if (result != ISC_R_EXISTS) {
2466 RWUNLOCK(&rbtdb->tree_lock, locktype);
2467 return (result);
2470 reactivate_node(rbtdb, node, locktype);
2471 RWUNLOCK(&rbtdb->tree_lock, locktype);
2473 *nodep = (dns_dbnode_t *)node;
2475 return (ISC_R_SUCCESS);
2478 static isc_result_t
2479 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2480 dns_dbnode_t **nodep)
2482 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2483 dns_rbtnode_t *node = NULL;
2484 dns_name_t nodename;
2485 isc_result_t result;
2486 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2488 REQUIRE(VALID_RBTDB(rbtdb));
2490 dns_name_init(&nodename, NULL);
2491 RWLOCK(&rbtdb->tree_lock, locktype);
2492 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2493 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2494 if (result != ISC_R_SUCCESS) {
2495 RWUNLOCK(&rbtdb->tree_lock, locktype);
2496 if (!create) {
2497 if (result == DNS_R_PARTIALMATCH)
2498 result = ISC_R_NOTFOUND;
2499 return (result);
2502 * It would be nice to try to upgrade the lock instead of
2503 * unlocking then relocking.
2505 locktype = isc_rwlocktype_write;
2506 RWLOCK(&rbtdb->tree_lock, locktype);
2507 node = NULL;
2508 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2509 if (result == ISC_R_SUCCESS) {
2510 dns_rbt_namefromnode(node, &nodename);
2511 #ifdef DNS_RBT_USEHASH
2512 node->locknum = node->hashval % rbtdb->node_lock_count;
2513 #else
2514 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2515 rbtdb->node_lock_count;
2516 #endif
2517 node->nsec = DNS_RBT_NSEC_NSEC3;
2518 } else if (result != ISC_R_EXISTS) {
2519 RWUNLOCK(&rbtdb->tree_lock, locktype);
2520 return (result);
2522 } else {
2523 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2525 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2526 new_reference(rbtdb, node);
2527 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2528 RWUNLOCK(&rbtdb->tree_lock, locktype);
2530 *nodep = (dns_dbnode_t *)node;
2532 return (ISC_R_SUCCESS);
2535 static isc_result_t
2536 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2537 rbtdb_search_t *search = arg;
2538 rdatasetheader_t *header, *header_next;
2539 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2540 rdatasetheader_t *found;
2541 isc_result_t result;
2542 dns_rbtnode_t *onode;
2545 * We only want to remember the topmost zone cut, since it's the one
2546 * that counts, so we'll just continue if we've already found a
2547 * zonecut.
2549 if (search->zonecut != NULL)
2550 return (DNS_R_CONTINUE);
2552 found = NULL;
2553 result = DNS_R_CONTINUE;
2554 onode = search->rbtdb->origin_node;
2556 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2557 isc_rwlocktype_read);
2560 * Look for an NS or DNAME rdataset active in our version.
2562 ns_header = NULL;
2563 dname_header = NULL;
2564 sigdname_header = NULL;
2565 for (header = node->data; header != NULL; header = header_next) {
2566 header_next = header->next;
2567 if (header->type == dns_rdatatype_ns ||
2568 header->type == dns_rdatatype_dname ||
2569 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2570 do {
2571 if (header->serial <= search->serial &&
2572 !IGNORE(header)) {
2574 * Is this a "this rdataset doesn't
2575 * exist" record?
2577 if (NONEXISTENT(header))
2578 header = NULL;
2579 break;
2580 } else
2581 header = header->down;
2582 } while (header != NULL);
2583 if (header != NULL) {
2584 if (header->type == dns_rdatatype_dname)
2585 dname_header = header;
2586 else if (header->type ==
2587 RBTDB_RDATATYPE_SIGDNAME)
2588 sigdname_header = header;
2589 else if (node != onode ||
2590 IS_STUB(search->rbtdb)) {
2592 * We've found an NS rdataset that
2593 * isn't at the origin node. We check
2594 * that they're not at the origin node,
2595 * because otherwise we'd erroneously
2596 * treat the zone top as if it were
2597 * a delegation.
2599 ns_header = header;
2606 * Did we find anything?
2608 if (dname_header != NULL) {
2610 * Note that DNAME has precedence over NS if both exist.
2612 found = dname_header;
2613 search->zonecut_sigrdataset = sigdname_header;
2614 } else if (ns_header != NULL) {
2615 found = ns_header;
2616 search->zonecut_sigrdataset = NULL;
2619 if (found != NULL) {
2621 * We increment the reference count on node to ensure that
2622 * search->zonecut_rdataset will still be valid later.
2624 new_reference(search->rbtdb, node);
2625 search->zonecut = node;
2626 search->zonecut_rdataset = found;
2627 search->need_cleanup = ISC_TRUE;
2629 * Since we've found a zonecut, anything beneath it is
2630 * glue and is not subject to wildcard matching, so we
2631 * may clear search->wild.
2633 search->wild = ISC_FALSE;
2634 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2636 * If the caller does not want to find glue, then
2637 * this is the best answer and the search should
2638 * stop now.
2640 result = DNS_R_PARTIALMATCH;
2641 } else {
2642 dns_name_t *zcname;
2645 * The search will continue beneath the zone cut.
2646 * This may or may not be the best match. In case it
2647 * is, we need to remember the node name.
2649 zcname = dns_fixedname_name(&search->zonecut_name);
2650 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2651 ISC_R_SUCCESS);
2652 search->copy_name = ISC_TRUE;
2654 } else {
2656 * There is no zonecut at this node which is active in this
2657 * version.
2659 * If this is a "wild" node and the caller hasn't disabled
2660 * wildcard matching, remember that we've seen a wild node
2661 * in case we need to go searching for wildcard matches
2662 * later on.
2664 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2665 search->wild = ISC_TRUE;
2668 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2669 isc_rwlocktype_read);
2671 return (result);
2674 static inline void
2675 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2676 rdatasetheader_t *header, isc_stdtime_t now,
2677 dns_rdataset_t *rdataset)
2679 unsigned char *raw; /* RDATASLAB */
2682 * Caller must be holding the node reader lock.
2683 * XXXJT: technically, we need a writer lock, since we'll increment
2684 * the header count below. However, since the actual counter value
2685 * doesn't matter, we prioritize performance here. (We may want to
2686 * use atomic increment when available).
2689 if (rdataset == NULL)
2690 return;
2692 new_reference(rbtdb, node);
2694 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2696 rdataset->methods = &rdataset_methods;
2697 rdataset->rdclass = rbtdb->common.rdclass;
2698 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2699 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2700 rdataset->ttl = header->rdh_ttl - now;
2701 rdataset->trust = header->trust;
2702 if (NXDOMAIN(header))
2703 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2704 if (OPTOUT(header))
2705 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2706 rdataset->private1 = rbtdb;
2707 rdataset->private2 = node;
2708 raw = (unsigned char *)header + sizeof(*header);
2709 rdataset->private3 = raw;
2710 rdataset->count = header->count++;
2711 if (rdataset->count == ISC_UINT32_MAX)
2712 rdataset->count = 0;
2715 * Reset iterator state.
2717 rdataset->privateuint4 = 0;
2718 rdataset->private5 = NULL;
2721 * Add noqname proof.
2723 rdataset->private6 = header->noqname;
2724 if (rdataset->private6 != NULL)
2725 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2726 rdataset->private7 = header->closest;
2727 if (rdataset->private7 != NULL)
2728 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2731 * Copy out re-signing information.
2733 if (RESIGN(header)) {
2734 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2735 rdataset->resign = header->resign;
2736 } else
2737 rdataset->resign = 0;
2740 static inline isc_result_t
2741 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2742 dns_name_t *foundname, dns_rdataset_t *rdataset,
2743 dns_rdataset_t *sigrdataset)
2745 isc_result_t result;
2746 dns_name_t *zcname;
2747 rbtdb_rdatatype_t type;
2748 dns_rbtnode_t *node;
2751 * The caller MUST NOT be holding any node locks.
2754 node = search->zonecut;
2755 type = search->zonecut_rdataset->type;
2758 * If we have to set foundname, we do it before anything else.
2759 * If we were to set foundname after we had set nodep or bound the
2760 * rdataset, then we'd have to undo that work if dns_name_copy()
2761 * failed. By setting foundname first, there's nothing to undo if
2762 * we have trouble.
2764 if (foundname != NULL && search->copy_name) {
2765 zcname = dns_fixedname_name(&search->zonecut_name);
2766 result = dns_name_copy(zcname, foundname, NULL);
2767 if (result != ISC_R_SUCCESS)
2768 return (result);
2770 if (nodep != NULL) {
2772 * Note that we don't have to increment the node's reference
2773 * count here because we're going to use the reference we
2774 * already have in the search block.
2776 *nodep = node;
2777 search->need_cleanup = ISC_FALSE;
2779 if (rdataset != NULL) {
2780 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2781 isc_rwlocktype_read);
2782 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2783 search->now, rdataset);
2784 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2785 bind_rdataset(search->rbtdb, node,
2786 search->zonecut_sigrdataset,
2787 search->now, sigrdataset);
2788 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2789 isc_rwlocktype_read);
2792 if (type == dns_rdatatype_dname)
2793 return (DNS_R_DNAME);
2794 return (DNS_R_DELEGATION);
2797 static inline isc_boolean_t
2798 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2799 dns_rbtnode_t *node)
2801 unsigned char *raw; /* RDATASLAB */
2802 unsigned int count, size;
2803 dns_name_t ns_name;
2804 isc_boolean_t valid = ISC_FALSE;
2805 dns_offsets_t offsets;
2806 isc_region_t region;
2807 rdatasetheader_t *header;
2810 * No additional locking is required.
2814 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2815 * if it occurs at a zone cut, but is not valid below it.
2817 if (type == dns_rdatatype_ns) {
2818 if (node != search->zonecut) {
2819 return (ISC_FALSE);
2821 } else if (type != dns_rdatatype_a &&
2822 type != dns_rdatatype_aaaa &&
2823 type != dns_rdatatype_a6) {
2824 return (ISC_FALSE);
2827 header = search->zonecut_rdataset;
2828 raw = (unsigned char *)header + sizeof(*header);
2829 count = raw[0] * 256 + raw[1];
2830 #if DNS_RDATASET_FIXED
2831 raw += 2 + (4 * count);
2832 #else
2833 raw += 2;
2834 #endif
2836 while (count > 0) {
2837 count--;
2838 size = raw[0] * 256 + raw[1];
2839 #if DNS_RDATASET_FIXED
2840 raw += 4;
2841 #else
2842 raw += 2;
2843 #endif
2844 region.base = raw;
2845 region.length = size;
2846 raw += size;
2848 * XXX Until we have rdata structures, we have no choice but
2849 * to directly access the rdata format.
2851 dns_name_init(&ns_name, offsets);
2852 dns_name_fromregion(&ns_name, &region);
2853 if (dns_name_compare(&ns_name, name) == 0) {
2854 valid = ISC_TRUE;
2855 break;
2859 return (valid);
2862 static inline isc_boolean_t
2863 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2864 dns_name_t *name)
2866 dns_fixedname_t fnext;
2867 dns_fixedname_t forigin;
2868 dns_name_t *next;
2869 dns_name_t *origin;
2870 dns_name_t prefix;
2871 dns_rbtdb_t *rbtdb;
2872 dns_rbtnode_t *node;
2873 isc_result_t result;
2874 isc_boolean_t answer = ISC_FALSE;
2875 rdatasetheader_t *header;
2877 rbtdb = search->rbtdb;
2879 dns_name_init(&prefix, NULL);
2880 dns_fixedname_init(&fnext);
2881 next = dns_fixedname_name(&fnext);
2882 dns_fixedname_init(&forigin);
2883 origin = dns_fixedname_name(&forigin);
2885 result = dns_rbtnodechain_next(chain, NULL, NULL);
2886 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2887 node = NULL;
2888 result = dns_rbtnodechain_current(chain, &prefix,
2889 origin, &node);
2890 if (result != ISC_R_SUCCESS)
2891 break;
2892 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2893 isc_rwlocktype_read);
2894 for (header = node->data;
2895 header != NULL;
2896 header = header->next) {
2897 if (header->serial <= search->serial &&
2898 !IGNORE(header) && EXISTS(header))
2899 break;
2901 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2902 isc_rwlocktype_read);
2903 if (header != NULL)
2904 break;
2905 result = dns_rbtnodechain_next(chain, NULL, NULL);
2907 if (result == ISC_R_SUCCESS)
2908 result = dns_name_concatenate(&prefix, origin, next, NULL);
2909 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2910 answer = ISC_TRUE;
2911 return (answer);
2914 static inline isc_boolean_t
2915 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2916 dns_fixedname_t fnext;
2917 dns_fixedname_t forigin;
2918 dns_fixedname_t fprev;
2919 dns_name_t *next;
2920 dns_name_t *origin;
2921 dns_name_t *prev;
2922 dns_name_t name;
2923 dns_name_t rname;
2924 dns_name_t tname;
2925 dns_rbtdb_t *rbtdb;
2926 dns_rbtnode_t *node;
2927 dns_rbtnodechain_t chain;
2928 isc_boolean_t check_next = ISC_TRUE;
2929 isc_boolean_t check_prev = ISC_TRUE;
2930 isc_boolean_t answer = ISC_FALSE;
2931 isc_result_t result;
2932 rdatasetheader_t *header;
2933 unsigned int n;
2935 rbtdb = search->rbtdb;
2937 dns_name_init(&name, NULL);
2938 dns_name_init(&tname, NULL);
2939 dns_name_init(&rname, NULL);
2940 dns_fixedname_init(&fnext);
2941 next = dns_fixedname_name(&fnext);
2942 dns_fixedname_init(&fprev);
2943 prev = dns_fixedname_name(&fprev);
2944 dns_fixedname_init(&forigin);
2945 origin = dns_fixedname_name(&forigin);
2948 * Find if qname is at or below a empty node.
2949 * Use our own copy of the chain.
2952 chain = search->chain;
2953 do {
2954 node = NULL;
2955 result = dns_rbtnodechain_current(&chain, &name,
2956 origin, &node);
2957 if (result != ISC_R_SUCCESS)
2958 break;
2959 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2960 isc_rwlocktype_read);
2961 for (header = node->data;
2962 header != NULL;
2963 header = header->next) {
2964 if (header->serial <= search->serial &&
2965 !IGNORE(header) && EXISTS(header))
2966 break;
2968 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2969 isc_rwlocktype_read);
2970 if (header != NULL)
2971 break;
2972 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2973 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2974 if (result == ISC_R_SUCCESS)
2975 result = dns_name_concatenate(&name, origin, prev, NULL);
2976 if (result != ISC_R_SUCCESS)
2977 check_prev = ISC_FALSE;
2979 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2980 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2981 node = NULL;
2982 result = dns_rbtnodechain_current(&chain, &name,
2983 origin, &node);
2984 if (result != ISC_R_SUCCESS)
2985 break;
2986 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2987 isc_rwlocktype_read);
2988 for (header = node->data;
2989 header != NULL;
2990 header = header->next) {
2991 if (header->serial <= search->serial &&
2992 !IGNORE(header) && EXISTS(header))
2993 break;
2995 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2996 isc_rwlocktype_read);
2997 if (header != NULL)
2998 break;
2999 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3001 if (result == ISC_R_SUCCESS)
3002 result = dns_name_concatenate(&name, origin, next, NULL);
3003 if (result != ISC_R_SUCCESS)
3004 check_next = ISC_FALSE;
3006 dns_name_clone(qname, &rname);
3009 * Remove the wildcard label to find the terminal name.
3011 n = dns_name_countlabels(wname);
3012 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3014 do {
3015 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3016 (check_next && dns_name_issubdomain(next, &rname))) {
3017 answer = ISC_TRUE;
3018 break;
3021 * Remove the left hand label.
3023 n = dns_name_countlabels(&rname);
3024 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3025 } while (!dns_name_equal(&rname, &tname));
3026 return (answer);
3029 static inline isc_result_t
3030 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3031 dns_name_t *qname)
3033 unsigned int i, j;
3034 dns_rbtnode_t *node, *level_node, *wnode;
3035 rdatasetheader_t *header;
3036 isc_result_t result = ISC_R_NOTFOUND;
3037 dns_name_t name;
3038 dns_name_t *wname;
3039 dns_fixedname_t fwname;
3040 dns_rbtdb_t *rbtdb;
3041 isc_boolean_t done, wild, active;
3042 dns_rbtnodechain_t wchain;
3045 * Caller must be holding the tree lock and MUST NOT be holding
3046 * any node locks.
3050 * Examine each ancestor level. If the level's wild bit
3051 * is set, then construct the corresponding wildcard name and
3052 * search for it. If the wildcard node exists, and is active in
3053 * this version, we're done. If not, then we next check to see
3054 * if the ancestor is active in this version. If so, then there
3055 * can be no possible wildcard match and again we're done. If not,
3056 * continue the search.
3059 rbtdb = search->rbtdb;
3060 i = search->chain.level_matches;
3061 done = ISC_FALSE;
3062 node = *nodep;
3063 do {
3064 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3065 isc_rwlocktype_read);
3068 * First we try to figure out if this node is active in
3069 * the search's version. We do this now, even though we
3070 * may not need the information, because it simplifies the
3071 * locking and code flow.
3073 for (header = node->data;
3074 header != NULL;
3075 header = header->next) {
3076 if (header->serial <= search->serial &&
3077 !IGNORE(header) && EXISTS(header))
3078 break;
3080 if (header != NULL)
3081 active = ISC_TRUE;
3082 else
3083 active = ISC_FALSE;
3085 if (node->wild)
3086 wild = ISC_TRUE;
3087 else
3088 wild = ISC_FALSE;
3090 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3091 isc_rwlocktype_read);
3093 if (wild) {
3095 * Construct the wildcard name for this level.
3097 dns_name_init(&name, NULL);
3098 dns_rbt_namefromnode(node, &name);
3099 dns_fixedname_init(&fwname);
3100 wname = dns_fixedname_name(&fwname);
3101 result = dns_name_concatenate(dns_wildcardname, &name,
3102 wname, NULL);
3103 j = i;
3104 while (result == ISC_R_SUCCESS && j != 0) {
3105 j--;
3106 level_node = search->chain.levels[j];
3107 dns_name_init(&name, NULL);
3108 dns_rbt_namefromnode(level_node, &name);
3109 result = dns_name_concatenate(wname,
3110 &name,
3111 wname,
3112 NULL);
3114 if (result != ISC_R_SUCCESS)
3115 break;
3117 wnode = NULL;
3118 dns_rbtnodechain_init(&wchain, NULL);
3119 result = dns_rbt_findnode(rbtdb->tree, wname,
3120 NULL, &wnode, &wchain,
3121 DNS_RBTFIND_EMPTYDATA,
3122 NULL, NULL);
3123 if (result == ISC_R_SUCCESS) {
3124 nodelock_t *lock;
3127 * We have found the wildcard node. If it
3128 * is active in the search's version, we're
3129 * done.
3131 lock = &rbtdb->node_locks[wnode->locknum].lock;
3132 NODE_LOCK(lock, isc_rwlocktype_read);
3133 for (header = wnode->data;
3134 header != NULL;
3135 header = header->next) {
3136 if (header->serial <= search->serial &&
3137 !IGNORE(header) && EXISTS(header))
3138 break;
3140 NODE_UNLOCK(lock, isc_rwlocktype_read);
3141 if (header != NULL ||
3142 activeempty(search, &wchain, wname)) {
3143 if (activeemtpynode(search, qname,
3144 wname)) {
3145 return (ISC_R_NOTFOUND);
3148 * The wildcard node is active!
3150 * Note: result is still ISC_R_SUCCESS
3151 * so we don't have to set it.
3153 *nodep = wnode;
3154 break;
3156 } else if (result != ISC_R_NOTFOUND &&
3157 result != DNS_R_PARTIALMATCH) {
3159 * An error has occurred. Bail out.
3161 break;
3165 if (active) {
3167 * The level node is active. Any wildcarding
3168 * present at higher levels has no
3169 * effect and we're done.
3171 result = ISC_R_NOTFOUND;
3172 break;
3175 if (i > 0) {
3176 i--;
3177 node = search->chain.levels[i];
3178 } else
3179 done = ISC_TRUE;
3180 } while (!done);
3182 return (result);
3185 static isc_boolean_t
3186 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3188 dns_rdata_t rdata = DNS_RDATA_INIT;
3189 dns_rdata_nsec3_t nsec3;
3190 unsigned char *raw; /* RDATASLAB */
3191 unsigned int rdlen, count;
3192 isc_region_t region;
3193 isc_result_t result;
3195 REQUIRE(header->type == dns_rdatatype_nsec3);
3197 raw = (unsigned char *)header + sizeof(*header);
3198 count = raw[0] * 256 + raw[1]; /* count */
3199 #if DNS_RDATASET_FIXED
3200 raw += count * 4 + 2;
3201 #else
3202 raw += 2;
3203 #endif
3204 while (count-- > 0) {
3205 rdlen = raw[0] * 256 + raw[1];
3206 #if DNS_RDATASET_FIXED
3207 raw += 4;
3208 #else
3209 raw += 2;
3210 #endif
3211 region.base = raw;
3212 region.length = rdlen;
3213 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3214 dns_rdatatype_nsec3, &region);
3215 raw += rdlen;
3216 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3217 INSIST(result == ISC_R_SUCCESS);
3218 if (nsec3.hash == search->rbtversion->hash &&
3219 nsec3.iterations == search->rbtversion->iterations &&
3220 nsec3.salt_length == search->rbtversion->salt_length &&
3221 memcmp(nsec3.salt, search->rbtversion->salt,
3222 nsec3.salt_length) == 0)
3223 return (ISC_TRUE);
3224 dns_rdata_reset(&rdata);
3226 return (ISC_FALSE);
3229 static inline isc_result_t
3230 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3231 dns_name_t *name, dns_name_t *origin,
3232 dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3233 isc_boolean_t *firstp)
3235 dns_fixedname_t ftarget;
3236 dns_name_t *target;
3237 dns_rbtnode_t *nsecnode;
3238 isc_result_t result;
3240 if (type == dns_rdatatype_nsec3)
3241 return (dns_rbtnodechain_prev(&search->chain, NULL, NULL));
3243 dns_fixedname_init(&ftarget);
3244 target = dns_fixedname_name(&ftarget);
3246 for (;;) {
3247 if (*firstp) {
3249 * Construct the name of the second node to check.
3250 * It is the first node sought in the NSEC tree.
3252 *firstp = ISC_FALSE;
3253 dns_rbtnodechain_init(nsecchain, NULL);
3254 result = dns_name_concatenate(name, origin,
3255 target, NULL);
3256 if (result != ISC_R_SUCCESS)
3257 return (result);
3258 nsecnode = NULL;
3259 result = dns_rbt_findnode(search->rbtdb->nsec,
3260 target, NULL,
3261 &nsecnode, nsecchain,
3262 DNS_RBTFIND_NOOPTIONS,
3263 NULL, NULL);
3264 if (result == ISC_R_SUCCESS) {
3266 * Since this was the first loop, finding the
3267 * name in the NSEC tree implies that the first
3268 * node checked in the main tree had an
3269 * unacceptable NSEC record.
3270 * Try the previous node in the NSEC tree.
3272 result = dns_rbtnodechain_prev(nsecchain,
3273 name, origin);
3274 if (result == DNS_R_NEWORIGIN)
3275 result = ISC_R_SUCCESS;
3276 } else if (result == ISC_R_NOTFOUND
3277 || result == DNS_R_PARTIALMATCH) {
3278 result = dns_rbtnodechain_current(nsecchain,
3279 name, origin, NULL);
3280 if (result == ISC_R_NOTFOUND)
3281 result = ISC_R_NOMORE;
3283 } else {
3285 * This is a second or later trip through the auxiliary
3286 * tree for the name of a third or earlier NSEC node in
3287 * the main tree. Previous trips through the NSEC tree
3288 * must have found nodes in the main tree with NSEC
3289 * records. Perhaps they lacked signature records.
3291 result = dns_rbtnodechain_prev(nsecchain, name, origin);
3292 if (result == DNS_R_NEWORIGIN)
3293 result = ISC_R_SUCCESS;
3294 if (result != ISC_R_SUCCESS)
3295 return (result);
3297 if (result != ISC_R_SUCCESS)
3298 return (result);
3301 * Construct the name to seek in the main tree.
3303 result = dns_name_concatenate(name, origin, target, NULL);
3304 if (result != ISC_R_SUCCESS)
3305 return (result);
3307 *nodep = NULL;
3308 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3309 nodep, &search->chain,
3310 DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3311 if (result == ISC_R_SUCCESS)
3312 return (result);
3315 * There should always be a node in the main tree with the
3316 * same name as the node in the auxiliary NSEC tree, except for
3317 * nodes in the auxiliary tree that are awaiting deletion.
3319 if (result == DNS_R_PARTIALMATCH)
3320 result = ISC_R_NOTFOUND;
3322 if (result != ISC_R_NOTFOUND) {
3323 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3324 DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3325 "previous_closest_nsec(): %s",
3326 isc_result_totext(result));
3327 return (DNS_R_BADDB);
3332 static inline isc_result_t
3333 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3334 dns_name_t *foundname, dns_rdataset_t *rdataset,
3335 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3336 dns_db_secure_t secure)
3338 dns_rbtnode_t *node, *prevnode;
3339 rdatasetheader_t *header, *header_next, *found, *foundsig;
3340 dns_rbtnodechain_t nsecchain;
3341 isc_boolean_t empty_node;
3342 isc_result_t result;
3343 dns_fixedname_t fname, forigin;
3344 dns_name_t *name, *origin;
3345 dns_rdatatype_t type;
3346 rbtdb_rdatatype_t sigtype;
3347 isc_boolean_t wraps;
3348 isc_boolean_t first = ISC_TRUE;
3349 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3351 if (tree == search->rbtdb->nsec3) {
3352 type = dns_rdatatype_nsec3;
3353 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3354 wraps = ISC_TRUE;
3355 } else {
3356 type = dns_rdatatype_nsec;
3357 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3358 wraps = ISC_FALSE;
3362 * Use the auxiliary tree only starting with the second node in the
3363 * hope that the original node will be right much of the time.
3365 dns_fixedname_init(&fname);
3366 name = dns_fixedname_name(&fname);
3367 dns_fixedname_init(&forigin);
3368 origin = dns_fixedname_name(&forigin);
3369 again:
3370 node = NULL;
3371 result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3372 if (result != ISC_R_SUCCESS)
3373 return (result);
3374 do {
3375 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3376 isc_rwlocktype_read);
3377 found = NULL;
3378 foundsig = NULL;
3379 empty_node = ISC_TRUE;
3380 for (header = node->data;
3381 header != NULL;
3382 header = header_next) {
3383 header_next = header->next;
3385 * Look for an active, extant NSEC or RRSIG NSEC.
3387 do {
3388 if (header->serial <= search->serial &&
3389 !IGNORE(header)) {
3391 * Is this a "this rdataset doesn't
3392 * exist" record?
3394 if (NONEXISTENT(header))
3395 header = NULL;
3396 break;
3397 } else
3398 header = header->down;
3399 } while (header != NULL);
3400 if (header != NULL) {
3402 * We now know that there is at least one
3403 * active rdataset at this node.
3405 empty_node = ISC_FALSE;
3406 if (header->type == type) {
3407 found = header;
3408 if (foundsig != NULL)
3409 break;
3410 } else if (header->type == sigtype) {
3411 foundsig = header;
3412 if (found != NULL)
3413 break;
3417 if (!empty_node) {
3418 if (found != NULL && search->rbtversion->havensec3 &&
3419 found->type == dns_rdatatype_nsec3 &&
3420 !matchparams(found, search)) {
3421 empty_node = ISC_TRUE;
3422 found = NULL;
3423 foundsig = NULL;
3424 result = dns_rbtnodechain_prev(&search->chain,
3425 NULL, NULL);
3426 } else if (found != NULL &&
3427 (foundsig != NULL || !need_sig)) {
3429 * We've found the right NSEC/NSEC3 record.
3431 * Note: for this to really be the right
3432 * NSEC record, it's essential that the NSEC
3433 * records of any nodes obscured by a zone
3434 * cut have been removed; we assume this is
3435 * the case.
3437 result = dns_name_concatenate(name, origin,
3438 foundname, NULL);
3439 if (result == ISC_R_SUCCESS) {
3440 if (nodep != NULL) {
3441 new_reference(search->rbtdb,
3442 node);
3443 *nodep = node;
3445 bind_rdataset(search->rbtdb, node,
3446 found, search->now,
3447 rdataset);
3448 if (foundsig != NULL)
3449 bind_rdataset(search->rbtdb,
3450 node,
3451 foundsig,
3452 search->now,
3453 sigrdataset);
3455 } else if (found == NULL && foundsig == NULL) {
3457 * This node is active, but has no NSEC or
3458 * RRSIG NSEC. That means it's glue or
3459 * other obscured zone data that isn't
3460 * relevant for our search. Treat the
3461 * node as if it were empty and keep looking.
3463 empty_node = ISC_TRUE;
3464 result = previous_closest_nsec(type, search,
3465 name, origin, &prevnode,
3466 &nsecchain, &first);
3467 } else {
3469 * We found an active node, but either the
3470 * NSEC or the RRSIG NSEC is missing. This
3471 * shouldn't happen.
3473 result = DNS_R_BADDB;
3475 } else {
3477 * This node isn't active. We've got to keep
3478 * looking.
3480 result = previous_closest_nsec(type, search,
3481 name, origin, &prevnode,
3482 &nsecchain, &first);
3484 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3485 isc_rwlocktype_read);
3486 node = prevnode;
3487 } while (empty_node && result == ISC_R_SUCCESS);
3489 if (!first)
3490 dns_rbtnodechain_invalidate(&nsecchain);
3492 if (result == ISC_R_NOMORE && wraps) {
3493 result = dns_rbtnodechain_last(&search->chain, tree,
3494 NULL, NULL);
3495 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3496 wraps = ISC_FALSE;
3497 goto again;
3502 * If the result is ISC_R_NOMORE, then we got to the beginning of
3503 * the database and didn't find a NSEC record. This shouldn't
3504 * happen.
3506 if (result == ISC_R_NOMORE)
3507 result = DNS_R_BADDB;
3509 return (result);
/*
 * zone_find: look up 'name' / 'type' in one version of a zone database.
 *
 * 'version' may be NULL, in which case the current version is attached for
 * the duration of the call and closed again before returning.  'now' is
 * ignored (search.now is forced to 0).  When DNS_DBFIND_FORCENSEC3 is set in
 * 'options' the NSEC3 tree is searched instead of the main tree.
 *
 * The tree lock is held for reading across the whole search; the lock of the
 * matched node is held for reading while its rdataset headers are scanned.
 *
 * Result codes assigned directly in this function:
 *	ISC_R_SUCCESS		the requested type was found
 *	DNS_R_CNAME		a CNAME was found instead of 'type'
 *	DNS_R_GLUE, DNS_R_ZONECUT  data found at or beneath a zone cut
 *	DNS_R_NXRRSET		name exists, type does not
 *	DNS_R_NXDOMAIN, DNS_R_EMPTYNAME, DNS_R_EMPTYWILD  name does not exist
 *	DNS_R_BADDB		required NSEC / RRSIG NSEC data is missing
 * Anything returned by setup_delegation(), find_wildcard(), dns_name_copy(),
 * find_closest_nsec() or dns_rbt_findnode() may also be passed through.
 *
 * For type ANY no rdataset is bound; only '*nodep' and 'foundname' are set.
 * DNS_NAMEATTR_WILDCARD is set on 'foundname' when a wildcard was matched.
 */
3512 static isc_result_t
3513 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3514 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3515 dns_dbnode_t **nodep, dns_name_t *foundname,
3516 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3518 dns_rbtnode_t *node = NULL;
3519 isc_result_t result;
3520 rbtdb_search_t search;
3521 isc_boolean_t cname_ok = ISC_TRUE;
3522 isc_boolean_t close_version = ISC_FALSE;
3523 isc_boolean_t maybe_zonecut = ISC_FALSE;
3524 isc_boolean_t at_zonecut = ISC_FALSE;
3525 isc_boolean_t wild;
3526 isc_boolean_t empty_node;
3527 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3528 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3529 rbtdb_rdatatype_t sigtype;
3530 isc_boolean_t active;
3531 dns_rbtnodechain_t chain;
3532 nodelock_t *lock;
3533 dns_rbt_t *tree;
3535 search.rbtdb = (dns_rbtdb_t *)db;
3537 REQUIRE(VALID_RBTDB(search.rbtdb));
3540 * We don't care about 'now'.
3542 UNUSED(now);
3545 * If the caller didn't supply a version, attach to the current
3546 * version.
3548 if (version == NULL) {
3549 currentversion(db, &version);
3550 close_version = ISC_TRUE;
3553 search.rbtversion = version;
3554 search.serial = search.rbtversion->serial;
3555 search.options = options;
3556 search.copy_name = ISC_FALSE;
3557 search.need_cleanup = ISC_FALSE;
3558 search.wild = ISC_FALSE;
3559 search.zonecut = NULL;
3560 dns_fixedname_init(&search.zonecut_name);
3561 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3562 search.now = 0;
3565 * 'wild' will be true iff. we've matched a wildcard.
3567 wild = ISC_FALSE;
3569 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3572 * Search down from the root of the tree. If, while going down, we
3573 * encounter a callback node, zone_zonecut_callback() will search the
3574 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3576 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3577 search.rbtdb->tree;
3578 result = dns_rbt_findnode(tree, name, foundname, &node,
3579 &search.chain, DNS_RBTFIND_EMPTYDATA,
3580 zone_zonecut_callback, &search);
3582 if (result == DNS_R_PARTIALMATCH) {
/*
 * The partial_match label is also entered by goto from the header scan below
 * (node empty in this version, or NSEC3 parameters not matching); those
 * jumps release the node lock first, so only the tree lock is held here.
 */
3583 partial_match:
3584 if (search.zonecut != NULL) {
3585 result = setup_delegation(&search, nodep, foundname,
3586 rdataset, sigrdataset);
3587 goto tree_exit;
3590 if (search.wild) {
3592 * At least one of the levels in the search chain
3593 * potentially has a wildcard. For each such level,
3594 * we must see if there's a matching wildcard active
3595 * in the current version.
3597 result = find_wildcard(&search, &node, name);
3598 if (result == ISC_R_SUCCESS) {
3599 result = dns_name_copy(name, foundname, NULL);
3600 if (result != ISC_R_SUCCESS)
3601 goto tree_exit;
3602 wild = ISC_TRUE;
3603 goto found;
3605 else if (result != ISC_R_NOTFOUND)
3606 goto tree_exit;
/* activeempty() is handed a copy of the chain rather than search.chain. */
3609 chain = search.chain;
3610 active = activeempty(&search, &chain, name);
3613 * If we're here, then the name does not exist, is not
3614 * beneath a zonecut, and there's no matching wildcard.
3616 if ((search.rbtversion->secure == dns_db_secure &&
3617 !search.rbtversion->havensec3) ||
3618 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3619 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3621 result = find_closest_nsec(&search, nodep, foundname,
3622 rdataset, sigrdataset, tree,
3623 search.rbtversion->secure);
3624 if (result == ISC_R_SUCCESS)
3625 result = active ? DNS_R_EMPTYNAME :
3626 DNS_R_NXDOMAIN;
3627 } else
3628 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3629 goto tree_exit;
3630 } else if (result != ISC_R_SUCCESS)
3631 goto tree_exit;
3633 found:
3635 * We have found a node whose name is the desired name, or we
3636 * have matched a wildcard.
3639 if (search.zonecut != NULL) {
3641 * If we're beneath a zone cut, we don't want to look for
3642 * CNAMEs because they're not legitimate zone glue.
3644 cname_ok = ISC_FALSE;
3645 } else {
3647 * The node may be a zone cut itself. If it might be one,
3648 * make sure we check for it later.
3650 * DS records live above the zone cut in ordinary zone so
3651 * we want to ignore any referral.
3653 * Stub zones don't have anything "above" the delgation so
3654 * we always return a referral.
3656 if (node->find_callback &&
3657 ((node != search.rbtdb->origin_node &&
3658 !dns_rdatatype_atparent(type)) ||
3659 IS_STUB(search.rbtdb)))
3660 maybe_zonecut = ISC_TRUE;
3664 * Certain DNSSEC types are not subject to CNAME matching
3665 * (RFC4035, section 2.5 and RFC3007).
3667 * We don't check for RRSIG, because we don't store RRSIG records
3668 * directly.
3670 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3671 cname_ok = ISC_FALSE;
3674 * We now go looking for rdata...
3677 lock = &search.rbtdb->node_locks[node->locknum].lock;
3678 NODE_LOCK(lock, isc_rwlocktype_read);
3680 found = NULL;
3681 foundsig = NULL;
3682 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3683 nsecheader = NULL;
3684 nsecsig = NULL;
3685 cnamesig = NULL;
3686 empty_node = ISC_TRUE;
3687 for (header = node->data; header != NULL; header = header_next) {
3688 header_next = header->next;
3690 * Look for an active, extant rdataset.
3692 do {
3693 if (header->serial <= search.serial &&
3694 !IGNORE(header)) {
3696 * Is this a "this rdataset doesn't
3697 * exist" record?
3699 if (NONEXISTENT(header))
3700 header = NULL;
3701 break;
3702 } else
3703 header = header->down;
3704 } while (header != NULL);
3705 if (header != NULL) {
3707 * We now know that there is at least one active
3708 * rdataset at this node.
3710 empty_node = ISC_FALSE;
3713 * Do special zone cut handling, if requested.
3715 if (maybe_zonecut &&
3716 header->type == dns_rdatatype_ns) {
3718 * We increment the reference count on node to
3719 * ensure that search->zonecut_rdataset will
3720 * still be valid later.
3722 new_reference(search.rbtdb, node);
3723 search.zonecut = node;
3724 search.zonecut_rdataset = header;
3725 search.zonecut_sigrdataset = NULL;
3726 search.need_cleanup = ISC_TRUE;
3727 maybe_zonecut = ISC_FALSE;
3728 at_zonecut = ISC_TRUE;
3730 * It is not clear if KEY should still be
3731 * allowed at the parent side of the zone
3732 * cut or not. It is needed for RFC3007
3733 * validated updates.
3735 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3736 && type != dns_rdatatype_nsec
3737 && type != dns_rdatatype_key) {
3739 * Glue is not OK, but any answer we
3740 * could return would be glue. Return
3741 * the delegation.
3743 found = NULL;
3744 break;
3746 if (found != NULL && foundsig != NULL)
3747 break;
3752 * If the NSEC3 record doesn't match the chain
3753 * we are using behave as if it isn't here.
3755 if (header->type == dns_rdatatype_nsec3 &&
3756 !matchparams(header, &search)) {
3757 NODE_UNLOCK(lock, isc_rwlocktype_read);
3758 goto partial_match;
3761 * If we found a type we were looking for,
3762 * remember it.
3764 if (header->type == type ||
3765 type == dns_rdatatype_any ||
3766 (header->type == dns_rdatatype_cname &&
3767 cname_ok)) {
3769 * We've found the answer!
3771 found = header;
3772 if (header->type == dns_rdatatype_cname &&
3773 cname_ok) {
3775 * We may be finding a CNAME instead
3776 * of the desired type.
3778 * If we've already got the CNAME RRSIG,
3779 * use it, otherwise change sigtype
3780 * so that we find it.
3782 if (cnamesig != NULL)
3783 foundsig = cnamesig;
3784 else
3785 sigtype =
3786 RBTDB_RDATATYPE_SIGCNAME;
3789 * If we've got all we need, end the search.
3791 if (!maybe_zonecut && foundsig != NULL)
3792 break;
3793 } else if (header->type == sigtype) {
3795 * We've found the RRSIG rdataset for our
3796 * target type. Remember it.
3798 foundsig = header;
3800 * If we've got all we need, end the search.
3802 if (!maybe_zonecut && found != NULL)
3803 break;
3804 } else if (header->type == dns_rdatatype_nsec &&
3805 !search.rbtversion->havensec3) {
3807 * Remember a NSEC rdataset even if we're
3808 * not specifically looking for it, because
3809 * we might need it later.
3811 nsecheader = header;
3812 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3813 !search.rbtversion->havensec3) {
3815 * If we need the NSEC rdataset, we'll also
3816 * need its signature.
3818 nsecsig = header;
3819 } else if (cname_ok &&
3820 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3822 * If we get a CNAME match, we'll also need
3823 * its signature.
3825 cnamesig = header;
3830 if (empty_node) {
3832 * We have an exact match for the name, but there are no
3833 * active rdatasets in the desired version. That means that
3834 * this node doesn't exist in the desired version, and that
3835 * we really have a partial match.
3837 if (!wild) {
3838 NODE_UNLOCK(lock, isc_rwlocktype_read);
3839 goto partial_match;
3844 * If we didn't find what we were looking for...
3846 if (found == NULL) {
3847 if (search.zonecut != NULL) {
3849 * We were trying to find glue at a node beneath a
3850 * zone cut, but didn't.
3852 * Return the delegation.
3854 NODE_UNLOCK(lock, isc_rwlocktype_read);
3855 result = setup_delegation(&search, nodep, foundname,
3856 rdataset, sigrdataset);
3857 goto tree_exit;
3860 * The desired type doesn't exist.
3862 result = DNS_R_NXRRSET;
3863 if (search.rbtversion->secure == dns_db_secure &&
3864 !search.rbtversion->havensec3 &&
3865 (nsecheader == NULL || nsecsig == NULL)) {
3867 * The zone is secure but there's no NSEC,
3868 * or the NSEC has no signature!
3870 if (!wild) {
3871 result = DNS_R_BADDB;
3872 goto node_exit;
3875 NODE_UNLOCK(lock, isc_rwlocktype_read);
3876 result = find_closest_nsec(&search, nodep, foundname,
3877 rdataset, sigrdataset,
3878 search.rbtdb->tree,
3879 search.rbtversion->secure);
3880 if (result == ISC_R_SUCCESS)
3881 result = DNS_R_EMPTYWILD;
3882 goto tree_exit;
3884 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3885 nsecheader == NULL)
3888 * There's no NSEC record, and we were told
3889 * to find one.
3891 result = DNS_R_BADDB;
3892 goto node_exit;
3894 if (nodep != NULL) {
3895 new_reference(search.rbtdb, node);
3896 *nodep = node;
3898 if ((search.rbtversion->secure == dns_db_secure &&
3899 !search.rbtversion->havensec3) ||
3900 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3902 bind_rdataset(search.rbtdb, node, nsecheader,
3903 0, rdataset);
3904 if (nsecsig != NULL)
3905 bind_rdataset(search.rbtdb, node,
3906 nsecsig, 0, sigrdataset);
3908 if (wild)
3909 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3910 goto node_exit;
3914 * We found what we were looking for, or we found a CNAME.
3917 if (type != found->type &&
3918 type != dns_rdatatype_any &&
3919 found->type == dns_rdatatype_cname) {
3921 * We weren't doing an ANY query and we found a CNAME instead
3922 * of the type we were looking for, so we need to indicate
3923 * that result to the caller.
3925 result = DNS_R_CNAME;
3926 } else if (search.zonecut != NULL) {
3928 * If we're beneath a zone cut, we must indicate that the
3929 * result is glue, unless we're actually at the zone cut
3930 * and the type is NSEC or KEY.
3932 if (search.zonecut == node) {
3934 * It is not clear if KEY should still be
3935 * allowed at the parent side of the zone
3936 * cut or not. It is needed for RFC3007
3937 * validated updates.
3939 if (type == dns_rdatatype_nsec ||
3940 type == dns_rdatatype_nsec3 ||
3941 type == dns_rdatatype_key)
3942 result = ISC_R_SUCCESS;
3943 else if (type == dns_rdatatype_any)
3944 result = DNS_R_ZONECUT;
3945 else
3946 result = DNS_R_GLUE;
3947 } else
3948 result = DNS_R_GLUE;
3950 * We might have found data that isn't glue, but was occluded
3951 * by a dynamic update. If the caller cares about this, they
3952 * will have told us to validate glue.
3954 * XXX We should cache the glue validity state!
3956 if (result == DNS_R_GLUE &&
3957 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3958 !valid_glue(&search, foundname, type, node)) {
3959 NODE_UNLOCK(lock, isc_rwlocktype_read);
3960 result = setup_delegation(&search, nodep, foundname,
3961 rdataset, sigrdataset);
3962 goto tree_exit;
3964 } else {
3966 * An ordinary successful query!
3968 result = ISC_R_SUCCESS;
/*
 * When at_zonecut is set, a reference on this node was already taken while
 * recording the zone cut; it is handed to the caller here, so the cleanup at
 * tree_exit must not release it (need_cleanup is cleared).
 */
3971 if (nodep != NULL) {
3972 if (!at_zonecut)
3973 new_reference(search.rbtdb, node);
3974 else
3975 search.need_cleanup = ISC_FALSE;
3976 *nodep = node;
3979 if (type != dns_rdatatype_any) {
3980 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3981 if (foundsig != NULL)
3982 bind_rdataset(search.rbtdb, node, foundsig, 0,
3983 sigrdataset);
3986 if (wild)
3987 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
/* node_exit: the node lock is still held; tree_exit: only the tree lock is. */
3989 node_exit:
3990 NODE_UNLOCK(lock, isc_rwlocktype_read);
3992 tree_exit:
3993 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3996 * If we found a zonecut but aren't going to use it, we have to
3997 * let go of it.
3999 if (search.need_cleanup) {
4000 node = search.zonecut;
4001 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4003 NODE_LOCK(lock, isc_rwlocktype_read);
4004 decrement_reference(search.rbtdb, node, 0,
4005 isc_rwlocktype_read, isc_rwlocktype_none,
4006 ISC_FALSE);
4007 NODE_UNLOCK(lock, isc_rwlocktype_read);
4010 if (close_version)
4011 closeversion(db, &version, ISC_FALSE);
4013 dns_rbtnodechain_reset(&search.chain);
4015 return (result);
4018 static isc_result_t
4019 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4020 isc_stdtime_t now, dns_dbnode_t **nodep,
4021 dns_name_t *foundname,
4022 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4024 UNUSED(db);
4025 UNUSED(name);
4026 UNUSED(options);
4027 UNUSED(now);
4028 UNUSED(nodep);
4029 UNUSED(foundname);
4030 UNUSED(rdataset);
4031 UNUSED(sigrdataset);
4033 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4035 /* NOTREACHED */
4036 return (ISC_R_NOTIMPLEMENTED);
4039 static isc_result_t
4040 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4041 rbtdb_search_t *search = arg;
4042 rdatasetheader_t *header, *header_prev, *header_next;
4043 rdatasetheader_t *dname_header, *sigdname_header;
4044 isc_result_t result;
4045 nodelock_t *lock;
4046 isc_rwlocktype_t locktype;
4048 /* XXX comment */
4050 REQUIRE(search->zonecut == NULL);
4053 * Keep compiler silent.
4055 UNUSED(name);
4057 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4058 locktype = isc_rwlocktype_read;
4059 NODE_LOCK(lock, locktype);
4062 * Look for a DNAME or RRSIG DNAME rdataset.
4064 dname_header = NULL;
4065 sigdname_header = NULL;
4066 header_prev = NULL;
4067 for (header = node->data; header != NULL; header = header_next) {
4068 header_next = header->next;
4069 if (header->rdh_ttl <= search->now) {
4071 * This rdataset is stale. If no one else is
4072 * using the node, we can clean it up right
4073 * now, otherwise we mark it as stale, and
4074 * the node as dirty, so it will get cleaned
4075 * up later.
4077 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4078 (locktype == isc_rwlocktype_write ||
4079 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4081 * We update the node's status only when we
4082 * can get write access; otherwise, we leave
4083 * others to this work. Periodical cleaning
4084 * will eventually take the job as the last
4085 * resort.
4086 * We won't downgrade the lock, since other
4087 * rdatasets are probably stale, too.
4089 locktype = isc_rwlocktype_write;
4091 if (dns_rbtnode_refcurrent(node) == 0) {
4092 isc_mem_t *mctx;
4095 * header->down can be non-NULL if the
4096 * refcount has just decremented to 0
4097 * but decrement_reference() has not
4098 * performed clean_cache_node(), in
4099 * which case we need to purge the
4100 * stale headers first.
4102 mctx = search->rbtdb->common.mctx;
4103 clean_stale_headers(search->rbtdb,
4104 mctx,
4105 header);
4106 if (header_prev != NULL)
4107 header_prev->next =
4108 header->next;
4109 else
4110 node->data = header->next;
4111 free_rdataset(search->rbtdb, mctx,
4112 header);
4113 } else {
4114 header->attributes |=
4115 RDATASET_ATTR_STALE;
4116 node->dirty = 1;
4117 header_prev = header;
4119 } else
4120 header_prev = header;
4121 } else if (header->type == dns_rdatatype_dname &&
4122 EXISTS(header)) {
4123 dname_header = header;
4124 header_prev = header;
4125 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4126 EXISTS(header)) {
4127 sigdname_header = header;
4128 header_prev = header;
4129 } else
4130 header_prev = header;
4133 if (dname_header != NULL &&
4134 (!DNS_TRUST_PENDING(dname_header->trust) ||
4135 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4137 * We increment the reference count on node to ensure that
4138 * search->zonecut_rdataset will still be valid later.
4140 new_reference(search->rbtdb, node);
4141 INSIST(!ISC_LINK_LINKED(node, deadlink));
4142 search->zonecut = node;
4143 search->zonecut_rdataset = dname_header;
4144 search->zonecut_sigrdataset = sigdname_header;
4145 search->need_cleanup = ISC_TRUE;
4146 result = DNS_R_PARTIALMATCH;
4147 } else
4148 result = DNS_R_CONTINUE;
4150 NODE_UNLOCK(lock, locktype);
4152 return (result);
4155 static inline isc_result_t
4156 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4157 dns_dbnode_t **nodep, dns_name_t *foundname,
4158 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4160 unsigned int i;
4161 dns_rbtnode_t *level_node;
4162 rdatasetheader_t *header, *header_prev, *header_next;
4163 rdatasetheader_t *found, *foundsig;
4164 isc_result_t result = ISC_R_NOTFOUND;
4165 dns_name_t name;
4166 dns_rbtdb_t *rbtdb;
4167 isc_boolean_t done;
4168 nodelock_t *lock;
4169 isc_rwlocktype_t locktype;
4172 * Caller must be holding the tree lock.
4175 rbtdb = search->rbtdb;
4176 i = search->chain.level_matches;
4177 done = ISC_FALSE;
4178 do {
4179 locktype = isc_rwlocktype_read;
4180 lock = &rbtdb->node_locks[node->locknum].lock;
4181 NODE_LOCK(lock, locktype);
4184 * Look for NS and RRSIG NS rdatasets.
4186 found = NULL;
4187 foundsig = NULL;
4188 header_prev = NULL;
4189 for (header = node->data;
4190 header != NULL;
4191 header = header_next) {
4192 header_next = header->next;
4193 if (header->rdh_ttl <= search->now) {
4195 * This rdataset is stale. If no one else is
4196 * using the node, we can clean it up right
4197 * now, otherwise we mark it as stale, and
4198 * the node as dirty, so it will get cleaned
4199 * up later.
4201 if ((header->rdh_ttl <= search->now -
4202 RBTDB_VIRTUAL) &&
4203 (locktype == isc_rwlocktype_write ||
4204 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4206 * We update the node's status only
4207 * when we can get write access.
4209 locktype = isc_rwlocktype_write;
4211 if (dns_rbtnode_refcurrent(node)
4212 == 0) {
4213 isc_mem_t *m;
4215 m = search->rbtdb->common.mctx;
4216 clean_stale_headers(
4217 search->rbtdb,
4218 m, header);
4219 if (header_prev != NULL)
4220 header_prev->next =
4221 header->next;
4222 else
4223 node->data =
4224 header->next;
4225 free_rdataset(rbtdb, m,
4226 header);
4227 } else {
4228 header->attributes |=
4229 RDATASET_ATTR_STALE;
4230 node->dirty = 1;
4231 header_prev = header;
4233 } else
4234 header_prev = header;
4235 } else if (EXISTS(header)) {
4237 * We've found an extant rdataset. See if
4238 * we're interested in it.
4240 if (header->type == dns_rdatatype_ns) {
4241 found = header;
4242 if (foundsig != NULL)
4243 break;
4244 } else if (header->type ==
4245 RBTDB_RDATATYPE_SIGNS) {
4246 foundsig = header;
4247 if (found != NULL)
4248 break;
4250 header_prev = header;
4251 } else
4252 header_prev = header;
4255 if (found != NULL) {
4257 * If we have to set foundname, we do it before
4258 * anything else. If we were to set foundname after
4259 * we had set nodep or bound the rdataset, then we'd
4260 * have to undo that work if dns_name_concatenate()
4261 * failed. By setting foundname first, there's
4262 * nothing to undo if we have trouble.
4264 if (foundname != NULL) {
4265 dns_name_init(&name, NULL);
4266 dns_rbt_namefromnode(node, &name);
4267 result = dns_name_copy(&name, foundname, NULL);
4268 while (result == ISC_R_SUCCESS && i > 0) {
4269 i--;
4270 level_node = search->chain.levels[i];
4271 dns_name_init(&name, NULL);
4272 dns_rbt_namefromnode(level_node,
4273 &name);
4274 result =
4275 dns_name_concatenate(foundname,
4276 &name,
4277 foundname,
4278 NULL);
4280 if (result != ISC_R_SUCCESS) {
4281 *nodep = NULL;
4282 goto node_exit;
4285 result = DNS_R_DELEGATION;
4286 if (nodep != NULL) {
4287 new_reference(search->rbtdb, node);
4288 *nodep = node;
4290 bind_rdataset(search->rbtdb, node, found, search->now,
4291 rdataset);
4292 if (foundsig != NULL)
4293 bind_rdataset(search->rbtdb, node, foundsig,
4294 search->now, sigrdataset);
4295 if (need_headerupdate(found, search->now) ||
4296 (foundsig != NULL &&
4297 need_headerupdate(foundsig, search->now))) {
4298 if (locktype != isc_rwlocktype_write) {
4299 NODE_UNLOCK(lock, locktype);
4300 NODE_LOCK(lock, isc_rwlocktype_write);
4301 locktype = isc_rwlocktype_write;
4303 if (need_headerupdate(found, search->now))
4304 update_header(search->rbtdb, found,
4305 search->now);
4306 if (foundsig != NULL &&
4307 need_headerupdate(foundsig, search->now)) {
4308 update_header(search->rbtdb, foundsig,
4309 search->now);
4314 node_exit:
4315 NODE_UNLOCK(lock, locktype);
4317 if (found == NULL && i > 0) {
4318 i--;
4319 node = search->chain.levels[i];
4320 } else
4321 done = ISC_TRUE;
4323 } while (!done);
4325 return (result);
4328 static isc_result_t
4329 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4330 isc_stdtime_t now, dns_name_t *foundname,
4331 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4333 dns_rbtnode_t *node;
4334 rdatasetheader_t *header, *header_next, *header_prev;
4335 rdatasetheader_t *found, *foundsig;
4336 isc_boolean_t empty_node;
4337 isc_result_t result;
4338 dns_fixedname_t fname, forigin;
4339 dns_name_t *name, *origin;
4340 rbtdb_rdatatype_t matchtype, sigmatchtype;
4341 nodelock_t *lock;
4342 isc_rwlocktype_t locktype;
4344 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4345 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4346 dns_rdatatype_nsec);
4348 do {
4349 node = NULL;
4350 dns_fixedname_init(&fname);
4351 name = dns_fixedname_name(&fname);
4352 dns_fixedname_init(&forigin);
4353 origin = dns_fixedname_name(&forigin);
4354 result = dns_rbtnodechain_current(&search->chain, name,
4355 origin, &node);
4356 if (result != ISC_R_SUCCESS)
4357 return (result);
4358 locktype = isc_rwlocktype_read;
4359 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4360 NODE_LOCK(lock, locktype);
4361 found = NULL;
4362 foundsig = NULL;
4363 empty_node = ISC_TRUE;
4364 header_prev = NULL;
4365 for (header = node->data;
4366 header != NULL;
4367 header = header_next) {
4368 header_next = header->next;
4369 if (header->rdh_ttl <= now) {
4371 * This rdataset is stale. If no one else is
4372 * using the node, we can clean it up right
4373 * now, otherwise we mark it as stale, and the
4374 * node as dirty, so it will get cleaned up
4375 * later.
4377 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4378 (locktype == isc_rwlocktype_write ||
4379 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4381 * We update the node's status only
4382 * when we can get write access.
4384 locktype = isc_rwlocktype_write;
4386 if (dns_rbtnode_refcurrent(node)
4387 == 0) {
4388 isc_mem_t *m;
4390 m = search->rbtdb->common.mctx;
4391 clean_stale_headers(
4392 search->rbtdb,
4393 m, header);
4394 if (header_prev != NULL)
4395 header_prev->next =
4396 header->next;
4397 else
4398 node->data = header->next;
4399 free_rdataset(search->rbtdb, m,
4400 header);
4401 } else {
4402 header->attributes |=
4403 RDATASET_ATTR_STALE;
4404 node->dirty = 1;
4405 header_prev = header;
4407 } else
4408 header_prev = header;
4409 continue;
4411 if (NONEXISTENT(header) ||
4412 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4413 header_prev = header;
4414 continue;
4416 empty_node = ISC_FALSE;
4417 if (header->type == matchtype)
4418 found = header;
4419 else if (header->type == sigmatchtype)
4420 foundsig = header;
4421 header_prev = header;
4423 if (found != NULL) {
4424 result = dns_name_concatenate(name, origin,
4425 foundname, NULL);
4426 if (result != ISC_R_SUCCESS)
4427 goto unlock_node;
4428 bind_rdataset(search->rbtdb, node, found,
4429 now, rdataset);
4430 if (foundsig != NULL)
4431 bind_rdataset(search->rbtdb, node, foundsig,
4432 now, sigrdataset);
4433 new_reference(search->rbtdb, node);
4434 *nodep = node;
4435 result = DNS_R_COVERINGNSEC;
4436 } else if (!empty_node) {
4437 result = ISC_R_NOTFOUND;
4438 } else
4439 result = dns_rbtnodechain_prev(&search->chain, NULL,
4440 NULL);
4441 unlock_node:
4442 NODE_UNLOCK(lock, locktype);
4443 } while (empty_node && result == ISC_R_SUCCESS);
4444 return (result);
4447 static isc_result_t
4448 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4449 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4450 dns_dbnode_t **nodep, dns_name_t *foundname,
4451 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4453 dns_rbtnode_t *node = NULL;
4454 isc_result_t result;
4455 rbtdb_search_t search;
4456 isc_boolean_t cname_ok = ISC_TRUE;
4457 isc_boolean_t empty_node;
4458 nodelock_t *lock;
4459 isc_rwlocktype_t locktype;
4460 rdatasetheader_t *header, *header_prev, *header_next;
4461 rdatasetheader_t *found, *nsheader;
4462 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4463 rdatasetheader_t *update, *updatesig;
4464 rbtdb_rdatatype_t sigtype, negtype;
4466 UNUSED(version);
4468 search.rbtdb = (dns_rbtdb_t *)db;
4470 REQUIRE(VALID_RBTDB(search.rbtdb));
4471 REQUIRE(version == NULL);
4473 if (now == 0)
4474 isc_stdtime_get(&now);
4476 search.rbtversion = NULL;
4477 search.serial = 1;
4478 search.options = options;
4479 search.copy_name = ISC_FALSE;
4480 search.need_cleanup = ISC_FALSE;
4481 search.wild = ISC_FALSE;
4482 search.zonecut = NULL;
4483 dns_fixedname_init(&search.zonecut_name);
4484 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4485 search.now = now;
4486 update = NULL;
4487 updatesig = NULL;
4489 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4492 * Search down from the root of the tree. If, while going down, we
4493 * encounter a callback node, cache_zonecut_callback() will search the
4494 * rdatasets at the zone cut for a DNAME rdataset.
4496 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4497 &search.chain, DNS_RBTFIND_EMPTYDATA,
4498 cache_zonecut_callback, &search);
4500 if (result == DNS_R_PARTIALMATCH) {
4501 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4502 result = find_coveringnsec(&search, nodep, now,
4503 foundname, rdataset,
4504 sigrdataset);
4505 if (result == DNS_R_COVERINGNSEC)
4506 goto tree_exit;
4508 if (search.zonecut != NULL) {
4509 result = setup_delegation(&search, nodep, foundname,
4510 rdataset, sigrdataset);
4511 goto tree_exit;
4512 } else {
4513 find_ns:
4514 result = find_deepest_zonecut(&search, node, nodep,
4515 foundname, rdataset,
4516 sigrdataset);
4517 goto tree_exit;
4519 } else if (result != ISC_R_SUCCESS)
4520 goto tree_exit;
4523 * Certain DNSSEC types are not subject to CNAME matching
4524 * (RFC4035, section 2.5 and RFC3007).
4526 * We don't check for RRSIG, because we don't store RRSIG records
4527 * directly.
4529 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4530 cname_ok = ISC_FALSE;
4533 * We now go looking for rdata...
4536 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4537 locktype = isc_rwlocktype_read;
4538 NODE_LOCK(lock, locktype);
4540 found = NULL;
4541 foundsig = NULL;
4542 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4543 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4544 nsheader = NULL;
4545 nssig = NULL;
4546 cnamesig = NULL;
4547 empty_node = ISC_TRUE;
4548 header_prev = NULL;
4549 for (header = node->data; header != NULL; header = header_next) {
4550 header_next = header->next;
4551 if (header->rdh_ttl <= now) {
4553 * This rdataset is stale. If no one else is using the
4554 * node, we can clean it up right now, otherwise we
4555 * mark it as stale, and the node as dirty, so it will
4556 * get cleaned up later.
4558 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4559 (locktype == isc_rwlocktype_write ||
4560 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4562 * We update the node's status only when we
4563 * can get write access.
4565 locktype = isc_rwlocktype_write;
4567 if (dns_rbtnode_refcurrent(node) == 0) {
4568 isc_mem_t *mctx;
4570 mctx = search.rbtdb->common.mctx;
4571 clean_stale_headers(search.rbtdb, mctx,
4572 header);
4573 if (header_prev != NULL)
4574 header_prev->next =
4575 header->next;
4576 else
4577 node->data = header->next;
4578 free_rdataset(search.rbtdb, mctx,
4579 header);
4580 } else {
4581 header->attributes |=
4582 RDATASET_ATTR_STALE;
4583 node->dirty = 1;
4584 header_prev = header;
4586 } else
4587 header_prev = header;
4588 } else if (EXISTS(header)) {
4590 * We now know that there is at least one active
4591 * non-stale rdataset at this node.
4593 empty_node = ISC_FALSE;
4596 * If we found a type we were looking for, remember
4597 * it.
4599 if (header->type == type ||
4600 (type == dns_rdatatype_any &&
4601 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4602 (cname_ok && header->type ==
4603 dns_rdatatype_cname)) {
4605 * We've found the answer.
4607 found = header;
4608 if (header->type == dns_rdatatype_cname &&
4609 cname_ok &&
4610 cnamesig != NULL) {
4612 * If we've already got the CNAME RRSIG,
4613 * use it, otherwise change sigtype
4614 * so that we find it.
4616 if (cnamesig != NULL)
4617 foundsig = cnamesig;
4618 else
4619 sigtype =
4620 RBTDB_RDATATYPE_SIGCNAME;
4621 foundsig = cnamesig;
4623 } else if (header->type == sigtype) {
4625 * We've found the RRSIG rdataset for our
4626 * target type. Remember it.
4628 foundsig = header;
4629 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4630 header->type == negtype) {
4632 * We've found a negative cache entry.
4634 found = header;
4635 } else if (header->type == dns_rdatatype_ns) {
4637 * Remember a NS rdataset even if we're
4638 * not specifically looking for it, because
4639 * we might need it later.
4641 nsheader = header;
4642 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4644 * If we need the NS rdataset, we'll also
4645 * need its signature.
4647 nssig = header;
4648 } else if (cname_ok &&
4649 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4651 * If we get a CNAME match, we'll also need
4652 * its signature.
4654 cnamesig = header;
4656 header_prev = header;
4657 } else
4658 header_prev = header;
4661 if (empty_node) {
4663 * We have an exact match for the name, but there are no
4664 * extant rdatasets. That means that this node doesn't
4665 * meaningfully exist, and that we really have a partial match.
4667 NODE_UNLOCK(lock, locktype);
4668 goto find_ns;
4672 * If we didn't find what we were looking for...
4674 if (found == NULL ||
4675 (found->trust == dns_trust_additional &&
4676 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4677 (found->trust == dns_trust_glue &&
4678 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4679 (DNS_TRUST_PENDING(found->trust) &&
4680 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4682 * If there is an NS rdataset at this node, then this is the
4683 * deepest zone cut.
4685 if (nsheader != NULL) {
4686 if (nodep != NULL) {
4687 new_reference(search.rbtdb, node);
4688 INSIST(!ISC_LINK_LINKED(node, deadlink));
4689 *nodep = node;
4691 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4692 rdataset);
4693 if (need_headerupdate(nsheader, search.now))
4694 update = nsheader;
4695 if (nssig != NULL) {
4696 bind_rdataset(search.rbtdb, node, nssig,
4697 search.now, sigrdataset);
4698 if (need_headerupdate(nssig, search.now))
4699 updatesig = nssig;
4701 result = DNS_R_DELEGATION;
4702 goto node_exit;
4706 * Go find the deepest zone cut.
4708 NODE_UNLOCK(lock, locktype);
4709 goto find_ns;
4713 * We found what we were looking for, or we found a CNAME.
4716 if (nodep != NULL) {
4717 new_reference(search.rbtdb, node);
4718 INSIST(!ISC_LINK_LINKED(node, deadlink));
4719 *nodep = node;
4722 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4724 * We found a negative cache entry.
4726 if (NXDOMAIN(found))
4727 result = DNS_R_NCACHENXDOMAIN;
4728 else
4729 result = DNS_R_NCACHENXRRSET;
4730 } else if (type != found->type &&
4731 type != dns_rdatatype_any &&
4732 found->type == dns_rdatatype_cname) {
4734 * We weren't doing an ANY query and we found a CNAME instead
4735 * of the type we were looking for, so we need to indicate
4736 * that result to the caller.
4738 result = DNS_R_CNAME;
4739 } else {
4741 * An ordinary successful query!
4743 result = ISC_R_SUCCESS;
4746 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4747 result == DNS_R_NCACHENXRRSET) {
4748 bind_rdataset(search.rbtdb, node, found, search.now,
4749 rdataset);
4750 if (need_headerupdate(found, search.now))
4751 update = found;
4752 if (foundsig != NULL) {
4753 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4754 sigrdataset);
4755 if (need_headerupdate(foundsig, search.now))
4756 updatesig = foundsig;
4760 node_exit:
4761 if ((update != NULL || updatesig != NULL) &&
4762 locktype != isc_rwlocktype_write) {
4763 NODE_UNLOCK(lock, locktype);
4764 NODE_LOCK(lock, isc_rwlocktype_write);
4765 locktype = isc_rwlocktype_write;
4767 if (update != NULL && need_headerupdate(update, search.now))
4768 update_header(search.rbtdb, update, search.now);
4769 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4770 update_header(search.rbtdb, updatesig, search.now);
4772 NODE_UNLOCK(lock, locktype);
4774 tree_exit:
4775 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4778 * If we found a zonecut but aren't going to use it, we have to
4779 * let go of it.
4781 if (search.need_cleanup) {
4782 node = search.zonecut;
4783 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4785 NODE_LOCK(lock, isc_rwlocktype_read);
4786 decrement_reference(search.rbtdb, node, 0,
4787 isc_rwlocktype_read, isc_rwlocktype_none,
4788 ISC_FALSE);
4789 NODE_UNLOCK(lock, isc_rwlocktype_read);
4792 dns_rbtnodechain_reset(&search.chain);
4794 return (result);
4797 static isc_result_t
4798 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4799 isc_stdtime_t now, dns_dbnode_t **nodep,
4800 dns_name_t *foundname,
4801 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4803 dns_rbtnode_t *node = NULL;
4804 nodelock_t *lock;
4805 isc_result_t result;
4806 rbtdb_search_t search;
4807 rdatasetheader_t *header, *header_prev, *header_next;
4808 rdatasetheader_t *found, *foundsig;
4809 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4810 isc_rwlocktype_t locktype;
4812 search.rbtdb = (dns_rbtdb_t *)db;
4814 REQUIRE(VALID_RBTDB(search.rbtdb));
4816 if (now == 0)
4817 isc_stdtime_get(&now);
4819 search.rbtversion = NULL;
4820 search.serial = 1;
4821 search.options = options;
4822 search.copy_name = ISC_FALSE;
4823 search.need_cleanup = ISC_FALSE;
4824 search.wild = ISC_FALSE;
4825 search.zonecut = NULL;
4826 dns_fixedname_init(&search.zonecut_name);
4827 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4828 search.now = now;
4830 if ((options & DNS_DBFIND_NOEXACT) != 0)
4831 rbtoptions |= DNS_RBTFIND_NOEXACT;
4833 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4836 * Search down from the root of the tree.
4838 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4839 &search.chain, rbtoptions, NULL, &search);
4841 if (result == DNS_R_PARTIALMATCH) {
4842 find_ns:
4843 result = find_deepest_zonecut(&search, node, nodep, foundname,
4844 rdataset, sigrdataset);
4845 goto tree_exit;
4846 } else if (result != ISC_R_SUCCESS)
4847 goto tree_exit;
4850 * We now go looking for an NS rdataset at the node.
4853 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4854 locktype = isc_rwlocktype_read;
4855 NODE_LOCK(lock, locktype);
4857 found = NULL;
4858 foundsig = NULL;
4859 header_prev = NULL;
4860 for (header = node->data; header != NULL; header = header_next) {
4861 header_next = header->next;
4862 if (header->rdh_ttl <= now) {
4864 * This rdataset is stale. If no one else is using the
4865 * node, we can clean it up right now, otherwise we
4866 * mark it as stale, and the node as dirty, so it will
4867 * get cleaned up later.
4869 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4870 (locktype == isc_rwlocktype_write ||
4871 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4873 * We update the node's status only when we
4874 * can get write access.
4876 locktype = isc_rwlocktype_write;
4878 if (dns_rbtnode_refcurrent(node) == 0) {
4879 isc_mem_t *mctx;
4881 mctx = search.rbtdb->common.mctx;
4882 clean_stale_headers(search.rbtdb, mctx,
4883 header);
4884 if (header_prev != NULL)
4885 header_prev->next =
4886 header->next;
4887 else
4888 node->data = header->next;
4889 free_rdataset(search.rbtdb, mctx,
4890 header);
4891 } else {
4892 header->attributes |=
4893 RDATASET_ATTR_STALE;
4894 node->dirty = 1;
4895 header_prev = header;
4897 } else
4898 header_prev = header;
4899 } else if (EXISTS(header)) {
4901 * If we found a type we were looking for, remember
4902 * it.
4904 if (header->type == dns_rdatatype_ns) {
4906 * Remember a NS rdataset even if we're
4907 * not specifically looking for it, because
4908 * we might need it later.
4910 found = header;
4911 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4913 * If we need the NS rdataset, we'll also
4914 * need its signature.
4916 foundsig = header;
4918 header_prev = header;
4919 } else
4920 header_prev = header;
4923 if (found == NULL) {
4925 * No NS records here.
4927 NODE_UNLOCK(lock, locktype);
4928 goto find_ns;
4931 if (nodep != NULL) {
4932 new_reference(search.rbtdb, node);
4933 INSIST(!ISC_LINK_LINKED(node, deadlink));
4934 *nodep = node;
4937 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4938 if (foundsig != NULL)
4939 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4940 sigrdataset);
4942 if (need_headerupdate(found, search.now) ||
4943 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
4944 if (locktype != isc_rwlocktype_write) {
4945 NODE_UNLOCK(lock, locktype);
4946 NODE_LOCK(lock, isc_rwlocktype_write);
4947 locktype = isc_rwlocktype_write;
4949 if (need_headerupdate(found, search.now))
4950 update_header(search.rbtdb, found, search.now);
4951 if (foundsig != NULL &&
4952 need_headerupdate(foundsig, search.now)) {
4953 update_header(search.rbtdb, foundsig, search.now);
4957 NODE_UNLOCK(lock, locktype);
4959 tree_exit:
4960 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4962 INSIST(!search.need_cleanup);
4964 dns_rbtnodechain_reset(&search.chain);
4966 if (result == DNS_R_DELEGATION)
4967 result = ISC_R_SUCCESS;
4969 return (result);
4972 static void
4973 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4974 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4975 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4976 unsigned int refs;
4978 REQUIRE(VALID_RBTDB(rbtdb));
4979 REQUIRE(targetp != NULL && *targetp == NULL);
4981 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4982 dns_rbtnode_refincrement(node, &refs);
4983 INSIST(refs != 0);
4984 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4986 *targetp = source;
4989 static void
4990 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4991 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4992 dns_rbtnode_t *node;
4993 isc_boolean_t want_free = ISC_FALSE;
4994 isc_boolean_t inactive = ISC_FALSE;
4995 rbtdb_nodelock_t *nodelock;
4997 REQUIRE(VALID_RBTDB(rbtdb));
4998 REQUIRE(targetp != NULL && *targetp != NULL);
5000 node = (dns_rbtnode_t *)(*targetp);
5001 nodelock = &rbtdb->node_locks[node->locknum];
5003 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5005 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5006 isc_rwlocktype_none, ISC_FALSE)) {
5007 if (isc_refcount_current(&nodelock->references) == 0 &&
5008 nodelock->exiting) {
5009 inactive = ISC_TRUE;
5013 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5015 *targetp = NULL;
5017 if (inactive) {
5018 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5019 rbtdb->active--;
5020 if (rbtdb->active == 0)
5021 want_free = ISC_TRUE;
5022 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5023 if (want_free) {
5024 char buf[DNS_NAME_FORMATSIZE];
5025 if (dns_name_dynamic(&rbtdb->common.origin))
5026 dns_name_format(&rbtdb->common.origin, buf,
5027 sizeof(buf));
5028 else
5029 strcpy(buf, "<UNKNOWN>");
5030 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5031 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5032 "calling free_rbtdb(%s)", buf);
5033 free_rbtdb(rbtdb, ISC_TRUE, NULL);
5038 static isc_result_t
5039 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5040 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5041 dns_rbtnode_t *rbtnode = node;
5042 rdatasetheader_t *header;
5043 isc_boolean_t force_expire = ISC_FALSE;
5045 * These are the category and module used by the cache cleaner.
5047 isc_boolean_t log = ISC_FALSE;
5048 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5049 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5050 int level = ISC_LOG_DEBUG(2);
5051 char printname[DNS_NAME_FORMATSIZE];
5053 REQUIRE(VALID_RBTDB(rbtdb));
5056 * Caller must hold a tree lock.
5059 if (now == 0)
5060 isc_stdtime_get(&now);
5062 if (rbtdb->overmem) {
5063 isc_uint32_t val;
5065 isc_random_get(&val);
5067 * XXXDCL Could stand to have a better policy, like LRU.
5069 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5072 * Note that 'log' can be true IFF rbtdb->overmem is also true.
5073 * rbtdb->overmem can currently only be true for cache
5074 * databases -- hence all of the "overmem cache" log strings.
5076 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5077 if (log)
5078 isc_log_write(dns_lctx, category, module, level,
5079 "overmem cache: %s %s",
5080 force_expire ? "FORCE" : "check",
5081 dns_rbt_formatnodename(rbtnode,
5082 printname,
5083 sizeof(printname)));
5087 * We may not need write access, but this code path is not performance
5088 * sensitive, so it should be okay to always lock as a writer.
5090 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5091 isc_rwlocktype_write);
5093 for (header = rbtnode->data; header != NULL; header = header->next)
5094 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5096 * We don't check if refcurrent(rbtnode) == 0 and try
5097 * to free like we do in cache_find(), because
5098 * refcurrent(rbtnode) must be non-zero. This is so
5099 * because 'node' is an argument to the function.
5101 header->attributes |= RDATASET_ATTR_STALE;
5102 rbtnode->dirty = 1;
5103 if (log)
5104 isc_log_write(dns_lctx, category, module,
5105 level, "overmem cache: stale %s",
5106 printname);
5107 } else if (force_expire) {
5108 if (! RETAIN(header)) {
5109 set_ttl(rbtdb, header, 0);
5110 header->attributes |= RDATASET_ATTR_STALE;
5111 rbtnode->dirty = 1;
5112 } else if (log) {
5113 isc_log_write(dns_lctx, category, module,
5114 level, "overmem cache: "
5115 "reprieve by RETAIN() %s",
5116 printname);
5118 } else if (rbtdb->overmem && log)
5119 isc_log_write(dns_lctx, category, module, level,
5120 "overmem cache: saved %s", printname);
5122 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5123 isc_rwlocktype_write);
5125 return (ISC_R_SUCCESS);
5128 static void
5129 overmem(dns_db_t *db, isc_boolean_t overmem) {
5130 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5132 if (IS_CACHE(rbtdb))
5133 rbtdb->overmem = overmem;
5136 static void
5137 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5138 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5139 dns_rbtnode_t *rbtnode = node;
5140 isc_boolean_t first;
5142 REQUIRE(VALID_RBTDB(rbtdb));
5144 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5145 isc_rwlocktype_read);
5147 fprintf(out, "node %p, %u references, locknum = %u\n",
5148 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5149 rbtnode->locknum);
5150 if (rbtnode->data != NULL) {
5151 rdatasetheader_t *current, *top_next;
5153 for (current = rbtnode->data; current != NULL;
5154 current = top_next) {
5155 top_next = current->next;
5156 first = ISC_TRUE;
5157 fprintf(out, "\ttype %u", current->type);
5158 do {
5159 if (!first)
5160 fprintf(out, "\t");
5161 first = ISC_FALSE;
5162 fprintf(out,
5163 "\tserial = %lu, ttl = %u, "
5164 "trust = %u, attributes = %u, "
5165 "resign = %u\n",
5166 (unsigned long)current->serial,
5167 current->rdh_ttl,
5168 current->trust,
5169 current->attributes,
5170 current->resign);
5171 current = current->down;
5172 } while (current != NULL);
5174 } else
5175 fprintf(out, "(empty)\n");
5177 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5178 isc_rwlocktype_read);
5181 static isc_result_t
5182 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5184 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5185 rbtdb_dbiterator_t *rbtdbiter;
5187 REQUIRE(VALID_RBTDB(rbtdb));
5189 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5190 if (rbtdbiter == NULL)
5191 return (ISC_R_NOMEMORY);
5193 rbtdbiter->common.methods = &dbiterator_methods;
5194 rbtdbiter->common.db = NULL;
5195 dns_db_attach(db, &rbtdbiter->common.db);
5196 rbtdbiter->common.relative_names =
5197 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5198 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5199 rbtdbiter->common.cleaning = ISC_FALSE;
5200 rbtdbiter->paused = ISC_TRUE;
5201 rbtdbiter->tree_locked = isc_rwlocktype_none;
5202 rbtdbiter->result = ISC_R_SUCCESS;
5203 dns_fixedname_init(&rbtdbiter->name);
5204 dns_fixedname_init(&rbtdbiter->origin);
5205 rbtdbiter->node = NULL;
5206 rbtdbiter->delete = 0;
5207 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5208 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5209 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5210 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5211 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5212 if (rbtdbiter->nsec3only)
5213 rbtdbiter->current = &rbtdbiter->nsec3chain;
5214 else
5215 rbtdbiter->current = &rbtdbiter->chain;
5217 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5219 return (ISC_R_SUCCESS);
5222 static isc_result_t
5223 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5224 dns_rdatatype_t type, dns_rdatatype_t covers,
5225 isc_stdtime_t now, dns_rdataset_t *rdataset,
5226 dns_rdataset_t *sigrdataset)
5228 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5229 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5230 rdatasetheader_t *header, *header_next, *found, *foundsig;
5231 rbtdb_serial_t serial;
5232 rbtdb_version_t *rbtversion = version;
5233 isc_boolean_t close_version = ISC_FALSE;
5234 rbtdb_rdatatype_t matchtype, sigmatchtype;
5236 REQUIRE(VALID_RBTDB(rbtdb));
5237 REQUIRE(type != dns_rdatatype_any);
5239 if (rbtversion == NULL) {
5240 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5241 close_version = ISC_TRUE;
5243 serial = rbtversion->serial;
5244 now = 0;
5246 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5247 isc_rwlocktype_read);
5249 found = NULL;
5250 foundsig = NULL;
5251 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5252 if (covers == 0)
5253 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5254 else
5255 sigmatchtype = 0;
5257 for (header = rbtnode->data; header != NULL; header = header_next) {
5258 header_next = header->next;
5259 do {
5260 if (header->serial <= serial &&
5261 !IGNORE(header)) {
5263 * Is this a "this rdataset doesn't
5264 * exist" record?
5266 if (NONEXISTENT(header))
5267 header = NULL;
5268 break;
5269 } else
5270 header = header->down;
5271 } while (header != NULL);
5272 if (header != NULL) {
5274 * We have an active, extant rdataset. If it's a
5275 * type we're looking for, remember it.
5277 if (header->type == matchtype) {
5278 found = header;
5279 if (foundsig != NULL)
5280 break;
5281 } else if (header->type == sigmatchtype) {
5282 foundsig = header;
5283 if (found != NULL)
5284 break;
5288 if (found != NULL) {
5289 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5290 if (foundsig != NULL)
5291 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5292 sigrdataset);
5295 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5296 isc_rwlocktype_read);
5298 if (close_version)
5299 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5300 ISC_FALSE);
5302 if (found == NULL)
5303 return (ISC_R_NOTFOUND);
5305 return (ISC_R_SUCCESS);
5308 static isc_result_t
5309 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5310 dns_rdatatype_t type, dns_rdatatype_t covers,
5311 isc_stdtime_t now, dns_rdataset_t *rdataset,
5312 dns_rdataset_t *sigrdataset)
5314 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5315 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5316 rdatasetheader_t *header, *header_next, *found, *foundsig;
5317 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5318 isc_result_t result;
5319 nodelock_t *lock;
5320 isc_rwlocktype_t locktype;
5322 REQUIRE(VALID_RBTDB(rbtdb));
5323 REQUIRE(type != dns_rdatatype_any);
5325 UNUSED(version);
5327 result = ISC_R_SUCCESS;
5329 if (now == 0)
5330 isc_stdtime_get(&now);
5332 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5333 locktype = isc_rwlocktype_read;
5334 NODE_LOCK(lock, locktype);
5336 found = NULL;
5337 foundsig = NULL;
5338 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5339 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5340 if (covers == 0)
5341 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5342 else
5343 sigmatchtype = 0;
5345 for (header = rbtnode->data; header != NULL; header = header_next) {
5346 header_next = header->next;
5347 if (header->rdh_ttl <= now) {
5348 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5349 (locktype == isc_rwlocktype_write ||
5350 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5352 * We update the node's status only when we
5353 * can get write access.
5355 locktype = isc_rwlocktype_write;
5358 * We don't check if refcurrent(rbtnode) == 0
5359 * and try to free like we do in cache_find(),
5360 * because refcurrent(rbtnode) must be
5361 * non-zero. This is so because 'node' is an
5362 * argument to the function.
5364 header->attributes |= RDATASET_ATTR_STALE;
5365 rbtnode->dirty = 1;
5367 } else if (EXISTS(header)) {
5368 if (header->type == matchtype)
5369 found = header;
5370 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5371 header->type == negtype)
5372 found = header;
5373 else if (header->type == sigmatchtype)
5374 foundsig = header;
5377 if (found != NULL) {
5378 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5379 if (foundsig != NULL)
5380 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5381 sigrdataset);
5384 NODE_UNLOCK(lock, locktype);
5386 if (found == NULL)
5387 return (ISC_R_NOTFOUND);
5389 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5391 * We found a negative cache entry.
5393 if (NXDOMAIN(found))
5394 result = DNS_R_NCACHENXDOMAIN;
5395 else
5396 result = DNS_R_NCACHENXRRSET;
5399 return (result);
5402 static isc_result_t
5403 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5404 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5406 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5407 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5408 rbtdb_version_t *rbtversion = version;
5409 rbtdb_rdatasetiter_t *iterator;
5410 unsigned int refs;
5412 REQUIRE(VALID_RBTDB(rbtdb));
5414 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5415 if (iterator == NULL)
5416 return (ISC_R_NOMEMORY);
5418 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5419 now = 0;
5420 if (rbtversion == NULL)
5421 currentversion(db,
5422 (dns_dbversion_t **) (void *)(&rbtversion));
5423 else {
5424 unsigned int refs;
5426 isc_refcount_increment(&rbtversion->references,
5427 &refs);
5428 INSIST(refs > 1);
5430 } else {
5431 if (now == 0)
5432 isc_stdtime_get(&now);
5433 rbtversion = NULL;
5436 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5437 iterator->common.methods = &rdatasetiter_methods;
5438 iterator->common.db = db;
5439 iterator->common.node = node;
5440 iterator->common.version = (dns_dbversion_t *)rbtversion;
5441 iterator->common.now = now;
5443 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5445 dns_rbtnode_refincrement(rbtnode, &refs);
5446 INSIST(refs != 0);
5448 iterator->current = NULL;
5450 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5452 *iteratorp = (dns_rdatasetiter_t *)iterator;
5454 return (ISC_R_SUCCESS);
5457 static isc_boolean_t
5458 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5459 rdatasetheader_t *header, *header_next;
5460 isc_boolean_t cname, other_data;
5461 dns_rdatatype_t rdtype;
5464 * The caller must hold the node lock.
5468 * Look for CNAME and "other data" rdatasets active in our version.
5470 cname = ISC_FALSE;
5471 other_data = ISC_FALSE;
5472 for (header = node->data; header != NULL; header = header_next) {
5473 header_next = header->next;
5474 if (header->type == dns_rdatatype_cname) {
5476 * Look for an active extant CNAME.
5478 do {
5479 if (header->serial <= serial &&
5480 !IGNORE(header)) {
5482 * Is this a "this rdataset doesn't
5483 * exist" record?
5485 if (NONEXISTENT(header))
5486 header = NULL;
5487 break;
5488 } else
5489 header = header->down;
5490 } while (header != NULL);
5491 if (header != NULL)
5492 cname = ISC_TRUE;
5493 } else {
5495 * Look for active extant "other data".
5497 * "Other data" is any rdataset whose type is not
5498 * KEY, NSEC, SIG or RRSIG.
5500 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5501 if (rdtype != dns_rdatatype_key &&
5502 rdtype != dns_rdatatype_sig &&
5503 rdtype != dns_rdatatype_nsec &&
5504 rdtype != dns_rdatatype_rrsig) {
5506 * Is it active and extant?
5508 do {
5509 if (header->serial <= serial &&
5510 !IGNORE(header)) {
5512 * Is this a "this rdataset
5513 * doesn't exist" record?
5515 if (NONEXISTENT(header))
5516 header = NULL;
5517 break;
5518 } else
5519 header = header->down;
5520 } while (header != NULL);
5521 if (header != NULL)
5522 other_data = ISC_TRUE;
5527 if (cname && other_data)
5528 return (ISC_TRUE);
5530 return (ISC_FALSE);
5533 static isc_result_t
5534 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5535 isc_result_t result;
5537 INSIST(!IS_CACHE(rbtdb));
5538 INSIST(newheader->heap_index == 0);
5539 INSIST(!ISC_LINK_LINKED(newheader, link));
5541 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5542 return (result);
5545 static isc_result_t
5546 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5547 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5548 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5550 rbtdb_changed_t *changed = NULL;
5551 rdatasetheader_t *topheader, *topheader_prev, *header;
5552 unsigned char *merged;
5553 isc_result_t result;
5554 isc_boolean_t header_nx;
5555 isc_boolean_t newheader_nx;
5556 isc_boolean_t merge;
5557 dns_rdatatype_t rdtype, covers;
5558 rbtdb_rdatatype_t negtype;
5559 dns_trust_t trust;
5560 int idx;
5563 * Add an rdatasetheader_t to a node.
5567 * Caller must be holding the node lock.
5570 if ((options & DNS_DBADD_MERGE) != 0) {
5571 REQUIRE(rbtversion != NULL);
5572 merge = ISC_TRUE;
5573 } else
5574 merge = ISC_FALSE;
5576 if ((options & DNS_DBADD_FORCE) != 0)
5577 trust = dns_trust_ultimate;
5578 else
5579 trust = newheader->trust;
5581 if (rbtversion != NULL && !loading) {
5583 * We always add a changed record, even if no changes end up
5584 * being made to this node, because it's harmless and
5585 * simplifies the code.
5587 changed = add_changed(rbtdb, rbtversion, rbtnode);
5588 if (changed == NULL) {
5589 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5590 return (ISC_R_NOMEMORY);
5594 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5595 topheader_prev = NULL;
5597 negtype = 0;
5598 if (rbtversion == NULL && !newheader_nx) {
5599 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5600 if (rdtype == 0) {
5602 * We're adding a negative cache entry.
5604 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5605 if (covers == dns_rdatatype_any) {
5607 * We're adding an negative cache entry
5608 * which covers all types (NXDOMAIN,
5609 * NODATA(QTYPE=ANY)).
5611 * We make all other data stale so that the
5612 * only rdataset that can be found at this
5613 * node is the negative cache entry.
5615 for (topheader = rbtnode->data;
5616 topheader != NULL;
5617 topheader = topheader->next) {
5618 set_ttl(rbtdb, topheader, 0);
5619 topheader->attributes |=
5620 RDATASET_ATTR_STALE;
5622 rbtnode->dirty = 1;
5623 goto find_header;
5625 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5626 } else {
5628 * We're adding something that isn't a
5629 * negative cache entry. Look for an extant
5630 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5631 * cache entry.
5633 for (topheader = rbtnode->data;
5634 topheader != NULL;
5635 topheader = topheader->next) {
5636 if (topheader->type ==
5637 RBTDB_RDATATYPE_NCACHEANY)
5638 break;
5640 if (topheader != NULL && EXISTS(topheader) &&
5641 topheader->rdh_ttl > now) {
5643 * Found one.
5645 if (trust < topheader->trust) {
5647 * The NXDOMAIN/NODATA(QTYPE=ANY)
5648 * is more trusted.
5650 free_rdataset(rbtdb,
5651 rbtdb->common.mctx,
5652 newheader);
5653 if (addedrdataset != NULL)
5654 bind_rdataset(rbtdb, rbtnode,
5655 topheader, now,
5656 addedrdataset);
5657 return (DNS_R_UNCHANGED);
5660 * The new rdataset is better. Expire the
5661 * NXDOMAIN/NODATA(QTYPE=ANY).
5663 set_ttl(rbtdb, topheader, 0);
5664 topheader->attributes |= RDATASET_ATTR_STALE;
5665 rbtnode->dirty = 1;
5666 topheader = NULL;
5667 goto find_header;
5669 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5673 for (topheader = rbtnode->data;
5674 topheader != NULL;
5675 topheader = topheader->next) {
5676 if (topheader->type == newheader->type ||
5677 topheader->type == negtype)
5678 break;
5679 topheader_prev = topheader;
5682 find_header:
5684 * If header isn't NULL, we've found the right type. There may be
5685 * IGNORE rdatasets between the top of the chain and the first real
5686 * data. We skip over them.
5688 header = topheader;
5689 while (header != NULL && IGNORE(header))
5690 header = header->down;
5691 if (header != NULL) {
5692 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5695 * Deleting an already non-existent rdataset has no effect.
5697 if (header_nx && newheader_nx) {
5698 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5699 return (DNS_R_UNCHANGED);
5703 * Trying to add an rdataset with lower trust to a cache DB
5704 * has no effect, provided that the cache data isn't stale.
5706 if (rbtversion == NULL && trust < header->trust &&
5707 (header->rdh_ttl > now || header_nx)) {
5708 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5709 if (addedrdataset != NULL)
5710 bind_rdataset(rbtdb, rbtnode, header, now,
5711 addedrdataset);
5712 return (DNS_R_UNCHANGED);
5716 * Don't merge if a nonexistent rdataset is involved.
5718 if (merge && (header_nx || newheader_nx))
5719 merge = ISC_FALSE;
5722 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5723 * that is the union of 'newheader' and 'header'.
5725 if (merge) {
5726 unsigned int flags = 0;
5727 INSIST(rbtversion->serial >= header->serial);
5728 merged = NULL;
5729 result = ISC_R_SUCCESS;
5731 if ((options & DNS_DBADD_EXACT) != 0)
5732 flags |= DNS_RDATASLAB_EXACT;
5733 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5734 newheader->rdh_ttl != header->rdh_ttl)
5735 result = DNS_R_NOTEXACT;
5736 else if (newheader->rdh_ttl != header->rdh_ttl)
5737 flags |= DNS_RDATASLAB_FORCE;
5738 if (result == ISC_R_SUCCESS)
5739 result = dns_rdataslab_merge(
5740 (unsigned char *)header,
5741 (unsigned char *)newheader,
5742 (unsigned int)(sizeof(*newheader)),
5743 rbtdb->common.mctx,
5744 rbtdb->common.rdclass,
5745 (dns_rdatatype_t)header->type,
5746 flags, &merged);
5747 if (result == ISC_R_SUCCESS) {
5749 * If 'header' has the same serial number as
5750 * we do, we could clean it up now if we knew
5751 * that our caller had no references to it.
5752 * We don't know this, however, so we leave it
5753 * alone. It will get cleaned up when
5754 * clean_zone_node() runs.
5756 free_rdataset(rbtdb, rbtdb->common.mctx,
5757 newheader);
5758 newheader = (rdatasetheader_t *)merged;
5759 if (loading && RESIGN(newheader) &&
5760 RESIGN(header) &&
5761 header->resign < newheader->resign)
5762 newheader->resign = header->resign;
5763 } else {
5764 free_rdataset(rbtdb, rbtdb->common.mctx,
5765 newheader);
5766 return (result);
5770 * Don't replace existing NS, A and AAAA RRsets
5771 * in the cache if they are already exist. This
5772 * prevents named being locked to old servers.
5773 * Don't lower trust of existing record if the
5774 * update is forced.
5776 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5777 header->type == dns_rdatatype_ns &&
5778 !header_nx && !newheader_nx &&
5779 header->trust >= newheader->trust &&
5780 dns_rdataslab_equalx((unsigned char *)header,
5781 (unsigned char *)newheader,
5782 (unsigned int)(sizeof(*newheader)),
5783 rbtdb->common.rdclass,
5784 (dns_rdatatype_t)header->type)) {
5786 * Honour the new ttl if it is less than the
5787 * older one.
5789 if (header->rdh_ttl > newheader->rdh_ttl)
5790 set_ttl(rbtdb, header, newheader->rdh_ttl);
5791 if (header->noqname == NULL &&
5792 newheader->noqname != NULL) {
5793 header->noqname = newheader->noqname;
5794 newheader->noqname = NULL;
5796 if (header->closest == NULL &&
5797 newheader->closest != NULL) {
5798 header->closest = newheader->closest;
5799 newheader->closest = NULL;
5801 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5802 if (addedrdataset != NULL)
5803 bind_rdataset(rbtdb, rbtnode, header, now,
5804 addedrdataset);
5805 return (ISC_R_SUCCESS);
5807 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5808 (header->type == dns_rdatatype_a ||
5809 header->type == dns_rdatatype_aaaa) &&
5810 !header_nx && !newheader_nx &&
5811 header->trust >= newheader->trust &&
5812 dns_rdataslab_equal((unsigned char *)header,
5813 (unsigned char *)newheader,
5814 (unsigned int)(sizeof(*newheader)))) {
5816 * Honour the new ttl if it is less than the
5817 * older one.
5819 if (header->rdh_ttl > newheader->rdh_ttl)
5820 set_ttl(rbtdb, header, newheader->rdh_ttl);
5821 if (header->noqname == NULL &&
5822 newheader->noqname != NULL) {
5823 header->noqname = newheader->noqname;
5824 newheader->noqname = NULL;
5826 if (header->closest == NULL &&
5827 newheader->closest != NULL) {
5828 header->closest = newheader->closest;
5829 newheader->closest = NULL;
5831 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5832 if (addedrdataset != NULL)
5833 bind_rdataset(rbtdb, rbtnode, header, now,
5834 addedrdataset);
5835 return (ISC_R_SUCCESS);
5837 INSIST(rbtversion == NULL ||
5838 rbtversion->serial >= topheader->serial);
5839 if (topheader_prev != NULL)
5840 topheader_prev->next = newheader;
5841 else
5842 rbtnode->data = newheader;
5843 newheader->next = topheader->next;
5844 if (loading) {
5846 * There are no other references to 'header' when
5847 * loading, so we MAY clean up 'header' now.
5848 * Since we don't generate changed records when
5849 * loading, we MUST clean up 'header' now.
5851 newheader->down = NULL;
5852 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5853 } else {
5854 newheader->down = topheader;
5855 topheader->next = newheader;
5856 rbtnode->dirty = 1;
5857 if (changed != NULL)
5858 changed->dirty = ISC_TRUE;
5859 if (rbtversion == NULL) {
5860 set_ttl(rbtdb, header, 0);
5861 header->attributes |= RDATASET_ATTR_STALE;
5863 idx = newheader->node->locknum;
5864 if (IS_CACHE(rbtdb)) {
5865 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5866 newheader, link);
5868 * XXXMLG We don't check the return value
5869 * here. If it fails, we will not do TTL
5870 * based expiry on this node. However, we
5871 * will do it on the LRU side, so memory
5872 * will not leak... for long.
5874 isc_heap_insert(rbtdb->heaps[idx], newheader);
5875 } else if (RESIGN(newheader))
5876 resign_insert(rbtdb, idx, newheader);
5878 } else {
5880 * No non-IGNORED rdatasets of the given type exist at
5881 * this node.
5885 * If we're trying to delete the type, don't bother.
5887 if (newheader_nx) {
5888 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5889 return (DNS_R_UNCHANGED);
5892 if (topheader != NULL) {
5894 * We have an list of rdatasets of the given type,
5895 * but they're all marked IGNORE. We simply insert
5896 * the new rdataset at the head of the list.
5898 * Ignored rdatasets cannot occur during loading, so
5899 * we INSIST on it.
5901 INSIST(!loading);
5902 INSIST(rbtversion == NULL ||
5903 rbtversion->serial >= topheader->serial);
5904 if (topheader_prev != NULL)
5905 topheader_prev->next = newheader;
5906 else
5907 rbtnode->data = newheader;
5908 newheader->next = topheader->next;
5909 newheader->down = topheader;
5910 topheader->next = newheader;
5911 rbtnode->dirty = 1;
5912 if (changed != NULL)
5913 changed->dirty = ISC_TRUE;
5914 } else {
5916 * No rdatasets of the given type exist at the node.
5918 newheader->next = rbtnode->data;
5919 newheader->down = NULL;
5920 rbtnode->data = newheader;
5922 idx = newheader->node->locknum;
5923 if (IS_CACHE(rbtdb)) {
5924 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5925 newheader, link);
5926 isc_heap_insert(rbtdb->heaps[idx], newheader);
5927 } else if (RESIGN(newheader)) {
5928 resign_insert(rbtdb, idx, newheader);
5933 * Check if the node now contains CNAME and other data.
5935 if (rbtversion != NULL &&
5936 cname_and_other_data(rbtnode, rbtversion->serial))
5937 return (DNS_R_CNAMEANDOTHER);
5939 if (addedrdataset != NULL)
5940 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5942 return (ISC_R_SUCCESS);
5945 static inline isc_boolean_t
5946 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5947 rbtdb_rdatatype_t type)
5949 if (IS_CACHE(rbtdb)) {
5950 if (type == dns_rdatatype_dname)
5951 return (ISC_TRUE);
5952 else
5953 return (ISC_FALSE);
5954 } else if (type == dns_rdatatype_dname ||
5955 (type == dns_rdatatype_ns &&
5956 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5957 return (ISC_TRUE);
5958 return (ISC_FALSE);
5961 static inline isc_result_t
5962 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5963 dns_rdataset_t *rdataset)
5965 struct noqname *noqname;
5966 isc_mem_t *mctx = rbtdb->common.mctx;
5967 dns_name_t name;
5968 dns_rdataset_t neg, negsig;
5969 isc_result_t result;
5970 isc_region_t r;
5972 dns_name_init(&name, NULL);
5973 dns_rdataset_init(&neg);
5974 dns_rdataset_init(&negsig);
5976 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5977 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5979 noqname = isc_mem_get(mctx, sizeof(*noqname));
5980 if (noqname == NULL) {
5981 result = ISC_R_NOMEMORY;
5982 goto cleanup;
5984 dns_name_init(&noqname->name, NULL);
5985 noqname->neg = NULL;
5986 noqname->negsig = NULL;
5987 noqname->type = neg.type;
5988 result = dns_name_dup(&name, mctx, &noqname->name);
5989 if (result != ISC_R_SUCCESS)
5990 goto cleanup;
5991 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5992 if (result != ISC_R_SUCCESS)
5993 goto cleanup;
5994 noqname->neg = r.base;
5995 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5996 if (result != ISC_R_SUCCESS)
5997 goto cleanup;
5998 noqname->negsig = r.base;
5999 dns_rdataset_disassociate(&neg);
6000 dns_rdataset_disassociate(&negsig);
6001 newheader->noqname = noqname;
6002 return (ISC_R_SUCCESS);
6004 cleanup:
6005 dns_rdataset_disassociate(&neg);
6006 dns_rdataset_disassociate(&negsig);
6007 free_noqname(mctx, &noqname);
6008 return(result);
6011 static inline isc_result_t
6012 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6013 dns_rdataset_t *rdataset)
6015 struct noqname *closest;
6016 isc_mem_t *mctx = rbtdb->common.mctx;
6017 dns_name_t name;
6018 dns_rdataset_t neg, negsig;
6019 isc_result_t result;
6020 isc_region_t r;
6022 dns_name_init(&name, NULL);
6023 dns_rdataset_init(&neg);
6024 dns_rdataset_init(&negsig);
6026 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6027 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6029 closest = isc_mem_get(mctx, sizeof(*closest));
6030 if (closest == NULL) {
6031 result = ISC_R_NOMEMORY;
6032 goto cleanup;
6034 dns_name_init(&closest->name, NULL);
6035 closest->neg = NULL;
6036 closest->negsig = NULL;
6037 closest->type = neg.type;
6038 result = dns_name_dup(&name, mctx, &closest->name);
6039 if (result != ISC_R_SUCCESS)
6040 goto cleanup;
6041 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6042 if (result != ISC_R_SUCCESS)
6043 goto cleanup;
6044 closest->neg = r.base;
6045 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6046 if (result != ISC_R_SUCCESS)
6047 goto cleanup;
6048 closest->negsig = r.base;
6049 dns_rdataset_disassociate(&neg);
6050 dns_rdataset_disassociate(&negsig);
6051 newheader->closest = closest;
6052 return (ISC_R_SUCCESS);
6054 cleanup:
6055 dns_rdataset_disassociate(&neg);
6056 dns_rdataset_disassociate(&negsig);
6057 free_noqname(mctx, &closest);
6058 return(result);
6061 static dns_dbmethods_t zone_methods;
6063 static isc_result_t
6064 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6065 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6066 dns_rdataset_t *addedrdataset)
6068 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6069 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6070 rbtdb_version_t *rbtversion = version;
6071 isc_region_t region;
6072 rdatasetheader_t *newheader;
6073 rdatasetheader_t *header;
6074 isc_result_t result;
6075 isc_boolean_t delegating;
6076 isc_boolean_t newnsec;
6077 isc_boolean_t tree_locked = ISC_FALSE;
6079 REQUIRE(VALID_RBTDB(rbtdb));
6081 if (rbtdb->common.methods == &zone_methods)
6082 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6083 (rdataset->type == dns_rdatatype_nsec3 ||
6084 rdataset->covers == dns_rdatatype_nsec3)) ||
6085 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6086 rdataset->type != dns_rdatatype_nsec3 &&
6087 rdataset->covers != dns_rdatatype_nsec3)));
6089 if (rbtversion == NULL) {
6090 if (now == 0)
6091 isc_stdtime_get(&now);
6092 } else
6093 now = 0;
6095 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6096 &region,
6097 sizeof(rdatasetheader_t));
6098 if (result != ISC_R_SUCCESS)
6099 return (result);
6101 newheader = (rdatasetheader_t *)region.base;
6102 init_rdataset(rbtdb, newheader);
6103 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6104 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6105 rdataset->covers);
6106 newheader->attributes = 0;
6107 newheader->noqname = NULL;
6108 newheader->closest = NULL;
6109 newheader->count = init_count++;
6110 newheader->trust = rdataset->trust;
6111 newheader->additional_auth = NULL;
6112 newheader->additional_glue = NULL;
6113 newheader->last_used = now;
6114 newheader->node = rbtnode;
6115 if (rbtversion != NULL) {
6116 newheader->serial = rbtversion->serial;
6117 now = 0;
6119 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6120 newheader->attributes |= RDATASET_ATTR_RESIGN;
6121 newheader->resign = rdataset->resign;
6122 } else
6123 newheader->resign = 0;
6124 } else {
6125 newheader->serial = 1;
6126 newheader->resign = 0;
6127 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6128 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6129 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6130 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6131 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6132 result = addnoqname(rbtdb, newheader, rdataset);
6133 if (result != ISC_R_SUCCESS) {
6134 free_rdataset(rbtdb, rbtdb->common.mctx,
6135 newheader);
6136 return (result);
6139 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6140 result = addclosest(rbtdb, newheader, rdataset);
6141 if (result != ISC_R_SUCCESS) {
6142 free_rdataset(rbtdb, rbtdb->common.mctx,
6143 newheader);
6144 return (result);
6150 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6151 * just DNAME for the cache), then we need to set the callback bit
6152 * on the node.
6154 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6155 delegating = ISC_TRUE;
6156 else
6157 delegating = ISC_FALSE;
6160 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6162 if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6163 rdataset->type == dns_rdatatype_nsec)
6164 newnsec = ISC_TRUE;
6165 else
6166 newnsec = ISC_FALSE;
6169 * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6170 * or the DB is a cache in an overmem state, hold an exclusive lock on
6171 * the tree. In the latter case the lock does not necessarily have to
6172 * be acquired but it will help purge stale entries more effectively.
6174 if (delegating || newnsec || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6175 tree_locked = ISC_TRUE;
6176 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6179 if (IS_CACHE(rbtdb) && rbtdb->overmem)
6180 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6182 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6183 isc_rwlocktype_write);
6185 if (rbtdb->rrsetstats != NULL) {
6186 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6187 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6190 if (IS_CACHE(rbtdb)) {
6191 if (tree_locked)
6192 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6194 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6195 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6196 expire_header(rbtdb, header, tree_locked);
6199 * If we've been holding a write lock on the tree just for
6200 * cleaning, we can release it now. However, we still need the
6201 * node lock.
6203 if (tree_locked && !delegating && !newnsec) {
6204 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6205 tree_locked = ISC_FALSE;
6209 result = ISC_R_SUCCESS;
6210 if (newnsec) {
6211 dns_fixedname_t fname;
6212 dns_name_t *name;
6213 dns_rbtnode_t *nsecnode;
6215 dns_fixedname_init(&fname);
6216 name = dns_fixedname_name(&fname);
6217 dns_rbt_fullnamefromnode(rbtnode, name);
6218 nsecnode = NULL;
6219 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6220 if (result == ISC_R_SUCCESS) {
6221 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6222 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6223 } else if (result == ISC_R_EXISTS) {
6224 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6225 result = ISC_R_SUCCESS;
6229 if (result == ISC_R_SUCCESS)
6230 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6231 ISC_FALSE, addedrdataset, now);
6232 if (result == ISC_R_SUCCESS && delegating)
6233 rbtnode->find_callback = 1;
6235 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6236 isc_rwlocktype_write);
6238 if (tree_locked)
6239 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6242 * Update the zone's secure status. If version is non-NULL
6243 * this is deferred until closeversion() is called.
6245 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6246 iszonesecure(db, version, rbtdb->origin_node);
6248 return (result);
6251 static isc_result_t
6252 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6253 dns_rdataset_t *rdataset, unsigned int options,
6254 dns_rdataset_t *newrdataset)
6256 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6257 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6258 rbtdb_version_t *rbtversion = version;
6259 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6260 unsigned char *subresult;
6261 isc_region_t region;
6262 isc_result_t result;
6263 rbtdb_changed_t *changed;
6265 REQUIRE(VALID_RBTDB(rbtdb));
6267 if (rbtdb->common.methods == &zone_methods)
6268 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6269 (rdataset->type == dns_rdatatype_nsec3 ||
6270 rdataset->covers == dns_rdatatype_nsec3)) ||
6271 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6272 rdataset->type != dns_rdatatype_nsec3 &&
6273 rdataset->covers != dns_rdatatype_nsec3)));
6275 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6276 &region,
6277 sizeof(rdatasetheader_t));
6278 if (result != ISC_R_SUCCESS)
6279 return (result);
6280 newheader = (rdatasetheader_t *)region.base;
6281 init_rdataset(rbtdb, newheader);
6282 set_ttl(rbtdb, newheader, rdataset->ttl);
6283 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6284 rdataset->covers);
6285 newheader->attributes = 0;
6286 newheader->serial = rbtversion->serial;
6287 newheader->trust = 0;
6288 newheader->noqname = NULL;
6289 newheader->closest = NULL;
6290 newheader->count = init_count++;
6291 newheader->additional_auth = NULL;
6292 newheader->additional_glue = NULL;
6293 newheader->last_used = 0;
6294 newheader->node = rbtnode;
6295 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6296 newheader->attributes |= RDATASET_ATTR_RESIGN;
6297 newheader->resign = rdataset->resign;
6298 } else
6299 newheader->resign = 0;
6301 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6302 isc_rwlocktype_write);
6304 changed = add_changed(rbtdb, rbtversion, rbtnode);
6305 if (changed == NULL) {
6306 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6307 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6308 isc_rwlocktype_write);
6309 return (ISC_R_NOMEMORY);
6312 topheader_prev = NULL;
6313 for (topheader = rbtnode->data;
6314 topheader != NULL;
6315 topheader = topheader->next) {
6316 if (topheader->type == newheader->type)
6317 break;
6318 topheader_prev = topheader;
6321 * If header isn't NULL, we've found the right type. There may be
6322 * IGNORE rdatasets between the top of the chain and the first real
6323 * data. We skip over them.
6325 header = topheader;
6326 while (header != NULL && IGNORE(header))
6327 header = header->down;
6328 if (header != NULL && EXISTS(header)) {
6329 unsigned int flags = 0;
6330 subresult = NULL;
6331 result = ISC_R_SUCCESS;
6332 if ((options & DNS_DBSUB_EXACT) != 0) {
6333 flags |= DNS_RDATASLAB_EXACT;
6334 if (newheader->rdh_ttl != header->rdh_ttl)
6335 result = DNS_R_NOTEXACT;
6337 if (result == ISC_R_SUCCESS)
6338 result = dns_rdataslab_subtract(
6339 (unsigned char *)header,
6340 (unsigned char *)newheader,
6341 (unsigned int)(sizeof(*newheader)),
6342 rbtdb->common.mctx,
6343 rbtdb->common.rdclass,
6344 (dns_rdatatype_t)header->type,
6345 flags, &subresult);
6346 if (result == ISC_R_SUCCESS) {
6347 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6348 newheader = (rdatasetheader_t *)subresult;
6349 init_rdataset(rbtdb, newheader);
6351 * We have to set the serial since the rdataslab
6352 * subtraction routine copies the reserved portion of
6353 * header, not newheader.
6355 newheader->serial = rbtversion->serial;
6357 * XXXJT: dns_rdataslab_subtract() copied the pointers
6358 * to additional info. We need to clear these fields
6359 * to avoid having duplicated references.
6361 newheader->additional_auth = NULL;
6362 newheader->additional_glue = NULL;
6363 } else if (result == DNS_R_NXRRSET) {
6365 * This subtraction would remove all of the rdata;
6366 * add a nonexistent header instead.
6368 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6369 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6370 if (newheader == NULL) {
6371 result = ISC_R_NOMEMORY;
6372 goto unlock;
6374 set_ttl(rbtdb, newheader, 0);
6375 newheader->type = topheader->type;
6376 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6377 newheader->trust = 0;
6378 newheader->serial = rbtversion->serial;
6379 newheader->noqname = NULL;
6380 newheader->closest = NULL;
6381 newheader->count = 0;
6382 newheader->additional_auth = NULL;
6383 newheader->additional_glue = NULL;
6384 newheader->node = rbtnode;
6385 newheader->resign = 0;
6386 newheader->last_used = 0;
6387 } else {
6388 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6389 goto unlock;
6393 * If we're here, we want to link newheader in front of
6394 * topheader.
6396 INSIST(rbtversion->serial >= topheader->serial);
6397 if (topheader_prev != NULL)
6398 topheader_prev->next = newheader;
6399 else
6400 rbtnode->data = newheader;
6401 newheader->next = topheader->next;
6402 newheader->down = topheader;
6403 topheader->next = newheader;
6404 rbtnode->dirty = 1;
6405 changed->dirty = ISC_TRUE;
6406 } else {
6408 * The rdataset doesn't exist, so we don't need to do anything
6409 * to satisfy the deletion request.
6411 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6412 if ((options & DNS_DBSUB_EXACT) != 0)
6413 result = DNS_R_NOTEXACT;
6414 else
6415 result = DNS_R_UNCHANGED;
6418 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6419 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6421 unlock:
6422 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6423 isc_rwlocktype_write);
6426 * Update the zone's secure status. If version is non-NULL
6427 * this is deferred until closeversion() is called.
6429 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6430 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6432 return (result);
6435 static isc_result_t
6436 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6437 dns_rdatatype_t type, dns_rdatatype_t covers)
6439 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6440 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6441 rbtdb_version_t *rbtversion = version;
6442 isc_result_t result;
6443 rdatasetheader_t *newheader;
6445 REQUIRE(VALID_RBTDB(rbtdb));
6447 if (type == dns_rdatatype_any)
6448 return (ISC_R_NOTIMPLEMENTED);
6449 if (type == dns_rdatatype_rrsig && covers == 0)
6450 return (ISC_R_NOTIMPLEMENTED);
6452 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6453 if (newheader == NULL)
6454 return (ISC_R_NOMEMORY);
6455 set_ttl(rbtdb, newheader, 0);
6456 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6457 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6458 newheader->trust = 0;
6459 newheader->noqname = NULL;
6460 newheader->closest = NULL;
6461 newheader->additional_auth = NULL;
6462 newheader->additional_glue = NULL;
6463 if (rbtversion != NULL)
6464 newheader->serial = rbtversion->serial;
6465 else
6466 newheader->serial = 0;
6467 newheader->count = 0;
6468 newheader->last_used = 0;
6469 newheader->node = rbtnode;
6471 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6472 isc_rwlocktype_write);
6474 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6475 ISC_FALSE, NULL, 0);
6477 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6478 isc_rwlocktype_write);
6481 * Update the zone's secure status. If version is non-NULL
6482 * this is deferred until closeversion() is called.
6484 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6485 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6487 return (result);
6491 * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
6493 static isc_result_t
6494 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6495 isc_boolean_t hasnsec)
6497 isc_result_t noderesult, nsecresult;
6498 dns_rbtnode_t *nsecnode;
6500 noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
6501 if (!hasnsec)
6502 return (noderesult);
6503 if (noderesult == ISC_R_EXISTS) {
6505 * Add a node to the auxiliary NSEC tree for an old node
6506 * just now getting an NSEC record.
6508 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6509 return noderesult;
6510 } else if (noderesult != ISC_R_SUCCESS) {
6511 return (noderesult);
6515 * Build the auxiliary tree for NSECs as we go.
6516 * This tree speeds searches for closest NSECs that would otherwise
6517 * need to examine many irrelevant nodes in large TLDs.
6519 * Add nodes to the auxiliary tree after corresponding nodes have
6520 * been added to the main tree.
6522 nsecnode = NULL;
6523 nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6524 if (nsecresult == ISC_R_SUCCESS) {
6525 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6526 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6527 return (ISC_R_SUCCESS);
6530 if (nsecresult == ISC_R_EXISTS) {
6531 #if 1 /* 0 */
6532 isc_log_write(dns_lctx,
6533 DNS_LOGCATEGORY_DATABASE,
6534 DNS_LOGMODULE_CACHE,
6535 ISC_LOG_WARNING,
6536 "addnode: NSEC node already exists");
6537 #endif
6538 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6539 return (noderesult);
6542 nsecresult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6543 if (nsecresult != ISC_R_SUCCESS)
6544 isc_log_write(dns_lctx,
6545 DNS_LOGCATEGORY_DATABASE,
6546 DNS_LOGMODULE_CACHE,
6547 ISC_LOG_WARNING,
6548 "loading_addrdataset: "
6549 "dns_rbt_deletenode: %s after "
6550 "dns_rbt_addnode(NSEC): %s",
6551 isc_result_totext(nsecresult),
6552 isc_result_totext(noderesult));
6553 return (noderesult);
6556 static isc_result_t
6557 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6558 rbtdb_load_t *loadctx = arg;
6559 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6560 dns_rbtnode_t *node;
6561 isc_result_t result;
6562 isc_region_t region;
6563 rdatasetheader_t *newheader;
6566 * This routine does no node locking. See comments in
6567 * 'load' below for more information on loading and
6568 * locking.
6573 * SOA records are only allowed at top of zone.
6575 if (rdataset->type == dns_rdatatype_soa &&
6576 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6577 return (DNS_R_NOTZONETOP);
6579 if (rdataset->type != dns_rdatatype_nsec3 &&
6580 rdataset->covers != dns_rdatatype_nsec3)
6581 add_empty_wildcards(rbtdb, name);
6583 if (dns_name_iswildcard(name)) {
6585 * NS record owners cannot legally be wild cards.
6587 if (rdataset->type == dns_rdatatype_ns)
6588 return (DNS_R_INVALIDNS);
6590 * NSEC3 record owners cannot legally be wild cards.
6592 if (rdataset->type == dns_rdatatype_nsec3)
6593 return (DNS_R_INVALIDNSEC3);
6594 result = add_wildcard_magic(rbtdb, name);
6595 if (result != ISC_R_SUCCESS)
6596 return (result);
6599 node = NULL;
6600 if (rdataset->type == dns_rdatatype_nsec3 ||
6601 rdataset->covers == dns_rdatatype_nsec3) {
6602 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6603 if (result == ISC_R_SUCCESS)
6604 node->nsec = DNS_RBT_NSEC_NSEC3;
6605 } else if (rdataset->type == dns_rdatatype_nsec) {
6606 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6607 } else {
6608 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6610 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6611 return (result);
6612 if (result != ISC_R_EXISTS) {
6613 dns_name_t foundname;
6614 dns_name_init(&foundname, NULL);
6615 dns_rbt_namefromnode(node, &foundname);
6616 #ifdef DNS_RBT_USEHASH
6617 node->locknum = node->hashval % rbtdb->node_lock_count;
6618 #else
6619 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6620 rbtdb->node_lock_count;
6621 #endif
6624 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6625 &region,
6626 sizeof(rdatasetheader_t));
6627 if (result != ISC_R_SUCCESS)
6628 return (result);
6629 newheader = (rdatasetheader_t *)region.base;
6630 init_rdataset(rbtdb, newheader);
6631 set_ttl(rbtdb, newheader,
6632 rdataset->ttl + loadctx->now); /* XXX overflow check */
6633 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6634 rdataset->covers);
6635 newheader->attributes = 0;
6636 newheader->trust = rdataset->trust;
6637 newheader->serial = 1;
6638 newheader->noqname = NULL;
6639 newheader->closest = NULL;
6640 newheader->count = init_count++;
6641 newheader->additional_auth = NULL;
6642 newheader->additional_glue = NULL;
6643 newheader->last_used = 0;
6644 newheader->node = node;
6645 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6646 newheader->attributes |= RDATASET_ATTR_RESIGN;
6647 newheader->resign = rdataset->resign;
6648 } else
6649 newheader->resign = 0;
6651 result = add(rbtdb, node, rbtdb->current_version, newheader,
6652 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6653 if (result == ISC_R_SUCCESS &&
6654 delegating_type(rbtdb, node, rdataset->type))
6655 node->find_callback = 1;
6656 else if (result == DNS_R_UNCHANGED)
6657 result = ISC_R_SUCCESS;
6659 return (result);
6662 static isc_result_t
6663 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6664 rbtdb_load_t *loadctx;
6665 dns_rbtdb_t *rbtdb;
6667 rbtdb = (dns_rbtdb_t *)db;
6669 REQUIRE(VALID_RBTDB(rbtdb));
6671 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6672 if (loadctx == NULL)
6673 return (ISC_R_NOMEMORY);
6675 loadctx->rbtdb = rbtdb;
6676 if (IS_CACHE(rbtdb))
6677 isc_stdtime_get(&loadctx->now);
6678 else
6679 loadctx->now = 0;
6681 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6683 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6684 == 0);
6685 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6687 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6689 *addp = loading_addrdataset;
6690 *dbloadp = loadctx;
6692 return (ISC_R_SUCCESS);
6695 static isc_result_t
6696 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6697 rbtdb_load_t *loadctx;
6698 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6700 REQUIRE(VALID_RBTDB(rbtdb));
6701 REQUIRE(dbloadp != NULL);
6702 loadctx = *dbloadp;
6703 REQUIRE(loadctx->rbtdb == rbtdb);
6705 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6707 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6708 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6710 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6711 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6713 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6716 * If there's a KEY rdataset at the zone origin containing a
6717 * zone key, we consider the zone secure.
6719 if (! IS_CACHE(rbtdb))
6720 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6722 *dbloadp = NULL;
6724 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6726 return (ISC_R_SUCCESS);
6729 static isc_result_t
6730 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6731 dns_masterformat_t masterformat) {
6732 dns_rbtdb_t *rbtdb;
6734 rbtdb = (dns_rbtdb_t *)db;
6736 REQUIRE(VALID_RBTDB(rbtdb));
6738 #ifdef BIND9
6739 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6740 &dns_master_style_default,
6741 filename, masterformat));
6742 #else
6743 UNUSED(version);
6744 UNUSED(filename);
6745 UNUSED(masterformat);
6747 return (ISC_R_NOTIMPLEMENTED);
6748 #endif /* BIND9 */
6751 static void
6752 delete_callback(void *data, void *arg) {
6753 dns_rbtdb_t *rbtdb = arg;
6754 rdatasetheader_t *current, *next;
6755 unsigned int locknum;
6757 current = data;
6758 locknum = current->node->locknum;
6759 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6760 while (current != NULL) {
6761 next = current->next;
6762 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6763 current = next;
6765 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6768 static isc_boolean_t
6769 issecure(dns_db_t *db) {
6770 dns_rbtdb_t *rbtdb;
6771 isc_boolean_t secure;
6773 rbtdb = (dns_rbtdb_t *)db;
6775 REQUIRE(VALID_RBTDB(rbtdb));
6777 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6778 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6779 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6781 return (secure);
6784 static isc_boolean_t
6785 isdnssec(dns_db_t *db) {
6786 dns_rbtdb_t *rbtdb;
6787 isc_boolean_t dnssec;
6789 rbtdb = (dns_rbtdb_t *)db;
6791 REQUIRE(VALID_RBTDB(rbtdb));
6793 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6794 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6795 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6797 return (dnssec);
6800 static unsigned int
6801 nodecount(dns_db_t *db) {
6802 dns_rbtdb_t *rbtdb;
6803 unsigned int count;
6805 rbtdb = (dns_rbtdb_t *)db;
6807 REQUIRE(VALID_RBTDB(rbtdb));
6809 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6810 count = dns_rbt_nodecount(rbtdb->tree);
6811 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6813 return (count);
6816 static void
6817 settask(dns_db_t *db, isc_task_t *task) {
6818 dns_rbtdb_t *rbtdb;
6820 rbtdb = (dns_rbtdb_t *)db;
6822 REQUIRE(VALID_RBTDB(rbtdb));
6824 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6825 if (rbtdb->task != NULL)
6826 isc_task_detach(&rbtdb->task);
6827 if (task != NULL)
6828 isc_task_attach(task, &rbtdb->task);
6829 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6832 static isc_boolean_t
6833 ispersistent(dns_db_t *db) {
6834 UNUSED(db);
6835 return (ISC_FALSE);
6838 static isc_result_t
6839 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6840 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6841 dns_rbtnode_t *onode;
6842 isc_result_t result = ISC_R_SUCCESS;
6844 REQUIRE(VALID_RBTDB(rbtdb));
6845 REQUIRE(nodep != NULL && *nodep == NULL);
6847 /* Note that the access to origin_node doesn't require a DB lock */
6848 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6849 if (onode != NULL) {
6850 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6851 new_reference(rbtdb, onode);
6852 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6854 *nodep = rbtdb->origin_node;
6855 } else {
6856 INSIST(IS_CACHE(rbtdb));
6857 result = ISC_R_NOTFOUND;
6860 return (result);
6863 static isc_result_t
6864 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6865 isc_uint8_t *flags, isc_uint16_t *iterations,
6866 unsigned char *salt, size_t *salt_length)
6868 dns_rbtdb_t *rbtdb;
6869 isc_result_t result = ISC_R_NOTFOUND;
6870 rbtdb_version_t *rbtversion = version;
6872 rbtdb = (dns_rbtdb_t *)db;
6874 REQUIRE(VALID_RBTDB(rbtdb));
6876 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6878 if (rbtversion == NULL)
6879 rbtversion = rbtdb->current_version;
6881 if (rbtversion->havensec3) {
6882 if (hash != NULL)
6883 *hash = rbtversion->hash;
6884 if (salt != NULL && salt_length != NULL) {
6885 REQUIRE(*salt_length >= rbtversion->salt_length);
6886 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6888 if (salt_length != NULL)
6889 *salt_length = rbtversion->salt_length;
6890 if (iterations != NULL)
6891 *iterations = rbtversion->iterations;
6892 if (flags != NULL)
6893 *flags = rbtversion->flags;
6894 result = ISC_R_SUCCESS;
6896 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6898 return (result);
6901 static isc_result_t
6902 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6903 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6904 isc_stdtime_t oldresign;
6905 isc_result_t result = ISC_R_SUCCESS;
6906 rdatasetheader_t *header;
6908 REQUIRE(VALID_RBTDB(rbtdb));
6909 REQUIRE(!IS_CACHE(rbtdb));
6910 REQUIRE(rdataset != NULL);
6912 header = rdataset->private3;
6913 header--;
6915 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6916 isc_rwlocktype_write);
6918 oldresign = header->resign;
6919 header->resign = resign;
6920 if (header->heap_index != 0) {
6921 INSIST(RESIGN(header));
6922 if (resign == 0) {
6923 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6924 header->heap_index);
6925 header->heap_index = 0;
6926 } else if (resign < oldresign)
6927 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6928 header->heap_index);
6929 else
6930 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6931 header->heap_index);
6932 } else if (resign && header->heap_index == 0) {
6933 header->attributes |= RDATASET_ATTR_RESIGN;
6934 result = resign_insert(rbtdb, header->node->locknum, header);
6936 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6937 isc_rwlocktype_write);
6938 return (result);
6941 static isc_result_t
6942 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6943 dns_name_t *foundname)
6945 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6946 rdatasetheader_t *header = NULL, *this;
6947 unsigned int i;
6948 isc_result_t result = ISC_R_NOTFOUND;
6950 REQUIRE(VALID_RBTDB(rbtdb));
6952 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6954 for (i = 0; i < rbtdb->node_lock_count; i++) {
6955 this = isc_heap_element(rbtdb->heaps[i], 1);
6956 if (this == NULL)
6957 continue;
6958 if (header == NULL)
6959 header = this;
6960 else if (isc_serial_lt(this->resign, header->resign))
6961 header = this;
6964 if (header == NULL)
6965 goto unlock;
6967 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6968 isc_rwlocktype_read);
6970 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6972 if (foundname != NULL)
6973 dns_rbt_fullnamefromnode(header->node, foundname);
6975 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6976 isc_rwlocktype_read);
6978 result = ISC_R_SUCCESS;
6980 unlock:
6981 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6983 return (result);
6986 static void
6987 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6989 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6990 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6991 dns_rbtnode_t *node;
6992 rdatasetheader_t *header;
6994 REQUIRE(VALID_RBTDB(rbtdb));
6995 REQUIRE(rdataset != NULL);
6996 REQUIRE(rbtdb->future_version == rbtversion);
6997 REQUIRE(rbtversion->writer);
6999 node = rdataset->private2;
7000 header = rdataset->private3;
7001 header--;
7003 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7004 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7005 isc_rwlocktype_write);
7007 * Delete from heap and save to re-signed list so that it can
7008 * be restored if we backout of this change.
7010 new_reference(rbtdb, node);
7011 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7012 header->heap_index = 0;
7013 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7015 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7016 isc_rwlocktype_write);
7017 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7020 static dns_stats_t *
7021 getrrsetstats(dns_db_t *db) {
7022 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7024 REQUIRE(VALID_RBTDB(rbtdb));
7025 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7027 return (rbtdb->rrsetstats);
/*
 * Method table used for non-cache databases: the create routine installs
 * it in rbtdb->common.methods for dns_dbtype_stub and for every type
 * other than dns_dbtype_cache.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of dns_dbmethods_t.  Compared with cache_methods this
 * table uses the zone_* lookup routines, supplies the NSEC3 and
 * re-signing methods, and leaves the final slot (getrrsetstats in the
 * cache table) NULL.
 */
7030 static dns_dbmethods_t zone_methods = {
7031 attach,
7032 detach,
7033 beginload,
7034 endload,
7035 dump,
7036 currentversion,
7037 newversion,
7038 attachversion,
7039 closeversion,
7040 findnode,
7041 zone_find,
7042 zone_findzonecut,
7043 attachnode,
7044 detachnode,
7045 expirenode,
7046 printnode,
7047 createiterator,
7048 zone_findrdataset,
7049 allrdatasets,
7050 addrdataset,
7051 subtractrdataset,
7052 deleterdataset,
7053 issecure,
7054 nodecount,
7055 ispersistent,
7056 overmem,
7057 settask,
7058 getoriginnode,
/* NOTE(review): unimplemented slot -- presumably transfernode; confirm against dns_dbmethods_t. */
7059 NULL,
7060 getnsec3parameters,
7061 findnsec3node,
7062 setsigningtime,
7063 getsigningtime,
7064 resigned,
7065 isdnssec,
/* Slot filled by getrrsetstats in cache_methods; not provided for zones. */
7066 NULL
/*
 * Method table used for cache databases: the create routine installs it
 * in rbtdb->common.methods when the type is dns_dbtype_cache.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of dns_dbmethods_t.  Compared with zone_methods this
 * table uses the cache_* lookup routines, leaves the NSEC3 and
 * re-signing slots NULL, and supplies getrrsetstats in the final slot.
 */
7069 static dns_dbmethods_t cache_methods = {
7070 attach,
7071 detach,
7072 beginload,
7073 endload,
7074 dump,
7075 currentversion,
7076 newversion,
7077 attachversion,
7078 closeversion,
7079 findnode,
7080 cache_find,
7081 cache_findzonecut,
7082 attachnode,
7083 detachnode,
7084 expirenode,
7085 printnode,
7086 createiterator,
7087 cache_findrdataset,
7088 allrdatasets,
7089 addrdataset,
7090 subtractrdataset,
7091 deleterdataset,
7092 issecure,
7093 nodecount,
7094 ispersistent,
7095 overmem,
7096 settask,
7097 getoriginnode,
/* NOTE(review): unimplemented slot -- presumably transfernode; confirm against dns_dbmethods_t. */
7098 NULL,
/*
 * The next five slots line up with getnsec3parameters, findnsec3node,
 * setsigningtime, getsigningtime and resigned in zone_methods; none of
 * them is provided for caches.
 */
7099 NULL,
7100 NULL,
7101 NULL,
7102 NULL,
7103 NULL,
7104 isdnssec,
7105 getrrsetstats
7108 isc_result_t
7109 #ifdef DNS_RBTDB_VERSION64
7110 dns_rbtdb64_create
7111 #else
7112 dns_rbtdb_create
7113 #endif
7114 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7115 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7116 void *driverarg, dns_db_t **dbp)
7118 dns_rbtdb_t *rbtdb;
7119 isc_result_t result;
7120 int i;
7121 dns_name_t name;
7122 isc_boolean_t (*sooner)(void *, void *);
7124 /* Keep the compiler happy. */
7125 UNUSED(argc);
7126 UNUSED(argv);
7127 UNUSED(driverarg);
7129 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7130 if (rbtdb == NULL)
7131 return (ISC_R_NOMEMORY);
7133 memset(rbtdb, '\0', sizeof(*rbtdb));
7134 dns_name_init(&rbtdb->common.origin, NULL);
7135 rbtdb->common.attributes = 0;
7136 if (type == dns_dbtype_cache) {
7137 rbtdb->common.methods = &cache_methods;
7138 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7139 } else if (type == dns_dbtype_stub) {
7140 rbtdb->common.methods = &zone_methods;
7141 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7142 } else
7143 rbtdb->common.methods = &zone_methods;
7144 rbtdb->common.rdclass = rdclass;
7145 rbtdb->common.mctx = NULL;
7147 result = RBTDB_INITLOCK(&rbtdb->lock);
7148 if (result != ISC_R_SUCCESS)
7149 goto cleanup_rbtdb;
7151 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7152 if (result != ISC_R_SUCCESS)
7153 goto cleanup_lock;
7156 * Initialize node_lock_count in a generic way to support future
7157 * extension which allows the user to specify this value on creation.
7158 * Note that when specified for a cache DB it must be larger than 1
7159 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7161 if (rbtdb->node_lock_count == 0) {
7162 if (IS_CACHE(rbtdb))
7163 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7164 else
7165 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7166 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7167 result = ISC_R_RANGE;
7168 goto cleanup_tree_lock;
7170 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7171 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7172 sizeof(rbtdb_nodelock_t));
7173 if (rbtdb->node_locks == NULL) {
7174 result = ISC_R_NOMEMORY;
7175 goto cleanup_tree_lock;
7178 rbtdb->rrsetstats = NULL;
7179 if (IS_CACHE(rbtdb)) {
7180 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7181 if (result != ISC_R_SUCCESS)
7182 goto cleanup_node_locks;
7183 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7184 sizeof(rdatasetheaderlist_t));
7185 if (rbtdb->rdatasets == NULL) {
7186 result = ISC_R_NOMEMORY;
7187 goto cleanup_rrsetstats;
7189 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7190 ISC_LIST_INIT(rbtdb->rdatasets[i]);
7191 } else
7192 rbtdb->rdatasets = NULL;
7195 * Create the heaps.
7197 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
7198 sizeof(isc_heap_t *));
7199 if (rbtdb->heaps == NULL) {
7200 result = ISC_R_NOMEMORY;
7201 goto cleanup_rdatasets;
7203 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7204 rbtdb->heaps[i] = NULL;
7205 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7206 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7207 result = isc_heap_create(mctx, sooner, set_index, 0,
7208 &rbtdb->heaps[i]);
7209 if (result != ISC_R_SUCCESS)
7210 goto cleanup_heaps;
7214 * Create deadnode lists.
7216 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7217 sizeof(rbtnodelist_t));
7218 if (rbtdb->deadnodes == NULL) {
7219 result = ISC_R_NOMEMORY;
7220 goto cleanup_heaps;
7222 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7223 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7225 rbtdb->active = rbtdb->node_lock_count;
7227 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7228 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7229 if (result == ISC_R_SUCCESS) {
7230 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7231 if (result != ISC_R_SUCCESS)
7232 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7234 if (result != ISC_R_SUCCESS) {
7235 while (i-- > 0) {
7236 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7237 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7238 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7240 goto cleanup_deadnodes;
7242 rbtdb->node_locks[i].exiting = ISC_FALSE;
7246 * Attach to the mctx. The database will persist so long as there
7247 * are references to it, and attaching to the mctx ensures that our
7248 * mctx won't disappear out from under us.
7250 isc_mem_attach(mctx, &rbtdb->common.mctx);
7253 * Must be initialized before free_rbtdb() is called.
7255 isc_ondestroy_init(&rbtdb->common.ondest);
7258 * Make a copy of the origin name.
7260 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7261 if (result != ISC_R_SUCCESS) {
7262 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7263 return (result);
7267 * Make the Red-Black Trees.
7269 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7270 if (result != ISC_R_SUCCESS) {
7271 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7272 return (result);
7275 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7276 if (result != ISC_R_SUCCESS) {
7277 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7278 return (result);
7281 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7282 if (result != ISC_R_SUCCESS) {
7283 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7284 return (result);
7288 * In order to set the node callback bit correctly in zone databases,
7289 * we need to know if the node has the origin name of the zone.
7290 * In loading_addrdataset() we could simply compare the new name
7291 * to the origin name, but this is expensive. Also, we don't know the
7292 * node name in addrdataset(), so we need another way of knowing the
7293 * zone's top.
7295 * We now explicitly create a node for the zone's origin, and then
7296 * we simply remember the node's address. This is safe, because
7297 * the top-of-zone node can never be deleted, nor can its address
7298 * change.
7300 if (!IS_CACHE(rbtdb)) {
7301 rbtdb->origin_node = NULL;
7302 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7303 &rbtdb->origin_node);
7304 if (result != ISC_R_SUCCESS) {
7305 INSIST(result != ISC_R_EXISTS);
7306 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7307 return (result);
7309 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7311 * We need to give the origin node the right locknum.
7313 dns_name_init(&name, NULL);
7314 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7315 #ifdef DNS_RBT_USEHASH
7316 rbtdb->origin_node->locknum =
7317 rbtdb->origin_node->hashval %
7318 rbtdb->node_lock_count;
7319 #else
7320 rbtdb->origin_node->locknum =
7321 dns_name_hash(&name, ISC_TRUE) %
7322 rbtdb->node_lock_count;
7323 #endif
7327 * Misc. Initialization.
7329 result = isc_refcount_init(&rbtdb->references, 1);
7330 if (result != ISC_R_SUCCESS) {
7331 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7332 return (result);
7334 rbtdb->attributes = 0;
7335 rbtdb->overmem = ISC_FALSE;
7336 rbtdb->task = NULL;
7339 * Version Initialization.
7341 rbtdb->current_serial = 1;
7342 rbtdb->least_serial = 1;
7343 rbtdb->next_serial = 2;
7344 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7345 if (rbtdb->current_version == NULL) {
7346 isc_refcount_decrement(&rbtdb->references, NULL);
7347 isc_refcount_destroy(&rbtdb->references);
7348 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7349 return (ISC_R_NOMEMORY);
7351 rbtdb->current_version->secure = dns_db_insecure;
7352 rbtdb->current_version->havensec3 = ISC_FALSE;
7353 rbtdb->current_version->flags = 0;
7354 rbtdb->current_version->iterations = 0;
7355 rbtdb->current_version->hash = 0;
7356 rbtdb->current_version->salt_length = 0;
7357 memset(rbtdb->current_version->salt, 0,
7358 sizeof(rbtdb->current_version->salt));
7359 rbtdb->future_version = NULL;
7360 ISC_LIST_INIT(rbtdb->open_versions);
7362 * Keep the current version in the open list so that list operation
7363 * won't happen in normal lookup operations.
7365 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7367 rbtdb->common.magic = DNS_DB_MAGIC;
7368 rbtdb->common.impmagic = RBTDB_MAGIC;
7370 *dbp = (dns_db_t *)rbtdb;
7372 return (ISC_R_SUCCESS);
7374 cleanup_deadnodes:
7375 isc_mem_put(mctx, rbtdb->deadnodes,
7376 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7378 cleanup_heaps:
7379 if (rbtdb->heaps != NULL) {
7380 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7381 if (rbtdb->heaps[i] != NULL)
7382 isc_heap_destroy(&rbtdb->heaps[i]);
7383 isc_mem_put(mctx, rbtdb->heaps,
7384 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7387 cleanup_rdatasets:
7388 if (rbtdb->rdatasets != NULL)
7389 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7390 sizeof(rdatasetheaderlist_t));
7391 cleanup_rrsetstats:
7392 if (rbtdb->rrsetstats != NULL)
7393 dns_stats_detach(&rbtdb->rrsetstats);
7395 cleanup_node_locks:
7396 isc_mem_put(mctx, rbtdb->node_locks,
7397 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7399 cleanup_tree_lock:
7400 isc_rwlock_destroy(&rbtdb->tree_lock);
7402 cleanup_lock:
7403 RBTDB_DESTROYLOCK(&rbtdb->lock);
7405 cleanup_rbtdb:
7406 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7407 return (result);
7412 * Slabbed Rdataset Methods
7415 static void
7416 rdataset_disassociate(dns_rdataset_t *rdataset) {
7417 dns_db_t *db = rdataset->private1;
7418 dns_dbnode_t *node = rdataset->private2;
7420 detachnode(db, &node);
7423 static isc_result_t
7424 rdataset_first(dns_rdataset_t *rdataset) {
7425 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7426 unsigned int count;
7428 count = raw[0] * 256 + raw[1];
7429 if (count == 0) {
7430 rdataset->private5 = NULL;
7431 return (ISC_R_NOMORE);
7434 #if DNS_RDATASET_FIXED
7435 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7436 raw += 2 + (4 * count);
7437 else
7438 #endif
7439 raw += 2;
7442 * The privateuint4 field is the number of rdata beyond the
7443 * cursor position, so we decrement the total count by one
7444 * before storing it.
7446 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7447 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7448 * to the first entry in the offset table.
7450 count--;
7451 rdataset->privateuint4 = count;
7452 rdataset->private5 = raw;
7454 return (ISC_R_SUCCESS);
7457 static isc_result_t
7458 rdataset_next(dns_rdataset_t *rdataset) {
7459 unsigned int count;
7460 unsigned int length;
7461 unsigned char *raw; /* RDATASLAB */
7463 count = rdataset->privateuint4;
7464 if (count == 0)
7465 return (ISC_R_NOMORE);
7466 count--;
7467 rdataset->privateuint4 = count;
7470 * Skip forward one record (length + 4) or one offset (4).
7472 raw = rdataset->private5;
7473 #if DNS_RDATASET_FIXED
7474 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7475 #endif
7476 length = raw[0] * 256 + raw[1];
7477 raw += length;
7478 #if DNS_RDATASET_FIXED
7480 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7481 #else
7482 rdataset->private5 = raw + 2; /* length(2) */
7483 #endif
7485 return (ISC_R_SUCCESS);
7488 static void
7489 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7490 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7491 #if DNS_RDATASET_FIXED
7492 unsigned int offset;
7493 #endif
7494 unsigned int length;
7495 isc_region_t r;
7496 unsigned int flags = 0;
7498 REQUIRE(raw != NULL);
7501 * Find the start of the record if not already in private5
7502 * then skip the length and order fields.
7504 #if DNS_RDATASET_FIXED
7505 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7506 offset = (raw[0] << 24) + (raw[1] << 16) +
7507 (raw[2] << 8) + raw[3];
7508 raw = rdataset->private3;
7509 raw += offset;
7511 #endif
7512 length = raw[0] * 256 + raw[1];
7513 #if DNS_RDATASET_FIXED
7514 raw += 4;
7515 #else
7516 raw += 2;
7517 #endif
7518 if (rdataset->type == dns_rdatatype_rrsig) {
7519 if (*raw & DNS_RDATASLAB_OFFLINE)
7520 flags |= DNS_RDATA_OFFLINE;
7521 length--;
7522 raw++;
7524 r.length = length;
7525 r.base = raw;
7526 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7527 rdata->flags |= flags;
7530 static void
7531 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7532 dns_db_t *db = source->private1;
7533 dns_dbnode_t *node = source->private2;
7534 dns_dbnode_t *cloned_node = NULL;
7536 attachnode(db, node, &cloned_node);
7537 *target = *source;
7540 * Reset iterator state.
7542 target->privateuint4 = 0;
7543 target->private5 = NULL;
7546 static unsigned int
7547 rdataset_count(dns_rdataset_t *rdataset) {
7548 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7549 unsigned int count;
7551 count = raw[0] * 256 + raw[1];
7553 return (count);
7556 static isc_result_t
7557 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7558 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7560 dns_db_t *db = rdataset->private1;
7561 dns_dbnode_t *node = rdataset->private2;
7562 dns_dbnode_t *cloned_node;
7563 struct noqname *noqname = rdataset->private6;
7565 cloned_node = NULL;
7566 attachnode(db, node, &cloned_node);
7567 nsec->methods = &rdataset_methods;
7568 nsec->rdclass = db->rdclass;
7569 nsec->type = noqname->type;
7570 nsec->covers = 0;
7571 nsec->ttl = rdataset->ttl;
7572 nsec->trust = rdataset->trust;
7573 nsec->private1 = rdataset->private1;
7574 nsec->private2 = rdataset->private2;
7575 nsec->private3 = noqname->neg;
7576 nsec->privateuint4 = 0;
7577 nsec->private5 = NULL;
7578 nsec->private6 = NULL;
7579 nsec->private7 = NULL;
7581 cloned_node = NULL;
7582 attachnode(db, node, &cloned_node);
7583 nsecsig->methods = &rdataset_methods;
7584 nsecsig->rdclass = db->rdclass;
7585 nsecsig->type = dns_rdatatype_rrsig;
7586 nsecsig->covers = noqname->type;
7587 nsecsig->ttl = rdataset->ttl;
7588 nsecsig->trust = rdataset->trust;
7589 nsecsig->private1 = rdataset->private1;
7590 nsecsig->private2 = rdataset->private2;
7591 nsecsig->private3 = noqname->negsig;
7592 nsecsig->privateuint4 = 0;
7593 nsecsig->private5 = NULL;
7594 nsec->private6 = NULL;
7595 nsec->private7 = NULL;
7597 dns_name_clone(&noqname->name, name);
7599 return (ISC_R_SUCCESS);
7602 static isc_result_t
7603 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7604 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7606 dns_db_t *db = rdataset->private1;
7607 dns_dbnode_t *node = rdataset->private2;
7608 dns_dbnode_t *cloned_node;
7609 struct noqname *closest = rdataset->private7;
7611 cloned_node = NULL;
7612 attachnode(db, node, &cloned_node);
7613 nsec->methods = &rdataset_methods;
7614 nsec->rdclass = db->rdclass;
7615 nsec->type = closest->type;
7616 nsec->covers = 0;
7617 nsec->ttl = rdataset->ttl;
7618 nsec->trust = rdataset->trust;
7619 nsec->private1 = rdataset->private1;
7620 nsec->private2 = rdataset->private2;
7621 nsec->private3 = closest->neg;
7622 nsec->privateuint4 = 0;
7623 nsec->private5 = NULL;
7624 nsec->private6 = NULL;
7625 nsec->private7 = NULL;
7627 cloned_node = NULL;
7628 attachnode(db, node, &cloned_node);
7629 nsecsig->methods = &rdataset_methods;
7630 nsecsig->rdclass = db->rdclass;
7631 nsecsig->type = dns_rdatatype_rrsig;
7632 nsecsig->covers = closest->type;
7633 nsecsig->ttl = rdataset->ttl;
7634 nsecsig->trust = rdataset->trust;
7635 nsecsig->private1 = rdataset->private1;
7636 nsecsig->private2 = rdataset->private2;
7637 nsecsig->private3 = closest->negsig;
7638 nsecsig->privateuint4 = 0;
7639 nsecsig->private5 = NULL;
7640 nsec->private6 = NULL;
7641 nsec->private7 = NULL;
7643 dns_name_clone(&closest->name, name);
7645 return (ISC_R_SUCCESS);
7649 * Rdataset Iterator Methods
7652 static void
7653 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7654 rbtdb_rdatasetiter_t *rbtiterator;
7656 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7658 if (rbtiterator->common.version != NULL)
7659 closeversion(rbtiterator->common.db,
7660 &rbtiterator->common.version, ISC_FALSE);
7661 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7662 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7663 sizeof(*rbtiterator));
7665 *iteratorp = NULL;
7668 static isc_result_t
7669 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7670 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7671 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7672 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7673 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7674 rdatasetheader_t *header, *top_next;
7675 rbtdb_serial_t serial;
7676 isc_stdtime_t now;
7678 if (IS_CACHE(rbtdb)) {
7679 serial = 1;
7680 now = rbtiterator->common.now;
7681 } else {
7682 serial = rbtversion->serial;
7683 now = 0;
7686 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7687 isc_rwlocktype_read);
7689 for (header = rbtnode->data; header != NULL; header = top_next) {
7690 top_next = header->next;
7691 do {
7692 if (header->serial <= serial && !IGNORE(header)) {
7694 * Is this a "this rdataset doesn't exist"
7695 * record? Or is it too old in the cache?
7697 * Note: unlike everywhere else, we
7698 * check for now > header->rdh_ttl instead
7699 * of now >= header->rdh_ttl. This allows
7700 * ANY and RRSIG queries for 0 TTL
7701 * rdatasets to work.
7703 if (NONEXISTENT(header) ||
7704 (now != 0 && now > header->rdh_ttl))
7705 header = NULL;
7706 break;
7707 } else
7708 header = header->down;
7709 } while (header != NULL);
7710 if (header != NULL)
7711 break;
7714 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7715 isc_rwlocktype_read);
7717 rbtiterator->current = header;
7719 if (header == NULL)
7720 return (ISC_R_NOMORE);
7722 return (ISC_R_SUCCESS);
7725 static isc_result_t
7726 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7727 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7728 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7729 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7730 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7731 rdatasetheader_t *header, *top_next;
7732 rbtdb_serial_t serial;
7733 isc_stdtime_t now;
7734 rbtdb_rdatatype_t type, negtype;
7735 dns_rdatatype_t rdtype, covers;
7737 header = rbtiterator->current;
7738 if (header == NULL)
7739 return (ISC_R_NOMORE);
7741 if (IS_CACHE(rbtdb)) {
7742 serial = 1;
7743 now = rbtiterator->common.now;
7744 } else {
7745 serial = rbtversion->serial;
7746 now = 0;
7749 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7750 isc_rwlocktype_read);
7752 type = header->type;
7753 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7754 if (rdtype == 0) {
7755 covers = RBTDB_RDATATYPE_EXT(header->type);
7756 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7757 } else
7758 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7759 for (header = header->next; header != NULL; header = top_next) {
7760 top_next = header->next;
7762 * If not walking back up the down list.
7764 if (header->type != type && header->type != negtype) {
7765 do {
7766 if (header->serial <= serial &&
7767 !IGNORE(header)) {
7769 * Is this a "this rdataset doesn't
7770 * exist" record?
7772 * Note: unlike everywhere else, we
7773 * check for now > header->ttl instead
7774 * of now >= header->ttl. This allows
7775 * ANY and RRSIG queries for 0 TTL
7776 * rdatasets to work.
7778 if ((header->attributes &
7779 RDATASET_ATTR_NONEXISTENT) != 0 ||
7780 (now != 0 && now > header->rdh_ttl))
7781 header = NULL;
7782 break;
7783 } else
7784 header = header->down;
7785 } while (header != NULL);
7786 if (header != NULL)
7787 break;
7791 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7792 isc_rwlocktype_read);
7794 rbtiterator->current = header;
7796 if (header == NULL)
7797 return (ISC_R_NOMORE);
7799 return (ISC_R_SUCCESS);
7802 static void
7803 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7804 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7805 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7806 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7807 rdatasetheader_t *header;
7809 header = rbtiterator->current;
7810 REQUIRE(header != NULL);
7812 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7813 isc_rwlocktype_read);
7815 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7816 rdataset);
7818 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7819 isc_rwlocktype_read);
7824 * Database Iterator Methods
7827 static inline void
7828 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7829 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7830 dns_rbtnode_t *node = rbtdbiter->node;
7832 if (node == NULL)
7833 return;
7835 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7836 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7839 static inline void
7840 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7841 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7842 dns_rbtnode_t *node = rbtdbiter->node;
7843 nodelock_t *lock;
7845 if (node == NULL)
7846 return;
7848 lock = &rbtdb->node_locks[node->locknum].lock;
7849 NODE_LOCK(lock, isc_rwlocktype_read);
7850 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7851 rbtdbiter->tree_locked, ISC_FALSE);
7852 NODE_UNLOCK(lock, isc_rwlocktype_read);
7854 rbtdbiter->node = NULL;
7857 static void
7858 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7859 dns_rbtnode_t *node;
7860 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7861 isc_boolean_t was_read_locked = ISC_FALSE;
7862 nodelock_t *lock;
7863 int i;
7865 if (rbtdbiter->delete != 0) {
7867 * Note that "%d node of %d in tree" can report things like
7868 * "flush_deletions: 59 nodes of 41 in tree". This means
7869 * That some nodes appear on the deletions list more than
7870 * once. Only the last occurence will actually be deleted.
7872 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7873 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7874 "flush_deletions: %d nodes of %d in tree",
7875 rbtdbiter->delete,
7876 dns_rbt_nodecount(rbtdb->tree));
7878 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7879 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7880 was_read_locked = ISC_TRUE;
7882 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7883 rbtdbiter->tree_locked = isc_rwlocktype_write;
7885 for (i = 0; i < rbtdbiter->delete; i++) {
7886 node = rbtdbiter->deletions[i];
7887 lock = &rbtdb->node_locks[node->locknum].lock;
7889 NODE_LOCK(lock, isc_rwlocktype_read);
7890 decrement_reference(rbtdb, node, 0,
7891 isc_rwlocktype_read,
7892 rbtdbiter->tree_locked, ISC_FALSE);
7893 NODE_UNLOCK(lock, isc_rwlocktype_read);
7896 rbtdbiter->delete = 0;
7898 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7899 if (was_read_locked) {
7900 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7901 rbtdbiter->tree_locked = isc_rwlocktype_read;
7903 } else {
7904 rbtdbiter->tree_locked = isc_rwlocktype_none;
7909 static inline void
7910 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7911 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7913 REQUIRE(rbtdbiter->paused);
7914 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7916 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7917 rbtdbiter->tree_locked = isc_rwlocktype_read;
7919 rbtdbiter->paused = ISC_FALSE;
7922 static void
7923 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7924 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7925 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7926 dns_db_t *db = NULL;
7928 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7929 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7930 rbtdbiter->tree_locked = isc_rwlocktype_none;
7931 } else
7932 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7934 dereference_iter_node(rbtdbiter);
7936 flush_deletions(rbtdbiter);
7938 dns_db_attach(rbtdbiter->common.db, &db);
7939 dns_db_detach(&rbtdbiter->common.db);
7941 dns_rbtnodechain_reset(&rbtdbiter->chain);
7942 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7943 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7944 dns_db_detach(&db);
7946 *iteratorp = NULL;
7949 static isc_result_t
7950 dbiterator_first(dns_dbiterator_t *iterator) {
7951 isc_result_t result;
7952 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7953 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7954 dns_name_t *name, *origin;
7956 if (rbtdbiter->result != ISC_R_SUCCESS &&
7957 rbtdbiter->result != ISC_R_NOMORE)
7958 return (rbtdbiter->result);
7960 if (rbtdbiter->paused)
7961 resume_iteration(rbtdbiter);
7963 dereference_iter_node(rbtdbiter);
7965 name = dns_fixedname_name(&rbtdbiter->name);
7966 origin = dns_fixedname_name(&rbtdbiter->origin);
7967 dns_rbtnodechain_reset(&rbtdbiter->chain);
7968 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7970 if (rbtdbiter->nsec3only) {
7971 rbtdbiter->current = &rbtdbiter->nsec3chain;
7972 result = dns_rbtnodechain_first(rbtdbiter->current,
7973 rbtdb->nsec3, name, origin);
7974 } else {
7975 rbtdbiter->current = &rbtdbiter->chain;
7976 result = dns_rbtnodechain_first(rbtdbiter->current,
7977 rbtdb->tree, name, origin);
7978 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7979 rbtdbiter->current = &rbtdbiter->nsec3chain;
7980 result = dns_rbtnodechain_first(rbtdbiter->current,
7981 rbtdb->nsec3, name,
7982 origin);
7985 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7986 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7987 NULL, &rbtdbiter->node);
7988 if (result == ISC_R_SUCCESS) {
7989 rbtdbiter->new_origin = ISC_TRUE;
7990 reference_iter_node(rbtdbiter);
7992 } else {
7993 INSIST(result == ISC_R_NOTFOUND);
7994 result = ISC_R_NOMORE; /* The tree is empty. */
7997 rbtdbiter->result = result;
7999 return (result);
8002 static isc_result_t
8003 dbiterator_last(dns_dbiterator_t *iterator) {
8004 isc_result_t result;
8005 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8006 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8007 dns_name_t *name, *origin;
8009 if (rbtdbiter->result != ISC_R_SUCCESS &&
8010 rbtdbiter->result != ISC_R_NOMORE)
8011 return (rbtdbiter->result);
8013 if (rbtdbiter->paused)
8014 resume_iteration(rbtdbiter);
8016 dereference_iter_node(rbtdbiter);
8018 name = dns_fixedname_name(&rbtdbiter->name);
8019 origin = dns_fixedname_name(&rbtdbiter->origin);
8020 dns_rbtnodechain_reset(&rbtdbiter->chain);
8021 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8023 result = ISC_R_NOTFOUND;
8024 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8025 rbtdbiter->current = &rbtdbiter->nsec3chain;
8026 result = dns_rbtnodechain_last(rbtdbiter->current,
8027 rbtdb->nsec3, name, origin);
8029 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8030 rbtdbiter->current = &rbtdbiter->chain;
8031 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8032 name, origin);
8034 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8035 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8036 NULL, &rbtdbiter->node);
8037 if (result == ISC_R_SUCCESS) {
8038 rbtdbiter->new_origin = ISC_TRUE;
8039 reference_iter_node(rbtdbiter);
8041 } else {
8042 INSIST(result == ISC_R_NOTFOUND);
8043 result = ISC_R_NOMORE; /* The tree is empty. */
8046 rbtdbiter->result = result;
8048 return (result);
8051 static isc_result_t
8052 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8053 isc_result_t result;
8054 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8055 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8056 dns_name_t *iname, *origin;
8058 if (rbtdbiter->result != ISC_R_SUCCESS &&
8059 rbtdbiter->result != ISC_R_NOTFOUND &&
8060 rbtdbiter->result != ISC_R_NOMORE)
8061 return (rbtdbiter->result);
8063 if (rbtdbiter->paused)
8064 resume_iteration(rbtdbiter);
8066 dereference_iter_node(rbtdbiter);
8068 iname = dns_fixedname_name(&rbtdbiter->name);
8069 origin = dns_fixedname_name(&rbtdbiter->origin);
8070 dns_rbtnodechain_reset(&rbtdbiter->chain);
8071 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8073 if (rbtdbiter->nsec3only) {
8074 rbtdbiter->current = &rbtdbiter->nsec3chain;
8075 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8076 &rbtdbiter->node,
8077 rbtdbiter->current,
8078 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8079 } else if (rbtdbiter->nonsec3) {
8080 rbtdbiter->current = &rbtdbiter->chain;
8081 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8082 &rbtdbiter->node,
8083 rbtdbiter->current,
8084 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8085 } else {
8087 * Stay on main chain if not found on either chain.
8089 rbtdbiter->current = &rbtdbiter->chain;
8090 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8091 &rbtdbiter->node,
8092 rbtdbiter->current,
8093 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8094 if (result == DNS_R_PARTIALMATCH) {
8095 dns_rbtnode_t *node = NULL;
8096 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8097 &node, &rbtdbiter->nsec3chain,
8098 DNS_RBTFIND_EMPTYDATA,
8099 NULL, NULL);
8100 if (result == ISC_R_SUCCESS) {
8101 rbtdbiter->node = node;
8102 rbtdbiter->current = &rbtdbiter->nsec3chain;
8107 #if 1
8108 if (result == ISC_R_SUCCESS) {
8109 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8110 origin, NULL);
8111 if (result == ISC_R_SUCCESS) {
8112 rbtdbiter->new_origin = ISC_TRUE;
8113 reference_iter_node(rbtdbiter);
8115 } else if (result == DNS_R_PARTIALMATCH) {
8116 result = ISC_R_NOTFOUND;
8117 rbtdbiter->node = NULL;
8120 rbtdbiter->result = result;
8121 #else
8122 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8123 isc_result_t tresult;
8124 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8125 origin, NULL);
8126 if (tresult == ISC_R_SUCCESS) {
8127 rbtdbiter->new_origin = ISC_TRUE;
8128 reference_iter_node(rbtdbiter);
8129 } else {
8130 result = tresult;
8131 rbtdbiter->node = NULL;
8133 } else
8134 rbtdbiter->node = NULL;
8136 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8137 ISC_R_SUCCESS : result;
8138 #endif
8140 return (result);
8143 static isc_result_t
8144 dbiterator_prev(dns_dbiterator_t *iterator) {
8145 isc_result_t result;
8146 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8147 dns_name_t *name, *origin;
8148 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8150 REQUIRE(rbtdbiter->node != NULL);
8152 if (rbtdbiter->result != ISC_R_SUCCESS)
8153 return (rbtdbiter->result);
8155 if (rbtdbiter->paused)
8156 resume_iteration(rbtdbiter);
8158 name = dns_fixedname_name(&rbtdbiter->name);
8159 origin = dns_fixedname_name(&rbtdbiter->origin);
8160 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8161 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8162 !rbtdbiter->nonsec3 &&
8163 &rbtdbiter->nsec3chain == rbtdbiter->current) {
8164 rbtdbiter->current = &rbtdbiter->chain;
8165 dns_rbtnodechain_reset(rbtdbiter->current);
8166 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8167 name, origin);
8168 if (result == ISC_R_NOTFOUND)
8169 result = ISC_R_NOMORE;
8172 dereference_iter_node(rbtdbiter);
8174 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8175 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8176 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8177 NULL, &rbtdbiter->node);
8180 if (result == ISC_R_SUCCESS)
8181 reference_iter_node(rbtdbiter);
8183 rbtdbiter->result = result;
8185 return (result);
8188 static isc_result_t
8189 dbiterator_next(dns_dbiterator_t *iterator) {
8190 isc_result_t result;
8191 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8192 dns_name_t *name, *origin;
8193 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8195 REQUIRE(rbtdbiter->node != NULL);
8197 if (rbtdbiter->result != ISC_R_SUCCESS)
8198 return (rbtdbiter->result);
8200 if (rbtdbiter->paused)
8201 resume_iteration(rbtdbiter);
8203 name = dns_fixedname_name(&rbtdbiter->name);
8204 origin = dns_fixedname_name(&rbtdbiter->origin);
8205 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8206 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8207 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8208 rbtdbiter->current = &rbtdbiter->nsec3chain;
8209 dns_rbtnodechain_reset(rbtdbiter->current);
8210 result = dns_rbtnodechain_first(rbtdbiter->current,
8211 rbtdb->nsec3, name, origin);
8212 if (result == ISC_R_NOTFOUND)
8213 result = ISC_R_NOMORE;
8216 dereference_iter_node(rbtdbiter);
8218 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8219 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8220 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8221 NULL, &rbtdbiter->node);
8223 if (result == ISC_R_SUCCESS)
8224 reference_iter_node(rbtdbiter);
8226 rbtdbiter->result = result;
8228 return (result);
8231 static isc_result_t
8232 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8233 dns_name_t *name)
8235 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8236 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8237 dns_rbtnode_t *node = rbtdbiter->node;
8238 isc_result_t result;
8239 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8240 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8242 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8243 REQUIRE(rbtdbiter->node != NULL);
8245 if (rbtdbiter->paused)
8246 resume_iteration(rbtdbiter);
8248 if (name != NULL) {
8249 if (rbtdbiter->common.relative_names)
8250 origin = NULL;
8251 result = dns_name_concatenate(nodename, origin, name, NULL);
8252 if (result != ISC_R_SUCCESS)
8253 return (result);
8254 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8255 result = DNS_R_NEWORIGIN;
8256 } else
8257 result = ISC_R_SUCCESS;
8259 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8260 new_reference(rbtdb, node);
8261 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8263 *nodep = rbtdbiter->node;
8265 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8266 isc_result_t expire_result;
8269 * If the deletion array is full, flush it before trying
8270 * to expire the current node. The current node can't
8271 * fully deleted while the iteration cursor is still on it.
8273 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8274 flush_deletions(rbtdbiter);
8276 expire_result = expirenode(iterator->db, *nodep, 0);
8279 * expirenode() currently always returns success.
8281 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8282 unsigned int refs;
8284 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8285 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8286 dns_rbtnode_refincrement(node, &refs);
8287 INSIST(refs != 0);
8288 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8292 return (result);
8295 static isc_result_t
8296 dbiterator_pause(dns_dbiterator_t *iterator) {
8297 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8298 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8300 if (rbtdbiter->result != ISC_R_SUCCESS &&
8301 rbtdbiter->result != ISC_R_NOMORE)
8302 return (rbtdbiter->result);
8304 if (rbtdbiter->paused)
8305 return (ISC_R_SUCCESS);
8307 rbtdbiter->paused = ISC_TRUE;
8309 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8310 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8311 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8312 rbtdbiter->tree_locked = isc_rwlocktype_none;
8315 flush_deletions(rbtdbiter);
8317 return (ISC_R_SUCCESS);
8320 static isc_result_t
8321 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8322 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8323 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8325 if (rbtdbiter->result != ISC_R_SUCCESS)
8326 return (rbtdbiter->result);
8328 return (dns_name_copy(origin, name, NULL));
/*
 * Additional cache routines.
 */
8334 static isc_result_t
8335 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8336 dns_rdatatype_t qtype, dns_acache_t *acache,
8337 dns_zone_t **zonep, dns_db_t **dbp,
8338 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8339 dns_name_t *fname, dns_message_t *msg,
8340 isc_stdtime_t now)
8342 #ifndef BIND9
8343 UNUSED(rdataset);
8344 UNUSED(type);
8345 UNUSED(qtype);
8346 UNUSED(acache);
8347 UNUSED(zonep);
8348 UNUSED(dbp);
8349 UNUSED(versionp);
8350 UNUSED(nodep);
8351 UNUSED(fname);
8352 UNUSED(msg);
8353 UNUSED(now);
8355 return (ISC_R_NOTIMPLEMENTED);
8356 #else
8357 dns_rbtdb_t *rbtdb = rdataset->private1;
8358 dns_rbtnode_t *rbtnode = rdataset->private2;
8359 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8360 unsigned int current_count = rdataset->privateuint4;
8361 unsigned int count;
8362 rdatasetheader_t *header;
8363 nodelock_t *nodelock;
8364 unsigned int total_count;
8365 acachectl_t *acarray;
8366 dns_acacheentry_t *entry;
8367 isc_result_t result;
8369 UNUSED(qtype); /* we do not use this value at least for now */
8370 UNUSED(acache);
8372 header = (struct rdatasetheader *)(raw - sizeof(*header));
8374 total_count = raw[0] * 256 + raw[1];
8375 INSIST(total_count > current_count);
8376 count = total_count - current_count - 1;
8378 acarray = NULL;
8380 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8381 NODE_LOCK(nodelock, isc_rwlocktype_read);
8383 switch (type) {
8384 case dns_rdatasetadditional_fromauth:
8385 acarray = header->additional_auth;
8386 break;
8387 case dns_rdatasetadditional_fromcache:
8388 acarray = NULL;
8389 break;
8390 case dns_rdatasetadditional_fromglue:
8391 acarray = header->additional_glue;
8392 break;
8393 default:
8394 INSIST(0);
8397 if (acarray == NULL) {
8398 if (type != dns_rdatasetadditional_fromcache)
8399 dns_acache_countquerymiss(acache);
8400 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8401 return (ISC_R_NOTFOUND);
8404 if (acarray[count].entry == NULL) {
8405 dns_acache_countquerymiss(acache);
8406 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8407 return (ISC_R_NOTFOUND);
8410 entry = NULL;
8411 dns_acache_attachentry(acarray[count].entry, &entry);
8413 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8415 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8416 nodep, fname, msg, now);
8418 dns_acache_detachentry(&entry);
8420 return (result);
8423 static void
8424 acache_callback(dns_acacheentry_t *entry, void **arg) {
8425 dns_rbtdb_t *rbtdb;
8426 dns_rbtnode_t *rbtnode;
8427 nodelock_t *nodelock;
8428 acachectl_t *acarray = NULL;
8429 acache_cbarg_t *cbarg;
8430 unsigned int count;
8432 REQUIRE(arg != NULL);
8433 cbarg = *arg;
8436 * The caller must hold the entry lock.
8439 rbtdb = (dns_rbtdb_t *)cbarg->db;
8440 rbtnode = (dns_rbtnode_t *)cbarg->node;
8442 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8443 NODE_LOCK(nodelock, isc_rwlocktype_write);
8445 switch (cbarg->type) {
8446 case dns_rdatasetadditional_fromauth:
8447 acarray = cbarg->header->additional_auth;
8448 break;
8449 case dns_rdatasetadditional_fromglue:
8450 acarray = cbarg->header->additional_glue;
8451 break;
8452 default:
8453 INSIST(0);
8456 count = cbarg->count;
8457 if (acarray != NULL && acarray[count].entry == entry) {
8458 acarray[count].entry = NULL;
8459 INSIST(acarray[count].cbarg == cbarg);
8460 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8461 acarray[count].cbarg = NULL;
8462 } else
8463 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8465 dns_acache_detachentry(&entry);
8467 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8469 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8470 dns_db_detach((dns_db_t **)(void*)&rbtdb);
8472 *arg = NULL;
8473 #endif /* BIND9 */
#ifdef BIND9
/*
 * Cancel an additional-cache entry and dispose of its callback argument.
 *
 * The node and database references held by '*cbargp' are released, the
 * argument is returned to 'mctx', and '*cbargp' is set to NULL.  The
 * caller remains responsible for detaching 'entry'.
 */
static void
acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
		   acache_cbarg_t **cbargp)
{
	acache_cbarg_t *victim;

	REQUIRE(mctx != NULL);
	REQUIRE(entry != NULL);
	REQUIRE(cbargp != NULL && *cbargp != NULL);

	victim = *cbargp;

	dns_acache_cancelentry(entry);

	/* Drop the references pinned by the callback argument. */
	dns_db_detachnode(victim->db, &victim->node);
	dns_db_detach(&victim->db);

	isc_mem_put(mctx, victim, sizeof(acache_cbarg_t));

	*cbargp = NULL;
}
#endif /* BIND9 */
8499 static isc_result_t
8500 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8501 dns_rdatatype_t qtype, dns_acache_t *acache,
8502 dns_zone_t *zone, dns_db_t *db,
8503 dns_dbversion_t *version, dns_dbnode_t *node,
8504 dns_name_t *fname)
8506 #ifndef BIND9
8507 UNUSED(rdataset);
8508 UNUSED(type);
8509 UNUSED(qtype);
8510 UNUSED(acache);
8511 UNUSED(zone);
8512 UNUSED(db);
8513 UNUSED(version);
8514 UNUSED(node);
8515 UNUSED(fname);
8517 return (ISC_R_NOTIMPLEMENTED);
8518 #else
8519 dns_rbtdb_t *rbtdb = rdataset->private1;
8520 dns_rbtnode_t *rbtnode = rdataset->private2;
8521 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8522 unsigned int current_count = rdataset->privateuint4;
8523 rdatasetheader_t *header;
8524 unsigned int total_count, count;
8525 nodelock_t *nodelock;
8526 isc_result_t result;
8527 acachectl_t *acarray;
8528 dns_acacheentry_t *newentry, *oldentry = NULL;
8529 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8531 UNUSED(qtype);
8533 if (type == dns_rdatasetadditional_fromcache)
8534 return (ISC_R_SUCCESS);
8536 header = (struct rdatasetheader *)(raw - sizeof(*header));
8538 total_count = raw[0] * 256 + raw[1];
8539 INSIST(total_count > current_count);
8540 count = total_count - current_count - 1; /* should be private data */
8542 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8543 if (newcbarg == NULL)
8544 return (ISC_R_NOMEMORY);
8545 newcbarg->type = type;
8546 newcbarg->count = count;
8547 newcbarg->header = header;
8548 newcbarg->db = NULL;
8549 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8550 newcbarg->node = NULL;
8551 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8552 &newcbarg->node);
8553 newentry = NULL;
8554 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8555 acache_callback, newcbarg, &newentry);
8556 if (result != ISC_R_SUCCESS)
8557 goto fail;
8558 /* Set cache data in the new entry. */
8559 result = dns_acache_setentry(acache, newentry, zone, db,
8560 version, node, fname);
8561 if (result != ISC_R_SUCCESS)
8562 goto fail;
8564 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8565 NODE_LOCK(nodelock, isc_rwlocktype_write);
8567 acarray = NULL;
8568 switch (type) {
8569 case dns_rdatasetadditional_fromauth:
8570 acarray = header->additional_auth;
8571 break;
8572 case dns_rdatasetadditional_fromglue:
8573 acarray = header->additional_glue;
8574 break;
8575 default:
8576 INSIST(0);
8579 if (acarray == NULL) {
8580 unsigned int i;
8582 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8583 sizeof(acachectl_t));
8585 if (acarray == NULL) {
8586 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8587 goto fail;
8590 for (i = 0; i < total_count; i++) {
8591 acarray[i].entry = NULL;
8592 acarray[i].cbarg = NULL;
8595 switch (type) {
8596 case dns_rdatasetadditional_fromauth:
8597 header->additional_auth = acarray;
8598 break;
8599 case dns_rdatasetadditional_fromglue:
8600 header->additional_glue = acarray;
8601 break;
8602 default:
8603 INSIST(0);
8606 if (acarray[count].entry != NULL) {
8608 * Swap the entry. Delay cleaning-up the old entry since
8609 * it would require a node lock.
8611 oldentry = acarray[count].entry;
8612 INSIST(acarray[count].cbarg != NULL);
8613 oldcbarg = acarray[count].cbarg;
8615 acarray[count].entry = newentry;
8616 acarray[count].cbarg = newcbarg;
8618 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8620 if (oldentry != NULL) {
8621 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8622 dns_acache_detachentry(&oldentry);
8625 return (ISC_R_SUCCESS);
8627 fail:
8628 if (newcbarg != NULL) {
8629 if (newentry != NULL) {
8630 acache_cancelentry(rbtdb->common.mctx, newentry,
8631 &newcbarg);
8632 dns_acache_detachentry(&newentry);
8633 } else {
8634 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8635 dns_db_detach(&newcbarg->db);
8636 isc_mem_put(rbtdb->common.mctx, newcbarg,
8637 sizeof(*newcbarg));
8641 return (result);
8642 #endif
8645 static isc_result_t
8646 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8647 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8649 #ifndef BIND9
8650 UNUSED(acache);
8651 UNUSED(rdataset);
8652 UNUSED(type);
8653 UNUSED(qtype);
8655 return (ISC_R_NOTIMPLEMENTED);
8656 #else
8657 dns_rbtdb_t *rbtdb = rdataset->private1;
8658 dns_rbtnode_t *rbtnode = rdataset->private2;
8659 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8660 unsigned int current_count = rdataset->privateuint4;
8661 rdatasetheader_t *header;
8662 nodelock_t *nodelock;
8663 unsigned int total_count, count;
8664 acachectl_t *acarray;
8665 dns_acacheentry_t *entry;
8666 acache_cbarg_t *cbarg;
8668 UNUSED(qtype); /* we do not use this value at least for now */
8669 UNUSED(acache);
8671 if (type == dns_rdatasetadditional_fromcache)
8672 return (ISC_R_SUCCESS);
8674 header = (struct rdatasetheader *)(raw - sizeof(*header));
8676 total_count = raw[0] * 256 + raw[1];
8677 INSIST(total_count > current_count);
8678 count = total_count - current_count - 1;
8680 acarray = NULL;
8681 entry = NULL;
8683 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8684 NODE_LOCK(nodelock, isc_rwlocktype_write);
8686 switch (type) {
8687 case dns_rdatasetadditional_fromauth:
8688 acarray = header->additional_auth;
8689 break;
8690 case dns_rdatasetadditional_fromglue:
8691 acarray = header->additional_glue;
8692 break;
8693 default:
8694 INSIST(0);
8697 if (acarray == NULL) {
8698 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8699 return (ISC_R_NOTFOUND);
8702 entry = acarray[count].entry;
8703 if (entry == NULL) {
8704 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8705 return (ISC_R_NOTFOUND);
8708 acarray[count].entry = NULL;
8709 cbarg = acarray[count].cbarg;
8710 acarray[count].cbarg = NULL;
8712 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8714 if (entry != NULL) {
8715 if (cbarg != NULL)
8716 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8717 dns_acache_detachentry(&entry);
8720 return (ISC_R_SUCCESS);
8721 #endif
/*
 * Routines for LRU-based cache management.
 */
8729 * See if a given cache entry that is being reused needs to be updated
8730 * in the LRU-list. From the LRU management point of view, this function is
8731 * expected to return true for almost all cases. When used with threads,
8732 * however, this may cause a non-negligible performance penalty because a
8733 * writer lock will have to be acquired before updating the list.
8734 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8735 * function returns true if the entry has not been updated for some period of
8736 * time. We differentiate the NS or glue address case and the others since
8737 * experiments have shown that the former tends to be accessed relatively
8738 * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8739 * may cause external queries at a higher level zone, involving more
8740 * transactions).
8742 * Caller must hold the node (read or write) lock.
8744 static inline isc_boolean_t
8745 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8746 if ((header->attributes &
8747 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8748 return (ISC_FALSE);
8750 #if DNS_RBTDB_LIMITLRUUPDATE
8751 if (header->type == dns_rdatatype_ns ||
8752 (header->trust == dns_trust_glue &&
8753 (header->type == dns_rdatatype_a ||
8754 header->type == dns_rdatatype_aaaa))) {
8756 * Glue records are updated if at least 60 seconds have passed
8757 * since the previous update time.
8759 return (header->last_used + 60 <= now);
8762 /* Other records are updated if 5 minutes have passed. */
8763 return (header->last_used + 300 <= now);
8764 #else
8765 UNUSED(now);
8767 return (ISC_TRUE);
8768 #endif
8772 * Update the timestamp of a given cache entry and move it to the head
8773 * of the corresponding LRU list.
8775 * Caller must hold the node (write) lock.
8777 * Note that the we do NOT touch the heap here, as the TTL has not changed.
8779 static void
8780 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8781 isc_stdtime_t now)
8783 INSIST(IS_CACHE(rbtdb));
8785 /* To be checked: can we really assume this? XXXMLG */
8786 INSIST(ISC_LINK_LINKED(header, link));
8788 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8789 header->last_used = now;
8790 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8794 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8795 * under an overmem condition. To recover from this condition quickly, up to
8796 * 2 entries will be purged. This process is triggered while adding a new
8797 * entry, and we specifically avoid purging entries in the same LRU bucket as
8798 * the one to which the new entry will belong. Otherwise, we might purge
8799 * entries of the same name of different RR types while adding RRsets from a
8800 * single response (consider the case where we're adding A and AAAA glue records
8801 * of the same NS name).
8803 static void
8804 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8805 isc_stdtime_t now, isc_boolean_t tree_locked)
8807 rdatasetheader_t *header, *header_prev;
8808 unsigned int locknum;
8809 int purgecount = 2;
8811 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8812 locknum != locknum_start && purgecount > 0;
8813 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8814 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8815 isc_rwlocktype_write);
8817 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8818 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8819 expire_header(rbtdb, header, tree_locked);
8820 purgecount--;
8823 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8824 header != NULL && purgecount > 0;
8825 header = header_prev) {
8826 header_prev = ISC_LIST_PREV(header, link);
8828 * Unlink the entry at this point to avoid checking it
8829 * again even if it's currently used someone else and
8830 * cannot be purged at this moment. This entry won't be
8831 * referenced any more (so unlinking is safe) since the
8832 * TTL was reset to 0.
8834 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8835 link);
8836 expire_header(rbtdb, header, tree_locked);
8837 purgecount--;
8840 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8841 isc_rwlocktype_write);
8845 static void
8846 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8847 isc_boolean_t tree_locked)
8849 set_ttl(rbtdb, header, 0);
8850 header->attributes |= RDATASET_ATTR_STALE;
8851 header->node->dirty = 1;
8854 * Caller must hold the node (write) lock.
8857 if (dns_rbtnode_refcurrent(header->node) == 0) {
8859 * If no one else is using the node, we can clean it up now.
8860 * We first need to gain a new reference to the node to meet a
8861 * requirement of decrement_reference().
8863 new_reference(rbtdb, header->node);
8864 decrement_reference(rbtdb, header->node, 0,
8865 isc_rwlocktype_write,
8866 tree_locked ? isc_rwlocktype_write :
8867 isc_rwlocktype_none, ISC_FALSE);