Remove building with NOCRYPTO option
[minix.git] / external / bsd / bind / dist / lib / dns / rrl.c
blobdd9952bd5e277ccae1670c7ed80434bf8f7b3312
1 /* $NetBSD: rrl.c,v 1.4 2014/12/10 04:37:58 christos Exp $ */
3 /*
4 * Copyright (C) 2012-2014 Internet Systems Consortium, Inc. ("ISC")
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
11 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
12 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
13 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
14 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
15 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
16 * PERFORMANCE OF THIS SOFTWARE.
19 /*! \file */
22 * Rate limit DNS responses.
25 /* #define ISC_LIST_CHECKINIT */
27 #include <config.h>
28 #include <isc/mem.h>
29 #include <isc/net.h>
30 #include <isc/netaddr.h>
31 #include <isc/print.h>
33 #include <dns/result.h>
34 #include <dns/rcode.h>
35 #include <dns/rdatatype.h>
36 #include <dns/rdataclass.h>
37 #include <dns/log.h>
38 #include <dns/rrl.h>
39 #include <dns/view.h>
41 static void
42 log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_boolean_t early,
43 char *log_buf, unsigned int log_buf_len);
46 * Get a modulus for a hash function that is tolerably likely to be
47 * relatively prime to most inputs. Of course, we get a prime for for initial
48 * values not larger than the square of the last prime. We often get a prime
49 * after that.
50 * This works well in practice for hash tables up to at least 100
51 * times the square of the last prime and better than a multiplicative hash.
53 static int
54 hash_divisor(unsigned int initial) {
55 static isc_uint16_t primes[] = {
56 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,
57 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
58 #if 0
59 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
60 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
61 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
62 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
63 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
64 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
65 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
66 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
67 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
68 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
69 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
70 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009,
71 #endif
73 int divisions, tries;
74 unsigned int result;
75 isc_uint16_t *pp, p;
77 result = initial;
79 if (primes[sizeof(primes)/sizeof(primes[0])-1] >= result) {
80 pp = primes;
81 while (*pp < result)
82 ++pp;
83 return (*pp);
86 if ((result & 1) == 0)
87 ++result;
89 divisions = 0;
90 tries = 1;
91 pp = primes;
92 do {
93 p = *pp++;
94 ++divisions;
95 if ((result % p) == 0) {
96 ++tries;
97 result += 2;
98 pp = primes;
100 } while (pp < &primes[sizeof(primes) / sizeof(primes[0])]);
102 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
103 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
104 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
105 "%d hash_divisor() divisions in %d tries"
106 " to get %d from %d",
107 divisions, tries, result, initial);
109 return (result);
113 * Convert a timestamp to a number of seconds in the past.
115 static inline int
116 delta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) {
117 int delta;
119 delta = now - ts;
120 if (delta >= 0)
121 return (delta);
124 * The timestamp is in the future. That future might result from
125 * re-ordered requests, because we use timestamps on requests
126 * instead of consulting a clock. Timestamps in the distant future are
127 * assumed to result from clock changes. When the clock changes to
128 * the past, make existing timestamps appear to be in the past.
130 if (delta < -DNS_RRL_MAX_TIME_TRAVEL)
131 return (DNS_RRL_FOREVER);
132 return (0);
135 static inline int
136 get_age(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, isc_stdtime_t now) {
137 if (!e->ts_valid)
138 return (DNS_RRL_FOREVER);
139 return (delta_rrl_time(e->ts + rrl->ts_bases[e->ts_gen], now));
142 static inline void
143 set_age(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_stdtime_t now) {
144 dns_rrl_entry_t *e_old;
145 unsigned int ts_gen;
146 int i, ts;
148 ts_gen = rrl->ts_gen;
149 ts = now - rrl->ts_bases[ts_gen];
150 if (ts < 0) {
151 if (ts < -DNS_RRL_MAX_TIME_TRAVEL)
152 ts = DNS_RRL_FOREVER;
153 else
154 ts = 0;
158 * Make a new timestamp base if the current base is too old.
159 * All entries older than DNS_RRL_MAX_WINDOW seconds are ancient,
160 * useless history. Their timestamps can be treated as if they are
161 * all the same.
162 * We only do arithmetic on more recent timestamps, so bases for
163 * older timestamps can be recycled provided the old timestamps are
164 * marked as ancient history.
165 * This loop is almost always very short because most entries are
166 * recycled after one second and any entries that need to be marked
167 * are older than (DNS_RRL_TS_BASES)*DNS_RRL_MAX_TS seconds.
169 if (ts >= DNS_RRL_MAX_TS) {
170 ts_gen = (ts_gen + 1) % DNS_RRL_TS_BASES;
171 for (e_old = ISC_LIST_TAIL(rrl->lru), i = 0;
172 e_old != NULL && (e_old->ts_gen == ts_gen ||
173 !ISC_LINK_LINKED(e_old, hlink));
174 e_old = ISC_LIST_PREV(e_old, lru), ++i)
176 e_old->ts_valid = ISC_FALSE;
178 if (i != 0)
179 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
180 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
181 "rrl new time base scanned %d entries"
182 " at %d for %d %d %d %d",
183 i, now, rrl->ts_bases[ts_gen],
184 rrl->ts_bases[(ts_gen + 1) %
185 DNS_RRL_TS_BASES],
186 rrl->ts_bases[(ts_gen + 2) %
187 DNS_RRL_TS_BASES],
188 rrl->ts_bases[(ts_gen + 3) %
189 DNS_RRL_TS_BASES]);
190 rrl->ts_gen = ts_gen;
191 rrl->ts_bases[ts_gen] = now;
192 ts = 0;
195 e->ts_gen = ts_gen;
196 e->ts = ts;
197 e->ts_valid = ISC_TRUE;
200 static isc_result_t
201 expand_entries(dns_rrl_t *rrl, int new) {
202 unsigned int bsize;
203 dns_rrl_block_t *b;
204 dns_rrl_entry_t *e;
205 double rate;
206 int i;
208 if (rrl->num_entries + new >= rrl->max_entries &&
209 rrl->max_entries != 0)
211 new = rrl->max_entries - rrl->num_entries;
212 if (new <= 0)
213 return (ISC_R_SUCCESS);
217 * Log expansions so that the user can tune max-table-size
218 * and min-table-size.
220 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) &&
221 rrl->hash != NULL) {
222 rate = rrl->probes;
223 if (rrl->searches != 0)
224 rate /= rrl->searches;
225 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
226 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
227 "increase from %d to %d RRL entries with"
228 " %d bins; average search length %.1f",
229 rrl->num_entries, rrl->num_entries+new,
230 rrl->hash->length, rate);
233 bsize = sizeof(dns_rrl_block_t) + (new-1)*sizeof(dns_rrl_entry_t);
234 b = isc_mem_get(rrl->mctx, bsize);
235 if (b == NULL) {
236 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
237 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
238 "isc_mem_get(%d) failed for RRL entries",
239 bsize);
240 return (ISC_R_NOMEMORY);
242 memset(b, 0, bsize);
243 b->size = bsize;
245 e = b->entries;
246 for (i = 0; i < new; ++i, ++e) {
247 ISC_LINK_INIT(e, hlink);
248 ISC_LIST_INITANDAPPEND(rrl->lru, e, lru);
250 rrl->num_entries += new;
251 ISC_LIST_INITANDAPPEND(rrl->blocks, b, link);
253 return (ISC_R_SUCCESS);
256 static inline dns_rrl_bin_t *
257 get_bin(dns_rrl_hash_t *hash, unsigned int hval) {
258 INSIST(hash != NULL);
259 return (&hash->bins[hval % hash->length]);
262 static void
263 free_old_hash(dns_rrl_t *rrl) {
264 dns_rrl_hash_t *old_hash;
265 dns_rrl_bin_t *old_bin;
266 dns_rrl_entry_t *e, *e_next;
268 old_hash = rrl->old_hash;
269 for (old_bin = &old_hash->bins[0];
270 old_bin < &old_hash->bins[old_hash->length];
271 ++old_bin)
273 for (e = ISC_LIST_HEAD(*old_bin); e != NULL; e = e_next) {
274 e_next = ISC_LIST_NEXT(e, hlink);
275 ISC_LINK_INIT(e, hlink);
279 isc_mem_put(rrl->mctx, old_hash,
280 sizeof(*old_hash)
281 + (old_hash->length - 1) * sizeof(old_hash->bins[0]));
282 rrl->old_hash = NULL;
285 static isc_result_t
286 expand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) {
287 dns_rrl_hash_t *hash;
288 int old_bins, new_bins, hsize;
289 double rate;
291 if (rrl->old_hash != NULL)
292 free_old_hash(rrl);
295 * Most searches fail and so go to the end of the chain.
296 * Use a small hash table load factor.
298 old_bins = (rrl->hash == NULL) ? 0 : rrl->hash->length;
299 new_bins = old_bins/8 + old_bins;
300 if (new_bins < rrl->num_entries)
301 new_bins = rrl->num_entries;
302 new_bins = hash_divisor(new_bins);
304 hsize = sizeof(dns_rrl_hash_t) + (new_bins-1)*sizeof(hash->bins[0]);
305 hash = isc_mem_get(rrl->mctx, hsize);
306 if (hash == NULL) {
307 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
308 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
309 "isc_mem_get(%d) failed for"
310 " RRL hash table",
311 hsize);
312 return (ISC_R_NOMEMORY);
314 memset(hash, 0, hsize);
315 hash->length = new_bins;
316 rrl->hash_gen ^= 1;
317 hash->gen = rrl->hash_gen;
319 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && old_bins != 0) {
320 rate = rrl->probes;
321 if (rrl->searches != 0)
322 rate /= rrl->searches;
323 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
324 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
325 "increase from %d to %d RRL bins for"
326 " %d entries; average search length %.1f",
327 old_bins, new_bins, rrl->num_entries, rate);
330 rrl->old_hash = rrl->hash;
331 if (rrl->old_hash != NULL)
332 rrl->old_hash->check_time = now;
333 rrl->hash = hash;
335 return (ISC_R_SUCCESS);
338 static void
339 ref_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, int probes, isc_stdtime_t now) {
341 * Make the entry most recently used.
343 if (ISC_LIST_HEAD(rrl->lru) != e) {
344 if (e == rrl->last_logged)
345 rrl->last_logged = ISC_LIST_PREV(e, lru);
346 ISC_LIST_UNLINK(rrl->lru, e, lru);
347 ISC_LIST_PREPEND(rrl->lru, e, lru);
351 * Expand the hash table if it is time and necessary.
352 * This will leave the newly referenced entry in a chain in the
353 * old hash table. It will migrate to the new hash table the next
354 * time it is used or be cut loose when the old hash table is destroyed.
356 rrl->probes += probes;
357 ++rrl->searches;
358 if (rrl->searches > 100 &&
359 delta_rrl_time(rrl->hash->check_time, now) > 1) {
360 if (rrl->probes/rrl->searches > 2)
361 expand_rrl_hash(rrl, now);
362 rrl->hash->check_time = now;
363 rrl->probes = 0;
364 rrl->searches = 0;
368 static inline isc_boolean_t
369 key_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) {
370 if (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0)
371 return (ISC_TRUE);
372 return (ISC_FALSE);
375 static inline isc_uint32_t
376 hash_key(const dns_rrl_key_t *key) {
377 isc_uint32_t hval;
378 int i;
380 hval = key->w[0];
381 for (i = sizeof(*key) / sizeof(key->w[0]) - 1; i >= 0; --i) {
382 hval = key->w[i] + (hval<<1);
384 return (hval);
388 * Construct the hash table key.
389 * Use a hash of the DNS query name to save space in the database.
390 * Collisions result in legitimate rate limiting responses for one
391 * query name also limiting responses for other names to the
392 * same client. This is rare and benign enough given the large
393 * space costs compared to keeping the entire name in the database
394 * entry or the time costs of dynamic allocation.
396 static void
397 make_key(const dns_rrl_t *rrl, dns_rrl_key_t *key,
398 const isc_sockaddr_t *client_addr,
399 dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass,
400 dns_rrl_rtype_t rtype)
402 dns_name_t base;
403 dns_offsets_t base_offsets;
404 int labels, i;
406 memset(key, 0, sizeof(*key));
408 key->s.rtype = rtype;
409 if (rtype == DNS_RRL_RTYPE_QUERY) {
410 key->s.qtype = qtype;
411 key->s.qclass = qclass & 0xff;
412 } else if (rtype == DNS_RRL_RTYPE_REFERRAL ||
413 rtype == DNS_RRL_RTYPE_NODATA) {
415 * Because there is no qtype in the empty answer sections of
416 * referral and NODATA responses, count them as the same.
418 key->s.qclass = qclass & 0xff;
421 if (qname != NULL && qname->labels != 0) {
423 * Ignore the first label of wildcards.
425 if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 &&
426 (labels = dns_name_countlabels(qname)) > 1)
428 dns_name_init(&base, base_offsets);
429 dns_name_getlabelsequence(qname, 1, labels-1, &base);
430 key->s.qname_hash = dns_name_hashbylabel(&base,
431 ISC_FALSE);
432 } else {
433 key->s.qname_hash = dns_name_hashbylabel(qname,
434 ISC_FALSE);
438 switch (client_addr->type.sa.sa_family) {
439 case AF_INET:
440 key->s.ip[0] = (client_addr->type.sin.sin_addr.s_addr &
441 rrl->ipv4_mask);
442 break;
443 case AF_INET6:
444 key->s.ipv6 = ISC_TRUE;
445 memmove(key->s.ip, &client_addr->type.sin6.sin6_addr,
446 sizeof(key->s.ip));
447 for (i = 0; i < DNS_RRL_MAX_PREFIX/32; ++i)
448 key->s.ip[i] &= rrl->ipv6_mask[i];
449 break;
453 static inline dns_rrl_rate_t *
454 get_rate(dns_rrl_t *rrl, dns_rrl_rtype_t rtype) {
455 switch (rtype) {
456 case DNS_RRL_RTYPE_QUERY:
457 return (&rrl->responses_per_second);
458 case DNS_RRL_RTYPE_REFERRAL:
459 return (&rrl->referrals_per_second);
460 case DNS_RRL_RTYPE_NODATA:
461 return (&rrl->nodata_per_second);
462 case DNS_RRL_RTYPE_NXDOMAIN:
463 return (&rrl->nxdomains_per_second);
464 case DNS_RRL_RTYPE_ERROR:
465 return (&rrl->errors_per_second);
466 case DNS_RRL_RTYPE_ALL:
467 return (&rrl->all_per_second);
468 default:
469 INSIST(0);
471 return (NULL);
474 static int
475 response_balance(dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) {
476 dns_rrl_rate_t *ratep;
477 int balance, rate;
479 if (e->key.s.rtype == DNS_RRL_RTYPE_TCP) {
480 rate = 1;
481 } else {
482 ratep = get_rate(rrl, e->key.s.rtype);
483 rate = ratep->scaled;
486 balance = e->responses + age * rate;
487 if (balance > rate)
488 balance = rate;
489 return (balance);
493 * Search for an entry for a response and optionally create it.
495 static dns_rrl_entry_t *
496 get_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr,
497 dns_rdataclass_t qclass, dns_rdatatype_t qtype, dns_name_t *qname,
498 dns_rrl_rtype_t rtype, isc_stdtime_t now, isc_boolean_t create,
499 char *log_buf, unsigned int log_buf_len)
501 dns_rrl_key_t key;
502 isc_uint32_t hval;
503 dns_rrl_entry_t *e;
504 dns_rrl_hash_t *hash;
505 dns_rrl_bin_t *new_bin, *old_bin;
506 int probes, age;
508 make_key(rrl, &key, client_addr, qtype, qname, qclass, rtype);
509 hval = hash_key(&key);
512 * Look for the entry in the current hash table.
514 new_bin = get_bin(rrl->hash, hval);
515 probes = 1;
516 e = ISC_LIST_HEAD(*new_bin);
517 while (e != NULL) {
518 if (key_cmp(&e->key, &key)) {
519 ref_entry(rrl, e, probes, now);
520 return (e);
522 ++probes;
523 e = ISC_LIST_NEXT(e, hlink);
527 * Look in the old hash table.
529 if (rrl->old_hash != NULL) {
530 old_bin = get_bin(rrl->old_hash, hval);
531 e = ISC_LIST_HEAD(*old_bin);
532 while (e != NULL) {
533 if (key_cmp(&e->key, &key)) {
534 ISC_LIST_UNLINK(*old_bin, e, hlink);
535 ISC_LIST_PREPEND(*new_bin, e, hlink);
536 e->hash_gen = rrl->hash_gen;
537 ref_entry(rrl, e, probes, now);
538 return (e);
540 e = ISC_LIST_NEXT(e, hlink);
544 * Discard prevous hash table when all of its entries are old.
546 age = delta_rrl_time(rrl->old_hash->check_time, now);
547 if (age > rrl->window)
548 free_old_hash(rrl);
551 if (!create)
552 return (NULL);
555 * The entry does not exist, so create it by finding a free entry.
556 * Keep currently penalized and logged entries.
557 * Try to make more entries if none are idle.
558 * Steal the oldest entry if we cannot create more.
560 for (e = ISC_LIST_TAIL(rrl->lru);
561 e != NULL;
562 e = ISC_LIST_PREV(e, lru))
564 if (!ISC_LINK_LINKED(e, hlink))
565 break;
566 age = get_age(rrl, e, now);
567 if (age <= 1) {
568 e = NULL;
569 break;
571 if (!e->logged && response_balance(rrl, e, age) > 0)
572 break;
574 if (e == NULL) {
575 expand_entries(rrl, ISC_MIN((rrl->num_entries+1)/2, 1000));
576 e = ISC_LIST_TAIL(rrl->lru);
578 if (e->logged)
579 log_end(rrl, e, ISC_TRUE, log_buf, log_buf_len);
580 if (ISC_LINK_LINKED(e, hlink)) {
581 if (e->hash_gen == rrl->hash_gen)
582 hash = rrl->hash;
583 else
584 hash = rrl->old_hash;
585 old_bin = get_bin(hash, hash_key(&e->key));
586 ISC_LIST_UNLINK(*old_bin, e, hlink);
588 ISC_LIST_PREPEND(*new_bin, e, hlink);
589 e->hash_gen = rrl->hash_gen;
590 e->key = key;
591 e->ts_valid = ISC_FALSE;
592 ref_entry(rrl, e, probes, now);
593 return (e);
596 static void
597 debit_log(const dns_rrl_entry_t *e, int age, const char *action) {
598 char buf[sizeof("age=12345678")];
599 const char *age_str;
601 if (age == DNS_RRL_FOREVER) {
602 age_str = "";
603 } else {
604 snprintf(buf, sizeof(buf), "age=%d", age);
605 age_str = buf;
607 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
608 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
609 "rrl %08x %6s responses=%-3d %s",
610 hash_key(&e->key), age_str, e->responses, action);
613 static inline dns_rrl_result_t
614 debit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale,
615 const isc_sockaddr_t *client_addr, isc_stdtime_t now,
616 char *log_buf, unsigned int log_buf_len)
618 int rate, new_rate, slip, new_slip, age, log_secs, min;
619 dns_rrl_rate_t *ratep;
620 dns_rrl_entry_t const *credit_e;
623 * Pick the rate counter.
624 * Optionally adjust the rate by the estimated query/second rate.
626 ratep = get_rate(rrl, e->key.s.rtype);
627 rate = ratep->r;
628 if (rate == 0)
629 return (DNS_RRL_RESULT_OK);
631 if (scale < 1.0) {
633 * The limit for clients that have used TCP is not scaled.
635 credit_e = get_entry(rrl, client_addr,
636 0, dns_rdatatype_none, NULL,
637 DNS_RRL_RTYPE_TCP, now, ISC_FALSE,
638 log_buf, log_buf_len);
639 if (credit_e != NULL) {
640 age = get_age(rrl, e, now);
641 if (age < rrl->window)
642 scale = 1.0;
645 if (scale < 1.0) {
646 new_rate = (int) (rate * scale);
647 if (new_rate < 1)
648 new_rate = 1;
649 if (ratep->scaled != new_rate) {
650 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
651 DNS_LOGMODULE_REQUEST,
652 DNS_RRL_LOG_DEBUG1,
653 "%d qps scaled %s by %.2f"
654 " from %d to %d",
655 (int)qps, ratep->str, scale,
656 rate, new_rate);
657 rate = new_rate;
658 ratep->scaled = rate;
662 min = -rrl->window * rate;
665 * Treat time jumps into the recent past as no time.
666 * Treat entries older than the window as if they were just created
667 * Credit other entries.
669 age = get_age(rrl, e, now);
670 if (age > 0) {
672 * Credit tokens earned during elapsed time.
674 if (age > rrl->window) {
675 e->responses = rate;
676 e->slip_cnt = 0;
677 } else {
678 e->responses += rate*age;
679 if (e->responses > rate) {
680 e->responses = rate;
681 e->slip_cnt = 0;
685 * Find the seconds since last log message without overflowing
686 * small counter. This counter is reset when an entry is
687 * created. It is not necessarily reset when some requests
688 * are answered provided other requests continue to be dropped
689 * or slipped. This can happen when the request rate is just
690 * at the limit.
692 if (e->logged) {
693 log_secs = e->log_secs;
694 log_secs += age;
695 if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0)
696 log_secs = DNS_RRL_MAX_LOG_SECS;
697 e->log_secs = log_secs;
700 set_age(rrl, e, now);
703 * Debit the entry for this response.
705 if (--e->responses >= 0) {
706 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
707 debit_log(e, age, "");
708 return (DNS_RRL_RESULT_OK);
711 if (e->responses < min)
712 e->responses = min;
715 * Drop this response unless it should slip or leak.
717 slip = rrl->slip.r;
718 if (slip > 2 && scale < 1.0) {
719 new_slip = (int) (slip * scale);
720 if (new_slip < 2)
721 new_slip = 2;
722 if (rrl->slip.scaled != new_slip) {
723 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
724 DNS_LOGMODULE_REQUEST,
725 DNS_RRL_LOG_DEBUG1,
726 "%d qps scaled slip"
727 " by %.2f from %d to %d",
728 (int)qps, scale,
729 slip, new_slip);
730 slip = new_slip;
731 rrl->slip.scaled = slip;
734 if (slip != 0 && e->key.s.rtype != DNS_RRL_RTYPE_ALL) {
735 if (e->slip_cnt++ == 0) {
736 if ((int) e->slip_cnt >= slip)
737 e->slip_cnt = 0;
738 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
739 debit_log(e, age, "slip");
740 return (DNS_RRL_RESULT_SLIP);
741 } else if ((int) e->slip_cnt >= slip) {
742 e->slip_cnt = 0;
746 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
747 debit_log(e, age, "drop");
748 return (DNS_RRL_RESULT_DROP);
751 static inline dns_rrl_qname_buf_t *
752 get_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) {
753 dns_rrl_qname_buf_t *qbuf;
755 qbuf = rrl->qnames[e->log_qname];
756 if (qbuf == NULL || qbuf->e != e)
757 return (NULL);
758 return (qbuf);
761 static inline void
762 free_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) {
763 dns_rrl_qname_buf_t *qbuf;
765 qbuf = get_qname(rrl, e);
766 if (qbuf != NULL) {
767 qbuf->e = NULL;
768 ISC_LIST_APPEND(rrl->qname_free, qbuf, link);
772 static void
773 add_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len) {
774 isc_region_t region;
776 isc_buffer_availableregion(lb, &region);
777 if (str_len >= region.length) {
778 if (region.length <= 0)
779 return;
780 str_len = region.length;
782 memmove(region.base, str, str_len);
783 isc_buffer_add(lb, str_len);
786 #define ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s)-1)
789 * Build strings for the logs
791 static void
792 make_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e,
793 const char *str1, const char *str2, isc_boolean_t plural,
794 dns_name_t *qname, isc_boolean_t save_qname,
795 dns_rrl_result_t rrl_result, isc_result_t resp_result,
796 char *log_buf, unsigned int log_buf_len)
798 isc_buffer_t lb;
799 dns_rrl_qname_buf_t *qbuf;
800 isc_netaddr_t cidr;
801 char strbuf[ISC_MAX(sizeof("/123"), sizeof(" (12345678)"))];
802 const char *rstr;
803 isc_result_t msg_result;
805 if (log_buf_len <= 1) {
806 if (log_buf_len == 1)
807 log_buf[0] = '\0';
808 return;
810 isc_buffer_init(&lb, log_buf, log_buf_len-1);
812 if (str1 != NULL)
813 add_log_str(&lb, str1, strlen(str1));
814 if (str2 != NULL)
815 add_log_str(&lb, str2, strlen(str2));
817 switch (rrl_result) {
818 case DNS_RRL_RESULT_OK:
819 break;
820 case DNS_RRL_RESULT_DROP:
821 ADD_LOG_CSTR(&lb, "drop ");
822 break;
823 case DNS_RRL_RESULT_SLIP:
824 ADD_LOG_CSTR(&lb, "slip ");
825 break;
826 default:
827 INSIST(0);
828 break;
831 switch (e->key.s.rtype) {
832 case DNS_RRL_RTYPE_QUERY:
833 break;
834 case DNS_RRL_RTYPE_REFERRAL:
835 ADD_LOG_CSTR(&lb, "referral ");
836 break;
837 case DNS_RRL_RTYPE_NODATA:
838 ADD_LOG_CSTR(&lb, "NODATA ");
839 break;
840 case DNS_RRL_RTYPE_NXDOMAIN:
841 ADD_LOG_CSTR(&lb, "NXDOMAIN ");
842 break;
843 case DNS_RRL_RTYPE_ERROR:
844 if (resp_result == ISC_R_SUCCESS) {
845 ADD_LOG_CSTR(&lb, "error ");
846 } else {
847 rstr = isc_result_totext(resp_result);
848 add_log_str(&lb, rstr, strlen(rstr));
849 ADD_LOG_CSTR(&lb, " error ");
851 break;
852 case DNS_RRL_RTYPE_ALL:
853 ADD_LOG_CSTR(&lb, "all ");
854 break;
855 default:
856 INSIST(0);
859 if (plural)
860 ADD_LOG_CSTR(&lb, "responses to ");
861 else
862 ADD_LOG_CSTR(&lb, "response to ");
864 memset(&cidr, 0, sizeof(cidr));
865 if (e->key.s.ipv6) {
866 snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen);
867 cidr.family = AF_INET6;
868 memset(&cidr.type.in6, 0, sizeof(cidr.type.in6));
869 memmove(&cidr.type.in6, e->key.s.ip, sizeof(e->key.s.ip));
870 } else {
871 snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen);
872 cidr.family = AF_INET;
873 cidr.type.in.s_addr = e->key.s.ip[0];
875 msg_result = isc_netaddr_totext(&cidr, &lb);
876 if (msg_result != ISC_R_SUCCESS)
877 ADD_LOG_CSTR(&lb, "?");
878 add_log_str(&lb, strbuf, strlen(strbuf));
880 if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY ||
881 e->key.s.rtype == DNS_RRL_RTYPE_REFERRAL ||
882 e->key.s.rtype == DNS_RRL_RTYPE_NODATA ||
883 e->key.s.rtype == DNS_RRL_RTYPE_NXDOMAIN) {
884 qbuf = get_qname(rrl, e);
885 if (save_qname && qbuf == NULL &&
886 qname != NULL && dns_name_isabsolute(qname)) {
888 * Capture the qname for the "stop limiting" message.
890 qbuf = ISC_LIST_TAIL(rrl->qname_free);
891 if (qbuf != NULL) {
892 ISC_LIST_UNLINK(rrl->qname_free, qbuf, link);
893 } else if (rrl->num_qnames < DNS_RRL_QNAMES) {
894 qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf));
895 if (qbuf != NULL) {
896 memset(qbuf, 0, sizeof(*qbuf));
897 ISC_LINK_INIT(qbuf, link);
898 qbuf->index = rrl->num_qnames;
899 rrl->qnames[rrl->num_qnames++] = qbuf;
900 } else {
901 isc_log_write(dns_lctx,
902 DNS_LOGCATEGORY_RRL,
903 DNS_LOGMODULE_REQUEST,
904 DNS_RRL_LOG_FAIL,
905 "isc_mem_get(%d)"
906 " failed for RRL qname",
907 (int)sizeof(*qbuf));
910 if (qbuf != NULL) {
911 e->log_qname = qbuf->index;
912 qbuf->e = e;
913 dns_fixedname_init(&qbuf->qname);
914 dns_name_copy(qname,
915 dns_fixedname_name(&qbuf->qname),
916 NULL);
919 if (qbuf != NULL)
920 qname = dns_fixedname_name(&qbuf->qname);
921 if (qname != NULL) {
922 ADD_LOG_CSTR(&lb, " for ");
923 (void)dns_name_totext(qname, ISC_TRUE, &lb);
924 } else {
925 ADD_LOG_CSTR(&lb, " for (?)");
927 if (e->key.s.rtype != DNS_RRL_RTYPE_NXDOMAIN) {
928 ADD_LOG_CSTR(&lb, " ");
929 (void)dns_rdataclass_totext(e->key.s.qclass, &lb);
930 if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY) {
931 ADD_LOG_CSTR(&lb, " ");
932 (void)dns_rdatatype_totext(e->key.s.qtype, &lb);
935 snprintf(strbuf, sizeof(strbuf), " (%08x)",
936 e->key.s.qname_hash);
937 add_log_str(&lb, strbuf, strlen(strbuf));
941 * We saved room for '\0'.
943 log_buf[isc_buffer_usedlength(&lb)] = '\0';
946 static void
947 log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_boolean_t early,
948 char *log_buf, unsigned int log_buf_len)
950 if (e->logged) {
951 make_log_buf(rrl, e,
952 early ? "*" : NULL,
953 rrl->log_only ? "would stop limiting "
954 : "stop limiting ",
955 ISC_TRUE, NULL, ISC_FALSE,
956 DNS_RRL_RESULT_OK, ISC_R_SUCCESS,
957 log_buf, log_buf_len);
958 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
959 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
960 "%s", log_buf);
961 free_qname(rrl, e);
962 e->logged = ISC_FALSE;
963 --rrl->num_logged;
968 * Log messages for streams that have stopped being rate limited.
970 static void
971 log_stops(dns_rrl_t *rrl, isc_stdtime_t now, int limit,
972 char *log_buf, unsigned int log_buf_len)
974 dns_rrl_entry_t *e;
975 int age;
977 for (e = rrl->last_logged; e != NULL; e = ISC_LIST_PREV(e, lru)) {
978 if (!e->logged)
979 continue;
980 if (now != 0) {
981 age = get_age(rrl, e, now);
982 if (age < DNS_RRL_STOP_LOG_SECS ||
983 response_balance(rrl, e, age) < 0)
984 break;
987 log_end(rrl, e, now == 0, log_buf, log_buf_len);
988 if (rrl->num_logged <= 0)
989 break;
992 * Too many messages could stall real work.
994 if (--limit < 0) {
995 rrl->last_logged = ISC_LIST_PREV(e, lru);
996 return;
999 if (e == NULL) {
1000 INSIST(rrl->num_logged == 0);
1001 rrl->log_stops_time = now;
1003 rrl->last_logged = e;
1007 * Main rate limit interface.
1009 dns_rrl_result_t
1010 dns_rrl(dns_view_t *view,
1011 const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp,
1012 dns_rdataclass_t qclass, dns_rdatatype_t qtype,
1013 dns_name_t *qname, isc_result_t resp_result, isc_stdtime_t now,
1014 isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len)
1016 dns_rrl_t *rrl;
1017 dns_rrl_rtype_t rtype;
1018 dns_rrl_entry_t *e;
1019 isc_netaddr_t netclient;
1020 int secs;
1021 double qps, scale;
1022 int exempt_match;
1023 isc_result_t result;
1024 dns_rrl_result_t rrl_result;
1026 INSIST(log_buf != NULL && log_buf_len > 0);
1028 rrl = view->rrl;
1029 if (rrl->exempt != NULL) {
1030 isc_netaddr_fromsockaddr(&netclient, client_addr);
1031 result = dns_acl_match(&netclient, NULL, rrl->exempt,
1032 &view->aclenv, &exempt_match, NULL);
1033 if (result == ISC_R_SUCCESS && exempt_match > 0)
1034 return (DNS_RRL_RESULT_OK);
1037 LOCK(&rrl->lock);
1040 * Estimate total query per second rate when scaling by qps.
1042 if (rrl->qps_scale == 0) {
1043 qps = 0.0;
1044 scale = 1.0;
1045 } else {
1046 ++rrl->qps_responses;
1047 secs = delta_rrl_time(rrl->qps_time, now);
1048 if (secs <= 0) {
1049 qps = rrl->qps;
1050 } else {
1051 qps = (1.0*rrl->qps_responses) / secs;
1052 if (secs >= rrl->window) {
1053 if (isc_log_wouldlog(dns_lctx,
1054 DNS_RRL_LOG_DEBUG3))
1055 isc_log_write(dns_lctx,
1056 DNS_LOGCATEGORY_RRL,
1057 DNS_LOGMODULE_REQUEST,
1058 DNS_RRL_LOG_DEBUG3,
1059 "%d responses/%d seconds"
1060 " = %d qps",
1061 rrl->qps_responses, secs,
1062 (int)qps);
1063 rrl->qps = qps;
1064 rrl->qps_responses = 0;
1065 rrl->qps_time = now;
1066 } else if (qps < rrl->qps) {
1067 qps = rrl->qps;
1070 scale = rrl->qps_scale / qps;
1074 * Do maintenance once per second.
1076 if (rrl->num_logged > 0 && rrl->log_stops_time != now)
1077 log_stops(rrl, now, 8, log_buf, log_buf_len);
1080 * Notice TCP responses when scaling limits by qps.
1081 * Do not try to rate limit TCP responses.
1083 if (is_tcp) {
1084 if (scale < 1.0) {
1085 e = get_entry(rrl, client_addr,
1086 0, dns_rdatatype_none, NULL,
1087 DNS_RRL_RTYPE_TCP, now, ISC_TRUE,
1088 log_buf, log_buf_len);
1089 if (e != NULL) {
1090 e->responses = -(rrl->window+1);
1091 set_age(rrl, e, now);
1094 UNLOCK(&rrl->lock);
1095 return (ISC_R_SUCCESS);
1099 * Find the right kind of entry, creating it if necessary.
1100 * If that is impossible, then nothing more can be done
1102 switch (resp_result) {
1103 case ISC_R_SUCCESS:
1104 rtype = DNS_RRL_RTYPE_QUERY;
1105 break;
1106 case DNS_R_DELEGATION:
1107 rtype = DNS_RRL_RTYPE_REFERRAL;
1108 break;
1109 case DNS_R_NXRRSET:
1110 rtype = DNS_RRL_RTYPE_NODATA;
1111 break;
1112 case DNS_R_NXDOMAIN:
1113 rtype = DNS_RRL_RTYPE_NXDOMAIN;
1114 break;
1115 default:
1116 rtype = DNS_RRL_RTYPE_ERROR;
1117 break;
1119 e = get_entry(rrl, client_addr, qclass, qtype, qname, rtype,
1120 now, ISC_TRUE, log_buf, log_buf_len);
1121 if (e == NULL) {
1122 UNLOCK(&rrl->lock);
1123 return (DNS_RRL_RESULT_OK);
1126 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
1128 * Do not worry about speed or releasing the lock.
1129 * This message appears before messages from debit_rrl_entry().
1131 make_log_buf(rrl, e, "consider limiting ", NULL, ISC_FALSE,
1132 qname, ISC_FALSE, DNS_RRL_RESULT_OK, resp_result,
1133 log_buf, log_buf_len);
1134 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1135 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
1136 "%s", log_buf);
1139 rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now,
1140 log_buf, log_buf_len);
1142 if (rrl->all_per_second.r != 0) {
1144 * We must debit the all-per-second token bucket if we have
1145 * an all-per-second limit for the IP address.
1146 * The all-per-second limit determines the log message
1147 * when both limits are hit.
1148 * The response limiting must continue if the
1149 * all-per-second limiting lapses.
1151 dns_rrl_entry_t *e_all;
1152 dns_rrl_result_t rrl_all_result;
1154 e_all = get_entry(rrl, client_addr,
1155 0, dns_rdatatype_none, NULL,
1156 DNS_RRL_RTYPE_ALL, now, ISC_TRUE,
1157 log_buf, log_buf_len);
1158 if (e_all == NULL) {
1159 UNLOCK(&rrl->lock);
1160 return (DNS_RRL_RESULT_OK);
1162 rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale,
1163 client_addr, now,
1164 log_buf, log_buf_len);
1165 if (rrl_all_result != DNS_RRL_RESULT_OK) {
1166 int level;
1168 e = e_all;
1169 rrl_result = rrl_all_result;
1170 if (rrl_result == DNS_RRL_RESULT_OK)
1171 level = DNS_RRL_LOG_DEBUG2;
1172 else
1173 level = DNS_RRL_LOG_DEBUG1;
1174 if (isc_log_wouldlog(dns_lctx, level)) {
1175 make_log_buf(rrl, e,
1176 "prefer all-per-second limiting ",
1177 NULL, ISC_TRUE, qname, ISC_FALSE,
1178 DNS_RRL_RESULT_OK, resp_result,
1179 log_buf, log_buf_len);
1180 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1181 DNS_LOGMODULE_REQUEST, level,
1182 "%s", log_buf);
1187 if (rrl_result == DNS_RRL_RESULT_OK) {
1188 UNLOCK(&rrl->lock);
1189 return (DNS_RRL_RESULT_OK);
1193 * Log occassionally in the rate-limit category.
1195 if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) &&
1196 isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP)) {
1197 make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
1198 e->logged ? "continue limiting " : "limit ",
1199 ISC_TRUE, qname, ISC_TRUE,
1200 DNS_RRL_RESULT_OK, resp_result,
1201 log_buf, log_buf_len);
1202 if (!e->logged) {
1203 e->logged = ISC_TRUE;
1204 if (++rrl->num_logged <= 1)
1205 rrl->last_logged = e;
1207 e->log_secs = 0;
1210 * Avoid holding the lock.
1212 if (!wouldlog) {
1213 UNLOCK(&rrl->lock);
1214 e = NULL;
1216 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1217 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
1218 "%s", log_buf);
1222 * Make a log message for the caller.
1224 if (wouldlog)
1225 make_log_buf(rrl, e,
1226 rrl->log_only ? "would rate limit " : "rate limit ",
1227 NULL, ISC_FALSE, qname, ISC_FALSE,
1228 rrl_result, resp_result, log_buf, log_buf_len);
1230 if (e != NULL) {
1232 * Do not save the qname unless we might need it for
1233 * the ending log message.
1235 if (!e->logged)
1236 free_qname(rrl, e);
1237 UNLOCK(&rrl->lock);
1240 return (rrl_result);
1243 void
1244 dns_rrl_view_destroy(dns_view_t *view) {
1245 dns_rrl_t *rrl;
1246 dns_rrl_block_t *b;
1247 dns_rrl_hash_t *h;
1248 char log_buf[DNS_RRL_LOG_BUF_LEN];
1249 int i;
1251 rrl = view->rrl;
1252 if (rrl == NULL)
1253 return;
1254 view->rrl = NULL;
1257 * Assume the caller takes care of locking the view and anything else.
1260 if (rrl->num_logged > 0)
1261 log_stops(rrl, 0, ISC_INT32_MAX, log_buf, sizeof(log_buf));
1263 for (i = 0; i < DNS_RRL_QNAMES; ++i) {
1264 if (rrl->qnames[i] == NULL)
1265 break;
1266 isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i]));
1269 if (rrl->exempt != NULL)
1270 dns_acl_detach(&rrl->exempt);
1272 DESTROYLOCK(&rrl->lock);
1274 while (!ISC_LIST_EMPTY(rrl->blocks)) {
1275 b = ISC_LIST_HEAD(rrl->blocks);
1276 ISC_LIST_UNLINK(rrl->blocks, b, link);
1277 isc_mem_put(rrl->mctx, b, b->size);
1280 h = rrl->hash;
1281 if (h != NULL)
1282 isc_mem_put(rrl->mctx, h,
1283 sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1285 h = rrl->old_hash;
1286 if (h != NULL)
1287 isc_mem_put(rrl->mctx, h,
1288 sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1290 isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
1293 isc_result_t
1294 dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) {
1295 dns_rrl_t *rrl;
1296 isc_result_t result;
1298 *rrlp = NULL;
1300 rrl = isc_mem_get(view->mctx, sizeof(*rrl));
1301 if (rrl == NULL)
1302 return (ISC_R_NOMEMORY);
1303 memset(rrl, 0, sizeof(*rrl));
1304 isc_mem_attach(view->mctx, &rrl->mctx);
1305 result = isc_mutex_init(&rrl->lock);
1306 if (result != ISC_R_SUCCESS) {
1307 isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
1308 return (result);
1310 isc_stdtime_get(&rrl->ts_bases[0]);
1312 view->rrl = rrl;
1314 result = expand_entries(rrl, min_entries);
1315 if (result != ISC_R_SUCCESS) {
1316 dns_rrl_view_destroy(view);
1317 return (result);
1319 result = expand_rrl_hash(rrl, 0);
1320 if (result != ISC_R_SUCCESS) {
1321 dns_rrl_view_destroy(view);
1322 return (result);
1325 *rrlp = rrl;
1326 return (ISC_R_SUCCESS);