1 /* $NetBSD: rrl.c,v 1.4 2014/12/10 04:37:58 christos Exp $ */
4 * Copyright (C) 2012-2014 Internet Systems Consortium, Inc. ("ISC")
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
11 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
12 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
13 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
14 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
15 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
16 * PERFORMANCE OF THIS SOFTWARE.
22 * Rate limit DNS responses.
25 /* #define ISC_LIST_CHECKINIT */
30 #include <isc/netaddr.h>
31 #include <isc/print.h>
33 #include <dns/result.h>
34 #include <dns/rcode.h>
35 #include <dns/rdatatype.h>
36 #include <dns/rdataclass.h>
42 log_end(dns_rrl_t
*rrl
, dns_rrl_entry_t
*e
, isc_boolean_t early
,
43 char *log_buf
, unsigned int log_buf_len
);
46 * Get a modulus for a hash function that is tolerably likely to be
47 * relatively prime to most inputs. Of course, we get a prime for for initial
48 * values not larger than the square of the last prime. We often get a prime
50 * This works well in practice for hash tables up to at least 100
51 * times the square of the last prime and better than a multiplicative hash.
54 hash_divisor(unsigned int initial
) {
55 static isc_uint16_t primes
[] = {
56 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,
57 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
59 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
60 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
61 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
62 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
63 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
64 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
65 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
66 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
67 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
68 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
69 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
70 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009,
79 if (primes
[sizeof(primes
)/sizeof(primes
[0])-1] >= result
) {
86 if ((result
& 1) == 0)
95 if ((result
% p
) == 0) {
100 } while (pp
< &primes
[sizeof(primes
) / sizeof(primes
[0])]);
102 if (isc_log_wouldlog(dns_lctx
, DNS_RRL_LOG_DEBUG3
))
103 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
104 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_DEBUG3
,
105 "%d hash_divisor() divisions in %d tries"
106 " to get %d from %d",
107 divisions
, tries
, result
, initial
);
113 * Convert a timestamp to a number of seconds in the past.
116 delta_rrl_time(isc_stdtime_t ts
, isc_stdtime_t now
) {
124 * The timestamp is in the future. That future might result from
125 * re-ordered requests, because we use timestamps on requests
126 * instead of consulting a clock. Timestamps in the distant future are
127 * assumed to result from clock changes. When the clock changes to
128 * the past, make existing timestamps appear to be in the past.
130 if (delta
< -DNS_RRL_MAX_TIME_TRAVEL
)
131 return (DNS_RRL_FOREVER
);
136 get_age(const dns_rrl_t
*rrl
, const dns_rrl_entry_t
*e
, isc_stdtime_t now
) {
138 return (DNS_RRL_FOREVER
);
139 return (delta_rrl_time(e
->ts
+ rrl
->ts_bases
[e
->ts_gen
], now
));
143 set_age(dns_rrl_t
*rrl
, dns_rrl_entry_t
*e
, isc_stdtime_t now
) {
144 dns_rrl_entry_t
*e_old
;
148 ts_gen
= rrl
->ts_gen
;
149 ts
= now
- rrl
->ts_bases
[ts_gen
];
151 if (ts
< -DNS_RRL_MAX_TIME_TRAVEL
)
152 ts
= DNS_RRL_FOREVER
;
158 * Make a new timestamp base if the current base is too old.
159 * All entries older than DNS_RRL_MAX_WINDOW seconds are ancient,
160 * useless history. Their timestamps can be treated as if they are
162 * We only do arithmetic on more recent timestamps, so bases for
163 * older timestamps can be recycled provided the old timestamps are
164 * marked as ancient history.
165 * This loop is almost always very short because most entries are
166 * recycled after one second and any entries that need to be marked
167 * are older than (DNS_RRL_TS_BASES)*DNS_RRL_MAX_TS seconds.
169 if (ts
>= DNS_RRL_MAX_TS
) {
170 ts_gen
= (ts_gen
+ 1) % DNS_RRL_TS_BASES
;
171 for (e_old
= ISC_LIST_TAIL(rrl
->lru
), i
= 0;
172 e_old
!= NULL
&& (e_old
->ts_gen
== ts_gen
||
173 !ISC_LINK_LINKED(e_old
, hlink
));
174 e_old
= ISC_LIST_PREV(e_old
, lru
), ++i
)
176 e_old
->ts_valid
= ISC_FALSE
;
179 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
180 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_DEBUG1
,
181 "rrl new time base scanned %d entries"
182 " at %d for %d %d %d %d",
183 i
, now
, rrl
->ts_bases
[ts_gen
],
184 rrl
->ts_bases
[(ts_gen
+ 1) %
186 rrl
->ts_bases
[(ts_gen
+ 2) %
188 rrl
->ts_bases
[(ts_gen
+ 3) %
190 rrl
->ts_gen
= ts_gen
;
191 rrl
->ts_bases
[ts_gen
] = now
;
197 e
->ts_valid
= ISC_TRUE
;
201 expand_entries(dns_rrl_t
*rrl
, int new) {
208 if (rrl
->num_entries
+ new >= rrl
->max_entries
&&
209 rrl
->max_entries
!= 0)
211 new = rrl
->max_entries
- rrl
->num_entries
;
213 return (ISC_R_SUCCESS
);
217 * Log expansions so that the user can tune max-table-size
218 * and min-table-size.
220 if (isc_log_wouldlog(dns_lctx
, DNS_RRL_LOG_DROP
) &&
223 if (rrl
->searches
!= 0)
224 rate
/= rrl
->searches
;
225 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
226 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_DROP
,
227 "increase from %d to %d RRL entries with"
228 " %d bins; average search length %.1f",
229 rrl
->num_entries
, rrl
->num_entries
+new,
230 rrl
->hash
->length
, rate
);
233 bsize
= sizeof(dns_rrl_block_t
) + (new-1)*sizeof(dns_rrl_entry_t
);
234 b
= isc_mem_get(rrl
->mctx
, bsize
);
236 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
237 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_FAIL
,
238 "isc_mem_get(%d) failed for RRL entries",
240 return (ISC_R_NOMEMORY
);
246 for (i
= 0; i
< new; ++i
, ++e
) {
247 ISC_LINK_INIT(e
, hlink
);
248 ISC_LIST_INITANDAPPEND(rrl
->lru
, e
, lru
);
250 rrl
->num_entries
+= new;
251 ISC_LIST_INITANDAPPEND(rrl
->blocks
, b
, link
);
253 return (ISC_R_SUCCESS
);
256 static inline dns_rrl_bin_t
*
257 get_bin(dns_rrl_hash_t
*hash
, unsigned int hval
) {
258 INSIST(hash
!= NULL
);
259 return (&hash
->bins
[hval
% hash
->length
]);
263 free_old_hash(dns_rrl_t
*rrl
) {
264 dns_rrl_hash_t
*old_hash
;
265 dns_rrl_bin_t
*old_bin
;
266 dns_rrl_entry_t
*e
, *e_next
;
268 old_hash
= rrl
->old_hash
;
269 for (old_bin
= &old_hash
->bins
[0];
270 old_bin
< &old_hash
->bins
[old_hash
->length
];
273 for (e
= ISC_LIST_HEAD(*old_bin
); e
!= NULL
; e
= e_next
) {
274 e_next
= ISC_LIST_NEXT(e
, hlink
);
275 ISC_LINK_INIT(e
, hlink
);
279 isc_mem_put(rrl
->mctx
, old_hash
,
281 + (old_hash
->length
- 1) * sizeof(old_hash
->bins
[0]));
282 rrl
->old_hash
= NULL
;
286 expand_rrl_hash(dns_rrl_t
*rrl
, isc_stdtime_t now
) {
287 dns_rrl_hash_t
*hash
;
288 int old_bins
, new_bins
, hsize
;
291 if (rrl
->old_hash
!= NULL
)
295 * Most searches fail and so go to the end of the chain.
296 * Use a small hash table load factor.
298 old_bins
= (rrl
->hash
== NULL
) ? 0 : rrl
->hash
->length
;
299 new_bins
= old_bins
/8 + old_bins
;
300 if (new_bins
< rrl
->num_entries
)
301 new_bins
= rrl
->num_entries
;
302 new_bins
= hash_divisor(new_bins
);
304 hsize
= sizeof(dns_rrl_hash_t
) + (new_bins
-1)*sizeof(hash
->bins
[0]);
305 hash
= isc_mem_get(rrl
->mctx
, hsize
);
307 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
308 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_FAIL
,
309 "isc_mem_get(%d) failed for"
312 return (ISC_R_NOMEMORY
);
314 memset(hash
, 0, hsize
);
315 hash
->length
= new_bins
;
317 hash
->gen
= rrl
->hash_gen
;
319 if (isc_log_wouldlog(dns_lctx
, DNS_RRL_LOG_DROP
) && old_bins
!= 0) {
321 if (rrl
->searches
!= 0)
322 rate
/= rrl
->searches
;
323 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
324 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_DROP
,
325 "increase from %d to %d RRL bins for"
326 " %d entries; average search length %.1f",
327 old_bins
, new_bins
, rrl
->num_entries
, rate
);
330 rrl
->old_hash
= rrl
->hash
;
331 if (rrl
->old_hash
!= NULL
)
332 rrl
->old_hash
->check_time
= now
;
335 return (ISC_R_SUCCESS
);
339 ref_entry(dns_rrl_t
*rrl
, dns_rrl_entry_t
*e
, int probes
, isc_stdtime_t now
) {
341 * Make the entry most recently used.
343 if (ISC_LIST_HEAD(rrl
->lru
) != e
) {
344 if (e
== rrl
->last_logged
)
345 rrl
->last_logged
= ISC_LIST_PREV(e
, lru
);
346 ISC_LIST_UNLINK(rrl
->lru
, e
, lru
);
347 ISC_LIST_PREPEND(rrl
->lru
, e
, lru
);
351 * Expand the hash table if it is time and necessary.
352 * This will leave the newly referenced entry in a chain in the
353 * old hash table. It will migrate to the new hash table the next
354 * time it is used or be cut loose when the old hash table is destroyed.
356 rrl
->probes
+= probes
;
358 if (rrl
->searches
> 100 &&
359 delta_rrl_time(rrl
->hash
->check_time
, now
) > 1) {
360 if (rrl
->probes
/rrl
->searches
> 2)
361 expand_rrl_hash(rrl
, now
);
362 rrl
->hash
->check_time
= now
;
368 static inline isc_boolean_t
369 key_cmp(const dns_rrl_key_t
*a
, const dns_rrl_key_t
*b
) {
370 if (memcmp(a
, b
, sizeof(dns_rrl_key_t
)) == 0)
375 static inline isc_uint32_t
376 hash_key(const dns_rrl_key_t
*key
) {
381 for (i
= sizeof(*key
) / sizeof(key
->w
[0]) - 1; i
>= 0; --i
) {
382 hval
= key
->w
[i
] + (hval
<<1);
388 * Construct the hash table key.
389 * Use a hash of the DNS query name to save space in the database.
390 * Collisions result in legitimate rate limiting responses for one
391 * query name also limiting responses for other names to the
392 * same client. This is rare and benign enough given the large
393 * space costs compared to keeping the entire name in the database
394 * entry or the time costs of dynamic allocation.
397 make_key(const dns_rrl_t
*rrl
, dns_rrl_key_t
*key
,
398 const isc_sockaddr_t
*client_addr
,
399 dns_rdatatype_t qtype
, dns_name_t
*qname
, dns_rdataclass_t qclass
,
400 dns_rrl_rtype_t rtype
)
403 dns_offsets_t base_offsets
;
406 memset(key
, 0, sizeof(*key
));
408 key
->s
.rtype
= rtype
;
409 if (rtype
== DNS_RRL_RTYPE_QUERY
) {
410 key
->s
.qtype
= qtype
;
411 key
->s
.qclass
= qclass
& 0xff;
412 } else if (rtype
== DNS_RRL_RTYPE_REFERRAL
||
413 rtype
== DNS_RRL_RTYPE_NODATA
) {
415 * Because there is no qtype in the empty answer sections of
416 * referral and NODATA responses, count them as the same.
418 key
->s
.qclass
= qclass
& 0xff;
421 if (qname
!= NULL
&& qname
->labels
!= 0) {
423 * Ignore the first label of wildcards.
425 if ((qname
->attributes
& DNS_NAMEATTR_WILDCARD
) != 0 &&
426 (labels
= dns_name_countlabels(qname
)) > 1)
428 dns_name_init(&base
, base_offsets
);
429 dns_name_getlabelsequence(qname
, 1, labels
-1, &base
);
430 key
->s
.qname_hash
= dns_name_hashbylabel(&base
,
433 key
->s
.qname_hash
= dns_name_hashbylabel(qname
,
438 switch (client_addr
->type
.sa
.sa_family
) {
440 key
->s
.ip
[0] = (client_addr
->type
.sin
.sin_addr
.s_addr
&
444 key
->s
.ipv6
= ISC_TRUE
;
445 memmove(key
->s
.ip
, &client_addr
->type
.sin6
.sin6_addr
,
447 for (i
= 0; i
< DNS_RRL_MAX_PREFIX
/32; ++i
)
448 key
->s
.ip
[i
] &= rrl
->ipv6_mask
[i
];
453 static inline dns_rrl_rate_t
*
454 get_rate(dns_rrl_t
*rrl
, dns_rrl_rtype_t rtype
) {
456 case DNS_RRL_RTYPE_QUERY
:
457 return (&rrl
->responses_per_second
);
458 case DNS_RRL_RTYPE_REFERRAL
:
459 return (&rrl
->referrals_per_second
);
460 case DNS_RRL_RTYPE_NODATA
:
461 return (&rrl
->nodata_per_second
);
462 case DNS_RRL_RTYPE_NXDOMAIN
:
463 return (&rrl
->nxdomains_per_second
);
464 case DNS_RRL_RTYPE_ERROR
:
465 return (&rrl
->errors_per_second
);
466 case DNS_RRL_RTYPE_ALL
:
467 return (&rrl
->all_per_second
);
475 response_balance(dns_rrl_t
*rrl
, const dns_rrl_entry_t
*e
, int age
) {
476 dns_rrl_rate_t
*ratep
;
479 if (e
->key
.s
.rtype
== DNS_RRL_RTYPE_TCP
) {
482 ratep
= get_rate(rrl
, e
->key
.s
.rtype
);
483 rate
= ratep
->scaled
;
486 balance
= e
->responses
+ age
* rate
;
493 * Search for an entry for a response and optionally create it.
495 static dns_rrl_entry_t
*
496 get_entry(dns_rrl_t
*rrl
, const isc_sockaddr_t
*client_addr
,
497 dns_rdataclass_t qclass
, dns_rdatatype_t qtype
, dns_name_t
*qname
,
498 dns_rrl_rtype_t rtype
, isc_stdtime_t now
, isc_boolean_t create
,
499 char *log_buf
, unsigned int log_buf_len
)
504 dns_rrl_hash_t
*hash
;
505 dns_rrl_bin_t
*new_bin
, *old_bin
;
508 make_key(rrl
, &key
, client_addr
, qtype
, qname
, qclass
, rtype
);
509 hval
= hash_key(&key
);
512 * Look for the entry in the current hash table.
514 new_bin
= get_bin(rrl
->hash
, hval
);
516 e
= ISC_LIST_HEAD(*new_bin
);
518 if (key_cmp(&e
->key
, &key
)) {
519 ref_entry(rrl
, e
, probes
, now
);
523 e
= ISC_LIST_NEXT(e
, hlink
);
527 * Look in the old hash table.
529 if (rrl
->old_hash
!= NULL
) {
530 old_bin
= get_bin(rrl
->old_hash
, hval
);
531 e
= ISC_LIST_HEAD(*old_bin
);
533 if (key_cmp(&e
->key
, &key
)) {
534 ISC_LIST_UNLINK(*old_bin
, e
, hlink
);
535 ISC_LIST_PREPEND(*new_bin
, e
, hlink
);
536 e
->hash_gen
= rrl
->hash_gen
;
537 ref_entry(rrl
, e
, probes
, now
);
540 e
= ISC_LIST_NEXT(e
, hlink
);
544 * Discard prevous hash table when all of its entries are old.
546 age
= delta_rrl_time(rrl
->old_hash
->check_time
, now
);
547 if (age
> rrl
->window
)
555 * The entry does not exist, so create it by finding a free entry.
556 * Keep currently penalized and logged entries.
557 * Try to make more entries if none are idle.
558 * Steal the oldest entry if we cannot create more.
560 for (e
= ISC_LIST_TAIL(rrl
->lru
);
562 e
= ISC_LIST_PREV(e
, lru
))
564 if (!ISC_LINK_LINKED(e
, hlink
))
566 age
= get_age(rrl
, e
, now
);
571 if (!e
->logged
&& response_balance(rrl
, e
, age
) > 0)
575 expand_entries(rrl
, ISC_MIN((rrl
->num_entries
+1)/2, 1000));
576 e
= ISC_LIST_TAIL(rrl
->lru
);
579 log_end(rrl
, e
, ISC_TRUE
, log_buf
, log_buf_len
);
580 if (ISC_LINK_LINKED(e
, hlink
)) {
581 if (e
->hash_gen
== rrl
->hash_gen
)
584 hash
= rrl
->old_hash
;
585 old_bin
= get_bin(hash
, hash_key(&e
->key
));
586 ISC_LIST_UNLINK(*old_bin
, e
, hlink
);
588 ISC_LIST_PREPEND(*new_bin
, e
, hlink
);
589 e
->hash_gen
= rrl
->hash_gen
;
591 e
->ts_valid
= ISC_FALSE
;
592 ref_entry(rrl
, e
, probes
, now
);
597 debit_log(const dns_rrl_entry_t
*e
, int age
, const char *action
) {
598 char buf
[sizeof("age=12345678")];
601 if (age
== DNS_RRL_FOREVER
) {
604 snprintf(buf
, sizeof(buf
), "age=%d", age
);
607 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
608 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_DEBUG3
,
609 "rrl %08x %6s responses=%-3d %s",
610 hash_key(&e
->key
), age_str
, e
->responses
, action
);
613 static inline dns_rrl_result_t
614 debit_rrl_entry(dns_rrl_t
*rrl
, dns_rrl_entry_t
*e
, double qps
, double scale
,
615 const isc_sockaddr_t
*client_addr
, isc_stdtime_t now
,
616 char *log_buf
, unsigned int log_buf_len
)
618 int rate
, new_rate
, slip
, new_slip
, age
, log_secs
, min
;
619 dns_rrl_rate_t
*ratep
;
620 dns_rrl_entry_t
const *credit_e
;
623 * Pick the rate counter.
624 * Optionally adjust the rate by the estimated query/second rate.
626 ratep
= get_rate(rrl
, e
->key
.s
.rtype
);
629 return (DNS_RRL_RESULT_OK
);
633 * The limit for clients that have used TCP is not scaled.
635 credit_e
= get_entry(rrl
, client_addr
,
636 0, dns_rdatatype_none
, NULL
,
637 DNS_RRL_RTYPE_TCP
, now
, ISC_FALSE
,
638 log_buf
, log_buf_len
);
639 if (credit_e
!= NULL
) {
640 age
= get_age(rrl
, e
, now
);
641 if (age
< rrl
->window
)
646 new_rate
= (int) (rate
* scale
);
649 if (ratep
->scaled
!= new_rate
) {
650 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
651 DNS_LOGMODULE_REQUEST
,
653 "%d qps scaled %s by %.2f"
655 (int)qps
, ratep
->str
, scale
,
658 ratep
->scaled
= rate
;
662 min
= -rrl
->window
* rate
;
665 * Treat time jumps into the recent past as no time.
666 * Treat entries older than the window as if they were just created
667 * Credit other entries.
669 age
= get_age(rrl
, e
, now
);
672 * Credit tokens earned during elapsed time.
674 if (age
> rrl
->window
) {
678 e
->responses
+= rate
*age
;
679 if (e
->responses
> rate
) {
685 * Find the seconds since last log message without overflowing
686 * small counter. This counter is reset when an entry is
687 * created. It is not necessarily reset when some requests
688 * are answered provided other requests continue to be dropped
689 * or slipped. This can happen when the request rate is just
693 log_secs
= e
->log_secs
;
695 if (log_secs
> DNS_RRL_MAX_LOG_SECS
|| log_secs
< 0)
696 log_secs
= DNS_RRL_MAX_LOG_SECS
;
697 e
->log_secs
= log_secs
;
700 set_age(rrl
, e
, now
);
703 * Debit the entry for this response.
705 if (--e
->responses
>= 0) {
706 if (isc_log_wouldlog(dns_lctx
, DNS_RRL_LOG_DEBUG3
))
707 debit_log(e
, age
, "");
708 return (DNS_RRL_RESULT_OK
);
711 if (e
->responses
< min
)
715 * Drop this response unless it should slip or leak.
718 if (slip
> 2 && scale
< 1.0) {
719 new_slip
= (int) (slip
* scale
);
722 if (rrl
->slip
.scaled
!= new_slip
) {
723 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
724 DNS_LOGMODULE_REQUEST
,
727 " by %.2f from %d to %d",
731 rrl
->slip
.scaled
= slip
;
734 if (slip
!= 0 && e
->key
.s
.rtype
!= DNS_RRL_RTYPE_ALL
) {
735 if (e
->slip_cnt
++ == 0) {
736 if ((int) e
->slip_cnt
>= slip
)
738 if (isc_log_wouldlog(dns_lctx
, DNS_RRL_LOG_DEBUG3
))
739 debit_log(e
, age
, "slip");
740 return (DNS_RRL_RESULT_SLIP
);
741 } else if ((int) e
->slip_cnt
>= slip
) {
746 if (isc_log_wouldlog(dns_lctx
, DNS_RRL_LOG_DEBUG3
))
747 debit_log(e
, age
, "drop");
748 return (DNS_RRL_RESULT_DROP
);
751 static inline dns_rrl_qname_buf_t
*
752 get_qname(dns_rrl_t
*rrl
, const dns_rrl_entry_t
*e
) {
753 dns_rrl_qname_buf_t
*qbuf
;
755 qbuf
= rrl
->qnames
[e
->log_qname
];
756 if (qbuf
== NULL
|| qbuf
->e
!= e
)
762 free_qname(dns_rrl_t
*rrl
, dns_rrl_entry_t
*e
) {
763 dns_rrl_qname_buf_t
*qbuf
;
765 qbuf
= get_qname(rrl
, e
);
768 ISC_LIST_APPEND(rrl
->qname_free
, qbuf
, link
);
773 add_log_str(isc_buffer_t
*lb
, const char *str
, unsigned int str_len
) {
776 isc_buffer_availableregion(lb
, ®ion
);
777 if (str_len
>= region
.length
) {
778 if (region
.length
<= 0)
780 str_len
= region
.length
;
782 memmove(region
.base
, str
, str_len
);
783 isc_buffer_add(lb
, str_len
);
786 #define ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s)-1)
789 * Build strings for the logs
792 make_log_buf(dns_rrl_t
*rrl
, dns_rrl_entry_t
*e
,
793 const char *str1
, const char *str2
, isc_boolean_t plural
,
794 dns_name_t
*qname
, isc_boolean_t save_qname
,
795 dns_rrl_result_t rrl_result
, isc_result_t resp_result
,
796 char *log_buf
, unsigned int log_buf_len
)
799 dns_rrl_qname_buf_t
*qbuf
;
801 char strbuf
[ISC_MAX(sizeof("/123"), sizeof(" (12345678)"))];
803 isc_result_t msg_result
;
805 if (log_buf_len
<= 1) {
806 if (log_buf_len
== 1)
810 isc_buffer_init(&lb
, log_buf
, log_buf_len
-1);
813 add_log_str(&lb
, str1
, strlen(str1
));
815 add_log_str(&lb
, str2
, strlen(str2
));
817 switch (rrl_result
) {
818 case DNS_RRL_RESULT_OK
:
820 case DNS_RRL_RESULT_DROP
:
821 ADD_LOG_CSTR(&lb
, "drop ");
823 case DNS_RRL_RESULT_SLIP
:
824 ADD_LOG_CSTR(&lb
, "slip ");
831 switch (e
->key
.s
.rtype
) {
832 case DNS_RRL_RTYPE_QUERY
:
834 case DNS_RRL_RTYPE_REFERRAL
:
835 ADD_LOG_CSTR(&lb
, "referral ");
837 case DNS_RRL_RTYPE_NODATA
:
838 ADD_LOG_CSTR(&lb
, "NODATA ");
840 case DNS_RRL_RTYPE_NXDOMAIN
:
841 ADD_LOG_CSTR(&lb
, "NXDOMAIN ");
843 case DNS_RRL_RTYPE_ERROR
:
844 if (resp_result
== ISC_R_SUCCESS
) {
845 ADD_LOG_CSTR(&lb
, "error ");
847 rstr
= isc_result_totext(resp_result
);
848 add_log_str(&lb
, rstr
, strlen(rstr
));
849 ADD_LOG_CSTR(&lb
, " error ");
852 case DNS_RRL_RTYPE_ALL
:
853 ADD_LOG_CSTR(&lb
, "all ");
860 ADD_LOG_CSTR(&lb
, "responses to ");
862 ADD_LOG_CSTR(&lb
, "response to ");
864 memset(&cidr
, 0, sizeof(cidr
));
866 snprintf(strbuf
, sizeof(strbuf
), "/%d", rrl
->ipv6_prefixlen
);
867 cidr
.family
= AF_INET6
;
868 memset(&cidr
.type
.in6
, 0, sizeof(cidr
.type
.in6
));
869 memmove(&cidr
.type
.in6
, e
->key
.s
.ip
, sizeof(e
->key
.s
.ip
));
871 snprintf(strbuf
, sizeof(strbuf
), "/%d", rrl
->ipv4_prefixlen
);
872 cidr
.family
= AF_INET
;
873 cidr
.type
.in
.s_addr
= e
->key
.s
.ip
[0];
875 msg_result
= isc_netaddr_totext(&cidr
, &lb
);
876 if (msg_result
!= ISC_R_SUCCESS
)
877 ADD_LOG_CSTR(&lb
, "?");
878 add_log_str(&lb
, strbuf
, strlen(strbuf
));
880 if (e
->key
.s
.rtype
== DNS_RRL_RTYPE_QUERY
||
881 e
->key
.s
.rtype
== DNS_RRL_RTYPE_REFERRAL
||
882 e
->key
.s
.rtype
== DNS_RRL_RTYPE_NODATA
||
883 e
->key
.s
.rtype
== DNS_RRL_RTYPE_NXDOMAIN
) {
884 qbuf
= get_qname(rrl
, e
);
885 if (save_qname
&& qbuf
== NULL
&&
886 qname
!= NULL
&& dns_name_isabsolute(qname
)) {
888 * Capture the qname for the "stop limiting" message.
890 qbuf
= ISC_LIST_TAIL(rrl
->qname_free
);
892 ISC_LIST_UNLINK(rrl
->qname_free
, qbuf
, link
);
893 } else if (rrl
->num_qnames
< DNS_RRL_QNAMES
) {
894 qbuf
= isc_mem_get(rrl
->mctx
, sizeof(*qbuf
));
896 memset(qbuf
, 0, sizeof(*qbuf
));
897 ISC_LINK_INIT(qbuf
, link
);
898 qbuf
->index
= rrl
->num_qnames
;
899 rrl
->qnames
[rrl
->num_qnames
++] = qbuf
;
901 isc_log_write(dns_lctx
,
903 DNS_LOGMODULE_REQUEST
,
906 " failed for RRL qname",
911 e
->log_qname
= qbuf
->index
;
913 dns_fixedname_init(&qbuf
->qname
);
915 dns_fixedname_name(&qbuf
->qname
),
920 qname
= dns_fixedname_name(&qbuf
->qname
);
922 ADD_LOG_CSTR(&lb
, " for ");
923 (void)dns_name_totext(qname
, ISC_TRUE
, &lb
);
925 ADD_LOG_CSTR(&lb
, " for (?)");
927 if (e
->key
.s
.rtype
!= DNS_RRL_RTYPE_NXDOMAIN
) {
928 ADD_LOG_CSTR(&lb
, " ");
929 (void)dns_rdataclass_totext(e
->key
.s
.qclass
, &lb
);
930 if (e
->key
.s
.rtype
== DNS_RRL_RTYPE_QUERY
) {
931 ADD_LOG_CSTR(&lb
, " ");
932 (void)dns_rdatatype_totext(e
->key
.s
.qtype
, &lb
);
935 snprintf(strbuf
, sizeof(strbuf
), " (%08x)",
936 e
->key
.s
.qname_hash
);
937 add_log_str(&lb
, strbuf
, strlen(strbuf
));
941 * We saved room for '\0'.
943 log_buf
[isc_buffer_usedlength(&lb
)] = '\0';
947 log_end(dns_rrl_t
*rrl
, dns_rrl_entry_t
*e
, isc_boolean_t early
,
948 char *log_buf
, unsigned int log_buf_len
)
953 rrl
->log_only
? "would stop limiting "
955 ISC_TRUE
, NULL
, ISC_FALSE
,
956 DNS_RRL_RESULT_OK
, ISC_R_SUCCESS
,
957 log_buf
, log_buf_len
);
958 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
959 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_DROP
,
962 e
->logged
= ISC_FALSE
;
968 * Log messages for streams that have stopped being rate limited.
971 log_stops(dns_rrl_t
*rrl
, isc_stdtime_t now
, int limit
,
972 char *log_buf
, unsigned int log_buf_len
)
977 for (e
= rrl
->last_logged
; e
!= NULL
; e
= ISC_LIST_PREV(e
, lru
)) {
981 age
= get_age(rrl
, e
, now
);
982 if (age
< DNS_RRL_STOP_LOG_SECS
||
983 response_balance(rrl
, e
, age
) < 0)
987 log_end(rrl
, e
, now
== 0, log_buf
, log_buf_len
);
988 if (rrl
->num_logged
<= 0)
992 * Too many messages could stall real work.
995 rrl
->last_logged
= ISC_LIST_PREV(e
, lru
);
1000 INSIST(rrl
->num_logged
== 0);
1001 rrl
->log_stops_time
= now
;
1003 rrl
->last_logged
= e
;
1007 * Main rate limit interface.
1010 dns_rrl(dns_view_t
*view
,
1011 const isc_sockaddr_t
*client_addr
, isc_boolean_t is_tcp
,
1012 dns_rdataclass_t qclass
, dns_rdatatype_t qtype
,
1013 dns_name_t
*qname
, isc_result_t resp_result
, isc_stdtime_t now
,
1014 isc_boolean_t wouldlog
, char *log_buf
, unsigned int log_buf_len
)
1017 dns_rrl_rtype_t rtype
;
1019 isc_netaddr_t netclient
;
1023 isc_result_t result
;
1024 dns_rrl_result_t rrl_result
;
1026 INSIST(log_buf
!= NULL
&& log_buf_len
> 0);
1029 if (rrl
->exempt
!= NULL
) {
1030 isc_netaddr_fromsockaddr(&netclient
, client_addr
);
1031 result
= dns_acl_match(&netclient
, NULL
, rrl
->exempt
,
1032 &view
->aclenv
, &exempt_match
, NULL
);
1033 if (result
== ISC_R_SUCCESS
&& exempt_match
> 0)
1034 return (DNS_RRL_RESULT_OK
);
1040 * Estimate total query per second rate when scaling by qps.
1042 if (rrl
->qps_scale
== 0) {
1046 ++rrl
->qps_responses
;
1047 secs
= delta_rrl_time(rrl
->qps_time
, now
);
1051 qps
= (1.0*rrl
->qps_responses
) / secs
;
1052 if (secs
>= rrl
->window
) {
1053 if (isc_log_wouldlog(dns_lctx
,
1054 DNS_RRL_LOG_DEBUG3
))
1055 isc_log_write(dns_lctx
,
1056 DNS_LOGCATEGORY_RRL
,
1057 DNS_LOGMODULE_REQUEST
,
1059 "%d responses/%d seconds"
1061 rrl
->qps_responses
, secs
,
1064 rrl
->qps_responses
= 0;
1065 rrl
->qps_time
= now
;
1066 } else if (qps
< rrl
->qps
) {
1070 scale
= rrl
->qps_scale
/ qps
;
1074 * Do maintenance once per second.
1076 if (rrl
->num_logged
> 0 && rrl
->log_stops_time
!= now
)
1077 log_stops(rrl
, now
, 8, log_buf
, log_buf_len
);
1080 * Notice TCP responses when scaling limits by qps.
1081 * Do not try to rate limit TCP responses.
1085 e
= get_entry(rrl
, client_addr
,
1086 0, dns_rdatatype_none
, NULL
,
1087 DNS_RRL_RTYPE_TCP
, now
, ISC_TRUE
,
1088 log_buf
, log_buf_len
);
1090 e
->responses
= -(rrl
->window
+1);
1091 set_age(rrl
, e
, now
);
1095 return (ISC_R_SUCCESS
);
1099 * Find the right kind of entry, creating it if necessary.
1100 * If that is impossible, then nothing more can be done
1102 switch (resp_result
) {
1104 rtype
= DNS_RRL_RTYPE_QUERY
;
1106 case DNS_R_DELEGATION
:
1107 rtype
= DNS_RRL_RTYPE_REFERRAL
;
1110 rtype
= DNS_RRL_RTYPE_NODATA
;
1112 case DNS_R_NXDOMAIN
:
1113 rtype
= DNS_RRL_RTYPE_NXDOMAIN
;
1116 rtype
= DNS_RRL_RTYPE_ERROR
;
1119 e
= get_entry(rrl
, client_addr
, qclass
, qtype
, qname
, rtype
,
1120 now
, ISC_TRUE
, log_buf
, log_buf_len
);
1123 return (DNS_RRL_RESULT_OK
);
1126 if (isc_log_wouldlog(dns_lctx
, DNS_RRL_LOG_DEBUG1
)) {
1128 * Do not worry about speed or releasing the lock.
1129 * This message appears before messages from debit_rrl_entry().
1131 make_log_buf(rrl
, e
, "consider limiting ", NULL
, ISC_FALSE
,
1132 qname
, ISC_FALSE
, DNS_RRL_RESULT_OK
, resp_result
,
1133 log_buf
, log_buf_len
);
1134 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
1135 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_DEBUG1
,
1139 rrl_result
= debit_rrl_entry(rrl
, e
, qps
, scale
, client_addr
, now
,
1140 log_buf
, log_buf_len
);
1142 if (rrl
->all_per_second
.r
!= 0) {
1144 * We must debit the all-per-second token bucket if we have
1145 * an all-per-second limit for the IP address.
1146 * The all-per-second limit determines the log message
1147 * when both limits are hit.
1148 * The response limiting must continue if the
1149 * all-per-second limiting lapses.
1151 dns_rrl_entry_t
*e_all
;
1152 dns_rrl_result_t rrl_all_result
;
1154 e_all
= get_entry(rrl
, client_addr
,
1155 0, dns_rdatatype_none
, NULL
,
1156 DNS_RRL_RTYPE_ALL
, now
, ISC_TRUE
,
1157 log_buf
, log_buf_len
);
1158 if (e_all
== NULL
) {
1160 return (DNS_RRL_RESULT_OK
);
1162 rrl_all_result
= debit_rrl_entry(rrl
, e_all
, qps
, scale
,
1164 log_buf
, log_buf_len
);
1165 if (rrl_all_result
!= DNS_RRL_RESULT_OK
) {
1169 rrl_result
= rrl_all_result
;
1170 if (rrl_result
== DNS_RRL_RESULT_OK
)
1171 level
= DNS_RRL_LOG_DEBUG2
;
1173 level
= DNS_RRL_LOG_DEBUG1
;
1174 if (isc_log_wouldlog(dns_lctx
, level
)) {
1175 make_log_buf(rrl
, e
,
1176 "prefer all-per-second limiting ",
1177 NULL
, ISC_TRUE
, qname
, ISC_FALSE
,
1178 DNS_RRL_RESULT_OK
, resp_result
,
1179 log_buf
, log_buf_len
);
1180 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
1181 DNS_LOGMODULE_REQUEST
, level
,
1187 if (rrl_result
== DNS_RRL_RESULT_OK
) {
1189 return (DNS_RRL_RESULT_OK
);
1193 * Log occassionally in the rate-limit category.
1195 if ((!e
->logged
|| e
->log_secs
>= DNS_RRL_MAX_LOG_SECS
) &&
1196 isc_log_wouldlog(dns_lctx
, DNS_RRL_LOG_DROP
)) {
1197 make_log_buf(rrl
, e
, rrl
->log_only
? "would " : NULL
,
1198 e
->logged
? "continue limiting " : "limit ",
1199 ISC_TRUE
, qname
, ISC_TRUE
,
1200 DNS_RRL_RESULT_OK
, resp_result
,
1201 log_buf
, log_buf_len
);
1203 e
->logged
= ISC_TRUE
;
1204 if (++rrl
->num_logged
<= 1)
1205 rrl
->last_logged
= e
;
1210 * Avoid holding the lock.
1216 isc_log_write(dns_lctx
, DNS_LOGCATEGORY_RRL
,
1217 DNS_LOGMODULE_REQUEST
, DNS_RRL_LOG_DROP
,
1222 * Make a log message for the caller.
1225 make_log_buf(rrl
, e
,
1226 rrl
->log_only
? "would rate limit " : "rate limit ",
1227 NULL
, ISC_FALSE
, qname
, ISC_FALSE
,
1228 rrl_result
, resp_result
, log_buf
, log_buf_len
);
1232 * Do not save the qname unless we might need it for
1233 * the ending log message.
1240 return (rrl_result
);
1244 dns_rrl_view_destroy(dns_view_t
*view
) {
1248 char log_buf
[DNS_RRL_LOG_BUF_LEN
];
1257 * Assume the caller takes care of locking the view and anything else.
1260 if (rrl
->num_logged
> 0)
1261 log_stops(rrl
, 0, ISC_INT32_MAX
, log_buf
, sizeof(log_buf
));
1263 for (i
= 0; i
< DNS_RRL_QNAMES
; ++i
) {
1264 if (rrl
->qnames
[i
] == NULL
)
1266 isc_mem_put(rrl
->mctx
, rrl
->qnames
[i
], sizeof(*rrl
->qnames
[i
]));
1269 if (rrl
->exempt
!= NULL
)
1270 dns_acl_detach(&rrl
->exempt
);
1272 DESTROYLOCK(&rrl
->lock
);
1274 while (!ISC_LIST_EMPTY(rrl
->blocks
)) {
1275 b
= ISC_LIST_HEAD(rrl
->blocks
);
1276 ISC_LIST_UNLINK(rrl
->blocks
, b
, link
);
1277 isc_mem_put(rrl
->mctx
, b
, b
->size
);
1282 isc_mem_put(rrl
->mctx
, h
,
1283 sizeof(*h
) + (h
->length
- 1) * sizeof(h
->bins
[0]));
1287 isc_mem_put(rrl
->mctx
, h
,
1288 sizeof(*h
) + (h
->length
- 1) * sizeof(h
->bins
[0]));
1290 isc_mem_putanddetach(&rrl
->mctx
, rrl
, sizeof(*rrl
));
1294 dns_rrl_init(dns_rrl_t
**rrlp
, dns_view_t
*view
, int min_entries
) {
1296 isc_result_t result
;
1300 rrl
= isc_mem_get(view
->mctx
, sizeof(*rrl
));
1302 return (ISC_R_NOMEMORY
);
1303 memset(rrl
, 0, sizeof(*rrl
));
1304 isc_mem_attach(view
->mctx
, &rrl
->mctx
);
1305 result
= isc_mutex_init(&rrl
->lock
);
1306 if (result
!= ISC_R_SUCCESS
) {
1307 isc_mem_putanddetach(&rrl
->mctx
, rrl
, sizeof(*rrl
));
1310 isc_stdtime_get(&rrl
->ts_bases
[0]);
1314 result
= expand_entries(rrl
, min_entries
);
1315 if (result
!= ISC_R_SUCCESS
) {
1316 dns_rrl_view_destroy(view
);
1319 result
= expand_rrl_hash(rrl
, 0);
1320 if (result
!= ISC_R_SUCCESS
) {
1321 dns_rrl_view_destroy(view
);
1326 return (ISC_R_SUCCESS
);