#include "opt_ratelimit.h"
#include "opt_kern_tls.h"

#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/hhook.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>		/* for proc0 declaration */
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/qmath.h>
#include <sys/stats.h>		/* Must come after qmath.h and tree.h */
#include <sys/refcount.h>
#include <sys/queue.h>
#include <sys/tim_filter.h>
#include <sys/kthread.h>
#include <sys/kern_prefetch.h>
#include <sys/protosw.h>
#include <sys/sched.h>

#include <machine/cpu.h>

#include <net/route.h>
#include <net/route/nhop.h>

#define TCPSTATES		/* for logging */

#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_log_buf.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_hpts.h>
#include <netinet/tcp_accounting.h>
#include <netinet/tcpip.h>
#include <netinet/cc/cc.h>
#include <netinet/cc/cc_newreno.h>
#include <netinet/tcp_fastopen.h>
#include <netinet/tcp_lro.h>
#ifdef NETFLIX_SHARED_CWND
#include <netinet/tcp_shared_cwnd.h>
#endif
#include <netinet/tcp_offload.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcp_ecn.h>

#include <netipsec/ipsec_support.h>
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#endif				/* IPSEC */
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <machine/in_cksum.h>

#include <security/mac/mac_framework.h>

#include "sack_filter.h"
#include "tcp_rack.h"
#include "tailq_hash.h"
#include "opt_global.h"
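
/*
 * The tailq_hash maps sequence space onto MAX_HASH_ENTRIES buckets,
 * each covering SEQ_BUCKET_SIZE bytes of sequence space, with a TAILQ
 * of rack_sendmap entries per bucket kept in r_start order.  The hash
 * also caches the lowest and highest tracked sequence (min/max) and
 * the corresponding entries (rsm_min/rsm_max) so that the common
 * lookups at either edge stay cheap.
 */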
struct rack_sendmap *
tqhash_min(struct tailq_hash *hs)
{
	struct rack_sendmap *rsm;

	rsm = hs->rsm_min;
	return (rsm);
}
struct rack_sendmap *
tqhash_max(struct tailq_hash *hs)
{
	struct rack_sendmap *rsm;

	rsm = hs->rsm_max;
	return (rsm);
}
int
tqhash_empty(struct tailq_hash *hs)
{
	if (hs->count == 0)
		return (1);
	return (0);
}
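
/*
 * Look up the rack_sendmap that contains 'seq'.  The sequence is
 * hashed to its bucket and that bucket is scanned in order.  When the
 * cumulative ack has advanced, the entry holding 'seq' can still be
 * sitting one bucket behind where it now hashes, so on a miss we also
 * walk the previous bucket backwards.
 */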
struct rack_sendmap *
tqhash_find(struct tailq_hash *hs, uint32_t seq)
{
	struct rack_sendmap *e;
	int bindex, pbucket, fc = 1;

	if ((SEQ_LT(seq, hs->min)) ||
	    (hs->count == 0) ||
	    (SEQ_GEQ(seq, hs->max))) {
		/* Not here */
		return (NULL);
	}
	bindex = seq / SEQ_BUCKET_SIZE;
	bindex %= MAX_HASH_ENTRIES;
	/* Let's look through the bucket it belongs to */
	if (TAILQ_EMPTY(&hs->ht[bindex])) {
		goto look_backwards;
	}
	TAILQ_FOREACH(e, &hs->ht[bindex], next) {
		if (fc == 1) {
			/*
			 * Special check for when a cum-ack
			 * has moved up over a seq and now it's
			 * a bucket behind where it belongs. In
			 * the case of SACKs which create new rsm's
			 * this won't occur.
			 */
			if (SEQ_GT(e->r_start, seq)) {
				goto look_backwards;
			}
			fc = 0;
		}
		if (SEQ_GEQ(seq, e->r_start) &&
		    (SEQ_LT(seq, e->r_end))) {
			/* It's in this block */
			return (e);
		}
	}
	/* Did not find it */
	return (NULL);
look_backwards:
	if (bindex == 0)
		pbucket = MAX_HASH_ENTRIES - 1;
	else
		pbucket = bindex - 1;
	TAILQ_FOREACH_REVERSE(e, &hs->ht[pbucket], rack_head, next) {
		if (SEQ_GEQ(seq, e->r_start) &&
		    (SEQ_LT(seq, e->r_end))) {
			/* It's in this block */
			return (e);
		}
		if (SEQ_GEQ(e->r_end, seq))
			break;
	}
	return (NULL);
}
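
/*
 * tqhash_next/tqhash_prev walk the map in sequence order: follow the
 * TAILQ within the current bucket and, once it is exhausted, continue
 * with the first (or last) entry of the adjacent bucket.
 */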
struct rack_sendmap *
tqhash_next(struct tailq_hash *hs, struct rack_sendmap *rsm)
{
	struct rack_sendmap *e;

	e = TAILQ_NEXT(rsm, next);
	if (e == NULL) {
		/* Move to next bucket */
		int nxt;

		nxt = rsm->bindex + 1;
		if (nxt >= MAX_HASH_ENTRIES)
			nxt = 0;
		e = TAILQ_FIRST(&hs->ht[nxt]);
	}
	return (e);
}
struct rack_sendmap *
tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm)
{
	struct rack_sendmap *e;

	e = TAILQ_PREV(rsm, rack_head, next);
	if (e == NULL) {
		/* Move to the previous bucket */
		int prev;

		if (rsm->bindex > 0)
			prev = rsm->bindex - 1;
		else
			prev = MAX_HASH_ENTRIES - 1;
		e = TAILQ_LAST(&hs->ht[prev], rack_head);
	}
	return (e);
}
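
/*
 * Remove an entry and keep the cached min/max consistent.  A
 * REMOVE_TYPE_CUMACK removal advances min to the entry's r_end and
 * re-caches rsm_min; removing the current maximum re-caches rsm_max
 * from its predecessor.
 */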
void
tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type)
{
	hs->count--;
	if (hs->count == 0) {
		hs->min = hs->max;
		hs->rsm_max = hs->rsm_min = NULL;
	} else if (type == REMOVE_TYPE_CUMACK) {
		hs->min = rsm->r_end;
		hs->rsm_min = tqhash_next(hs, rsm);
	} else if (rsm == hs->rsm_max) {
		hs->rsm_max = tqhash_prev(hs, rsm);
		hs->max = hs->rsm_max->r_end;
	}
	TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next);
}
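
/*
 * Insert a new entry.  The bucket is chosen from r_start; if r_end
 * hashes to a different bucket, the entry straddles a bucket boundary
 * and is flagged RACK_STRADDLE.  The cached min/max are updated and
 * the entry is placed in r_start order within its bucket, with the
 * common case of appending at the tail checked first.
 */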
int
tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm)
{
	struct rack_sendmap *e, *l;
	int inserted = 0;
	uint32_t ebucket;

	if (hs->count > 0) {
		/* Reject an entry outside the allowed range or one already present */
		if ((rsm->r_end - hs->min) > MAX_ALLOWED_SEQ_RANGE) {
			return (-1);
		}
		e = tqhash_find(hs, rsm->r_start);
		if (e) {
			return (-2);
		}
	}
	rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE;
	rsm->bindex %= MAX_HASH_ENTRIES;
	ebucket = rsm->r_end / SEQ_BUCKET_SIZE;
	ebucket %= MAX_HASH_ENTRIES;
	if (ebucket != rsm->bindex) {
		/* This RSM straddles the bucket boundary */
		rsm->r_flags |= RACK_STRADDLE;
	} else {
		rsm->r_flags &= ~RACK_STRADDLE;
	}
	if (hs->count == 0) {
		/* Special case, first entry sets min and max */
		hs->min = rsm->r_start;
		hs->max = rsm->r_end;
		hs->rsm_min = hs->rsm_max = rsm;
		hs->count = 1;
	} else {
		hs->count++;
		if (SEQ_GEQ(rsm->r_end, hs->max)) {
			hs->max = rsm->r_end;
			hs->rsm_max = rsm;
		}
		if (SEQ_LEQ(rsm->r_start, hs->min)) {
			hs->min = rsm->r_start;
			hs->rsm_min = rsm;
		}
	}
	/* Check the common case of inserting at the end */
	l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head);
	if ((l == NULL) || (SEQ_GT(rsm->r_start, l->r_start))) {
		TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
	} else {
		TAILQ_FOREACH(e, &hs->ht[rsm->bindex], next) {
			if (SEQ_LEQ(rsm->r_start, e->r_start)) {
				inserted = 1;
				TAILQ_INSERT_BEFORE(e, rsm, next);
				break;
			}
		}
		if (inserted == 0) {
			TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
		}
	}
	return (0);
}
void
tqhash_init(struct tailq_hash *hs)
{
	int i;

	for (i = 0; i < MAX_HASH_ENTRIES; i++) {
		TAILQ_INIT(&hs->ht[i]);
	}
	hs->min = hs->max = 0;
	hs->rsm_min = hs->rsm_max = NULL;
	hs->count = 0;
}
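
/*
 * Trim the front of the map up to th_ack (a partial ack of the lowest
 * entry).  th_ack must fall inside the tracked range and must not
 * consume the entire first entry; callers that ack a whole entry
 * should remove it instead.
 */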
int
tqhash_trim(struct tailq_hash *hs, uint32_t th_ack)
{
	struct rack_sendmap *rsm;

	if (SEQ_LT(th_ack, hs->min)) {
		/* It can't be behind our current min */
		return (-1);
	}
	if (SEQ_GEQ(th_ack, hs->max)) {
		/* It can't be beyond or at our current max */
		return (-2);
	}
	rsm = tqhash_min(hs);
	if (rsm == NULL) {
		/* nothing to trim */
		return (-3);
	}
	if (SEQ_GEQ(th_ack, rsm->r_end)) {
		/*
		 * You can't trim all the bytes, instead
		 * you need to remove the entry.
		 */
		return (-4);
	}
	if (SEQ_GT(th_ack, hs->min))
		hs->min = th_ack;
	/*
	 * Should we trim it for the caller?
	 * They may have already, which is ok...
	 */
	if (SEQ_GT(th_ack, rsm->r_start)) {
		rsm->r_start = th_ack;
	}
	return (0);
}
void
tqhash_update_end(struct tailq_hash *hs, struct rack_sendmap *rsm,
    uint32_t th_ack)
{
	if (hs->max == rsm->r_end)
		hs->max = th_ack;
	rsm->r_end = th_ack;
}