1 /* $NetBSD: tcp_subr.c,v 1.237 2009/05/27 17:41:03 pooka Exp $ */
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * Copyright (c) 1997, 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc.
34 * All rights reserved.
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
38 * Facility, NASA Ames Research Center.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
63 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
64 * The Regents of the University of California. All rights reserved.
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
90 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
93 #include <sys/cdefs.h>
94 __KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.237 2009/05/27 17:41:03 pooka Exp $");
97 #include "opt_ipsec.h"
98 #include "opt_tcp_compat_42.h"
99 #include "opt_inet_csum.h"
100 #include "opt_mbuftrace.h"
103 #include <sys/param.h>
104 #include <sys/proc.h>
105 #include <sys/systm.h>
106 #include <sys/malloc.h>
107 #include <sys/mbuf.h>
108 #include <sys/socket.h>
109 #include <sys/socketvar.h>
110 #include <sys/protosw.h>
111 #include <sys/errno.h>
112 #include <sys/kernel.h>
113 #include <sys/pool.h>
119 #include <net/route.h>
122 #include <netinet/in.h>
123 #include <netinet/in_systm.h>
124 #include <netinet/ip.h>
125 #include <netinet/in_pcb.h>
126 #include <netinet/ip_var.h>
127 #include <netinet/ip_icmp.h>
131 #include <netinet/in.h>
133 #include <netinet/ip6.h>
134 #include <netinet6/in6_pcb.h>
135 #include <netinet6/ip6_var.h>
136 #include <netinet6/in6_var.h>
137 #include <netinet6/ip6protosw.h>
138 #include <netinet/icmp6.h>
139 #include <netinet6/nd6.h>
142 #include <netinet/tcp.h>
143 #include <netinet/tcp_fsm.h>
144 #include <netinet/tcp_seq.h>
145 #include <netinet/tcp_timer.h>
146 #include <netinet/tcp_var.h>
147 #include <netinet/tcp_private.h>
148 #include <netinet/tcp_congctl.h>
149 #include <netinet/tcpip.h>
152 #include <netinet6/ipsec.h>
153 #include <netkey/key.h>
157 #include <netipsec/ipsec.h>
158 #include <netipsec/xform.h>
160 #include <netipsec/ipsec6.h>
162 #include <netipsec/key.h>
163 #endif /* FAST_IPSEC*/
166 struct inpcbtable tcbtable
; /* head of queue of active tcpcb's */
167 u_int32_t tcp_now
; /* for RFC 1323 timestamps */
169 percpu_t
*tcpstat_percpu
;
171 /* patchable/settable parameters for tcp */
172 int tcp_mssdflt
= TCP_MSS
;
173 int tcp_minmss
= TCP_MINMSS
;
174 int tcp_rttdflt
= TCPTV_SRTTDFLT
/ PR_SLOWHZ
;
/*
 * TCP feature switches (part of the patchable/settable parameters).
 *
 * tcp_tcpcb_template() requests window scaling only when both
 * tcp_do_rfc1323 and tcp_do_win_scale are non-zero, and timestamps only
 * when both tcp_do_rfc1323 and tcp_do_timestamps are non-zero.
 */
int tcp_do_rfc1323 = 1;		/* window scaling / timestamps (obsolete) */
int tcp_do_rfc1948 = 0;		/* ISS by cryptographic hash */
int tcp_do_sack = 1;		/* selective acknowledgement */
int tcp_do_win_scale = 1;	/* RFC1323 window scaling */
int tcp_do_timestamps = 1;	/* RFC1323 timestamps */
int tcp_ack_on_push = 0;	/* set to enable immediate ACK-on-PUSH */
int tcp_do_ecn = 0;		/* Explicit Congestion Notification */

/*
 * Initial slow start windows.  Both defaults may be overridden by
 * defining the macro before this point; every conditional is closed so
 * the declarations that follow are always compiled.
 */
#ifndef TCP_INIT_WIN
#define TCP_INIT_WIN 0		/* initial slow start window */
#endif
#ifndef TCP_INIT_WIN_LOCAL
#define TCP_INIT_WIN_LOCAL 4	/* initial slow start window for local nets */
#endif
int tcp_init_win = TCP_INIT_WIN;
int tcp_init_win_local = TCP_INIT_WIN_LOCAL;
/*
 * Further patchable/settable TCP parameters.
 */
int tcp_mss_ifmtu = 0;

/*
 * Exactly one definition of tcp_compat_42 may be compiled: 1 when the
 * 4.2BSD compatibility option is configured, 0 otherwise.
 * NOTE(review): the option name TCP_COMPAT_42 is taken from the
 * "opt_tcp_compat_42.h" include at the top of this file -- confirm.
 */
#ifdef TCP_COMPAT_42
int tcp_compat_42 = 1;
#else
int tcp_compat_42 = 0;
#endif

int tcp_rst_ppslim = 100;	/* 100pps */
int tcp_ackdrop_ppslim = 100;	/* 100pps */
int tcp_do_loopback_cksum = 0;
int tcp_do_abc = 1;		/* RFC3465 Appropriate byte counting. */
int tcp_abc_aggressive = 1;	/* 1: L=2*SMSS  0: L=1*SMSS */
int tcp_sack_tp_maxholes = 32;
int tcp_sack_globalmaxholes = 1024;
int tcp_sack_globalholes = 0;
int tcp_ecn_maxretries = 1;
/*
 * Size passed (twice) to in_pcbinit() for tcbtable.  The default can be
 * overridden by defining TCBHASHSIZE beforehand, the same way
 * TCP_INIT_WIN_LOCAL is handled.
 */
#ifndef TCBHASHSIZE
#define TCBHASHSIZE 128
#endif
int tcbhashsize = TCBHASHSIZE;

/* syn hash parameters */
#define TCP_SYN_HASH_SIZE 293
#define TCP_SYN_BUCKET_SIZE 35
int tcp_syn_cache_size = TCP_SYN_HASH_SIZE;
/* overall limit: hash size times bucket size */
int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE * TCP_SYN_BUCKET_SIZE;
/* per-bucket limit: three times the nominal bucket size */
int tcp_syn_bucket_limit = 3 * TCP_SYN_BUCKET_SIZE;
220 struct syn_cache_head tcp_syn_cache
[TCP_SYN_HASH_SIZE
];
222 int tcp_freeq(struct tcpcb
*);
225 void tcp_mtudisc_callback(struct in_addr
);
228 void tcp6_mtudisc_callback(struct in6_addr
*);
232 void tcp6_mtudisc(struct in6pcb
*, int);
235 static struct pool tcpcb_pool
;
#ifdef TCP_CSUM_COUNTERS
#include <sys/device.h>

/*
 * Checksum event counters, compiled only with TCP_CSUM_COUNTERS.
 * Each counter is statically initialized and then attached with
 * EVCNT_ATTACH_STATIC().  The IPv4 set is reported under "tcp" and the
 * IPv6 set under "tcp6"; each set sits inside its own address-family
 * conditional so that every #endif below has a matching opener.
 */
#if defined(INET)
struct evcnt tcp_hwcsum_bad =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "hwcsum bad");
struct evcnt tcp_hwcsum_ok =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "hwcsum ok");
struct evcnt tcp_hwcsum_data =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "hwcsum data");
struct evcnt tcp_swcsum =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "swcsum");

EVCNT_ATTACH_STATIC(tcp_hwcsum_bad);
EVCNT_ATTACH_STATIC(tcp_hwcsum_ok);
EVCNT_ATTACH_STATIC(tcp_hwcsum_data);
EVCNT_ATTACH_STATIC(tcp_swcsum);
#endif /* defined(INET) */

#if defined(INET6)
struct evcnt tcp6_hwcsum_bad =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "hwcsum bad");
struct evcnt tcp6_hwcsum_ok =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "hwcsum ok");
struct evcnt tcp6_hwcsum_data =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "hwcsum data");
struct evcnt tcp6_swcsum =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "swcsum");

EVCNT_ATTACH_STATIC(tcp6_hwcsum_bad);
EVCNT_ATTACH_STATIC(tcp6_hwcsum_ok);
EVCNT_ATTACH_STATIC(tcp6_hwcsum_data);
EVCNT_ATTACH_STATIC(tcp6_swcsum);
#endif /* defined(INET6) */
#endif /* TCP_CSUM_COUNTERS */
#ifdef TCP_OUTPUT_COUNTERS
#include <sys/device.h>

/*
 * Output-path event counters, compiled only with TCP_OUTPUT_COUNTERS.
 * All are reported under "tcp"; the second string of each initializer
 * is the counter's own name.
 */
struct evcnt tcp_output_bigheader =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output big header");
struct evcnt tcp_output_predict_hit =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output predict hit");
struct evcnt tcp_output_predict_miss =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output predict miss");
struct evcnt tcp_output_copysmall =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output copy small");
struct evcnt tcp_output_copybig =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output copy big");
struct evcnt tcp_output_refbig =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output reference big");

/* Attach every counter defined above. */
EVCNT_ATTACH_STATIC(tcp_output_bigheader);
EVCNT_ATTACH_STATIC(tcp_output_predict_hit);
EVCNT_ATTACH_STATIC(tcp_output_predict_miss);
EVCNT_ATTACH_STATIC(tcp_output_copysmall);
EVCNT_ATTACH_STATIC(tcp_output_copybig);
EVCNT_ATTACH_STATIC(tcp_output_refbig);

#endif /* TCP_OUTPUT_COUNTERS */
#ifdef TCP_REASS_COUNTERS
#include <sys/device.h>

/*
 * Reassembly event counters, compiled only with TCP_REASS_COUNTERS.
 * tcp_reass_ ("calls") is initialized with a NULL second argument;
 * every other counter passes &tcp_reass_ in that position.
 */
struct evcnt tcp_reass_ =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp_reass", "calls");
struct evcnt tcp_reass_empty =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "insert into empty queue");

/*
 * Iteration-count counters.  Slot 0 is named ">7 iterations"; slots
 * 1..7 are named for exactly that many iterations.  The initializer
 * list is closed here so the definitions below parse.
 */
struct evcnt tcp_reass_iteration[8] = {
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	    "tcp_reass", ">7 iterations"),
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	    "tcp_reass", "1 iteration"),
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	    "tcp_reass", "2 iterations"),
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	    "tcp_reass", "3 iterations"),
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	    "tcp_reass", "4 iterations"),
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	    "tcp_reass", "5 iterations"),
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	    "tcp_reass", "6 iterations"),
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	    "tcp_reass", "7 iterations"),
};

struct evcnt tcp_reass_prependfirst =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "prepend to first");
struct evcnt tcp_reass_prepend =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "prepend");
struct evcnt tcp_reass_insert =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "insert");
struct evcnt tcp_reass_inserttail =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "insert at tail");
struct evcnt tcp_reass_append =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "append");
struct evcnt tcp_reass_appendtail =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "append to tail fragment");
struct evcnt tcp_reass_overlaptail =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "overlap at end");
struct evcnt tcp_reass_overlapfront =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "overlap at start");
struct evcnt tcp_reass_segdup =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "duplicate segment");
struct evcnt tcp_reass_fragdup =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_,
	"tcp_reass", "duplicate fragment");

/* Attach every counter; array slots are attached one by one. */
EVCNT_ATTACH_STATIC(tcp_reass_);
EVCNT_ATTACH_STATIC(tcp_reass_empty);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 0);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 1);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 2);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 3);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 4);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 5);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 6);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 7);
EVCNT_ATTACH_STATIC(tcp_reass_prependfirst);
EVCNT_ATTACH_STATIC(tcp_reass_prepend);
EVCNT_ATTACH_STATIC(tcp_reass_insert);
EVCNT_ATTACH_STATIC(tcp_reass_inserttail);
EVCNT_ATTACH_STATIC(tcp_reass_append);
EVCNT_ATTACH_STATIC(tcp_reass_appendtail);
EVCNT_ATTACH_STATIC(tcp_reass_overlaptail);
EVCNT_ATTACH_STATIC(tcp_reass_overlapfront);
EVCNT_ATTACH_STATIC(tcp_reass_segdup);
EVCNT_ATTACH_STATIC(tcp_reass_fragdup);

#endif /* TCP_REASS_COUNTERS */
361 struct mowner tcp_mowner
= MOWNER_INIT("tcp", "");
362 struct mowner tcp_rx_mowner
= MOWNER_INIT("tcp", "rx");
363 struct mowner tcp_tx_mowner
= MOWNER_INIT("tcp", "tx");
364 struct mowner tcp_sock_mowner
= MOWNER_INIT("tcp", "sock");
365 struct mowner tcp_sock_rx_mowner
= MOWNER_INIT("tcp", "sock rx");
366 struct mowner tcp_sock_tx_mowner
= MOWNER_INIT("tcp", "sock tx");
377 in_pcbinit(&tcbtable
, tcbhashsize
, tcbhashsize
);
378 pool_init(&tcpcb_pool
, sizeof(struct tcpcb
), 0, 0, 0, "tcpcbpl",
381 hlen
= sizeof(struct ip
) + sizeof(struct tcphdr
);
383 if (sizeof(struct ip
) < sizeof(struct ip6_hdr
))
384 hlen
= sizeof(struct ip6_hdr
) + sizeof(struct tcphdr
);
386 if (max_protohdr
< hlen
)
388 if (max_linkhdr
+ hlen
> MHLEN
)
392 icmp_mtudisc_callback_register(tcp_mtudisc_callback
);
395 icmp6_mtudisc_callback_register(tcp6_mtudisc_callback
);
400 /* Initialize timer state. */
403 /* Initialize the compressed state engine. */
406 /* Initialize the congestion control algorithms. */
409 /* Initialize the TCPCB template. */
410 tcp_tcpcb_template();
412 /* Initialize reassembly queue */
418 MOWNER_ATTACH(&tcp_tx_mowner
);
419 MOWNER_ATTACH(&tcp_rx_mowner
);
420 MOWNER_ATTACH(&tcp_reass_mowner
);
421 MOWNER_ATTACH(&tcp_sock_mowner
);
422 MOWNER_ATTACH(&tcp_sock_tx_mowner
);
423 MOWNER_ATTACH(&tcp_sock_rx_mowner
);
424 MOWNER_ATTACH(&tcp_mowner
);
426 tcpstat_percpu
= percpu_alloc(sizeof(uint64_t) * TCP_NSTATS
);
430 * Create template to be used to send tcp packets on a connection.
431 * Call after host entry created, allocates an mbuf and fills
432 * in a skeletal tcp/ip header, minimizing the amount of work
433 * necessary when the connection is used.
436 tcp_template(struct tcpcb
*tp
)
438 struct inpcb
*inp
= tp
->t_inpcb
;
440 struct in6pcb
*in6p
= tp
->t_in6pcb
;
446 switch (tp
->t_family
) {
448 hlen
= sizeof(struct ip
);
453 /* mapped addr case */
454 if (IN6_IS_ADDR_V4MAPPED(&in6p
->in6p_laddr
)
455 && IN6_IS_ADDR_V4MAPPED(&in6p
->in6p_faddr
))
459 return NULL
; /*EINVAL*/
462 hlen
= sizeof(struct ip6_hdr
);
464 /* more sanity check? */
467 return NULL
; /*EINVAL*/
470 hlen
= 0; /*pacify gcc*/
471 return NULL
; /*EAFNOSUPPORT*/
474 if (hlen
+ sizeof(struct tcphdr
) > MCLBYTES
)
475 panic("mclbytes too small for t_template");
478 if (m
&& m
->m_len
== hlen
+ sizeof(struct tcphdr
))
483 m
= tp
->t_template
= NULL
;
484 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
);
485 if (m
&& hlen
+ sizeof(struct tcphdr
) > MHLEN
) {
486 MCLGET(m
, M_DONTWAIT
);
487 if ((m
->m_flags
& M_EXT
) == 0) {
494 MCLAIM(m
, &tcp_mowner
);
495 m
->m_pkthdr
.len
= m
->m_len
= hlen
+ sizeof(struct tcphdr
);
498 memset(mtod(m
, void *), 0, m
->m_len
);
500 n
= (struct tcphdr
*)(mtod(m
, char *) + hlen
);
502 switch (tp
->t_family
) {
506 mtod(m
, struct ip
*)->ip_v
= 4;
507 mtod(m
, struct ip
*)->ip_hl
= hlen
>> 2;
508 ipov
= mtod(m
, struct ipovly
*);
509 ipov
->ih_pr
= IPPROTO_TCP
;
510 ipov
->ih_len
= htons(sizeof(struct tcphdr
));
512 ipov
->ih_src
= inp
->inp_laddr
;
513 ipov
->ih_dst
= inp
->inp_faddr
;
517 /* mapped addr case */
518 bcopy(&in6p
->in6p_laddr
.s6_addr32
[3], &ipov
->ih_src
,
519 sizeof(ipov
->ih_src
));
520 bcopy(&in6p
->in6p_faddr
.s6_addr32
[3], &ipov
->ih_dst
,
521 sizeof(ipov
->ih_dst
));
525 * Compute the pseudo-header portion of the checksum
526 * now. We incrementally add in the TCP option and
527 * payload lengths later, and then compute the TCP
528 * checksum right before the packet is sent off onto
531 n
->th_sum
= in_cksum_phdr(ipov
->ih_src
.s_addr
,
533 htons(sizeof(struct tcphdr
) + IPPROTO_TCP
));
540 mtod(m
, struct ip
*)->ip_v
= 6;
541 ip6
= mtod(m
, struct ip6_hdr
*);
542 ip6
->ip6_nxt
= IPPROTO_TCP
;
543 ip6
->ip6_plen
= htons(sizeof(struct tcphdr
));
544 ip6
->ip6_src
= in6p
->in6p_laddr
;
545 ip6
->ip6_dst
= in6p
->in6p_faddr
;
546 ip6
->ip6_flow
= in6p
->in6p_flowinfo
& IPV6_FLOWINFO_MASK
;
547 if (ip6_auto_flowlabel
) {
548 ip6
->ip6_flow
&= ~IPV6_FLOWLABEL_MASK
;
550 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK
);
552 ip6
->ip6_vfc
&= ~IPV6_VERSION_MASK
;
553 ip6
->ip6_vfc
|= IPV6_VERSION
;
556 * Compute the pseudo-header portion of the checksum
557 * now. We incrementally add in the TCP option and
558 * payload lengths later, and then compute the TCP
559 * checksum right before the packet is sent off onto
562 n
->th_sum
= in6_cksum_phdr(&in6p
->in6p_laddr
,
563 &in6p
->in6p_faddr
, htonl(sizeof(struct tcphdr
)),
570 n
->th_sport
= inp
->inp_lport
;
571 n
->th_dport
= inp
->inp_fport
;
575 n
->th_sport
= in6p
->in6p_lport
;
576 n
->th_dport
= in6p
->in6p_fport
;
590 * Send a single message to the TCP at address specified by
591 * the given TCP/IP header. If m == 0, then we make a copy
592 * of the tcpiphdr at ti and send directly to the addressed host.
593 * This is used to force keep alive messages out using the TCP
594 * template for a connection tp->t_template. If flags are given
595 * then we send a message back to the TCP which originated the
596 * segment ti, and discard the mbuf containing it and any other
599 * In any case the ack and sequence number of the transmitted
600 * segment are as specified by the parameters.
603 tcp_respond(struct tcpcb
*tp
, struct mbuf
*template, struct mbuf
*m
,
604 struct tcphdr
*th0
, tcp_seq ack
, tcp_seq seq
, int flags
)
610 int error
, tlen
, win
= 0;
616 int family
; /* family on packet, not inpcb/in6pcb! */
620 if (tp
!= NULL
&& (flags
& TH_RST
) == 0) {
622 if (tp
->t_inpcb
&& tp
->t_in6pcb
)
623 panic("tcp_respond: both t_inpcb and t_in6pcb are set");
627 win
= sbspace(&tp
->t_inpcb
->inp_socket
->so_rcv
);
631 win
= sbspace(&tp
->t_in6pcb
->in6p_socket
->so_rcv
);
635 th
= NULL
; /* Quell uninitialized warning */
644 /* get family information from template */
645 switch (mtod(template, struct ip
*)->ip_v
) {
648 hlen
= sizeof(struct ip
);
653 hlen
= sizeof(struct ip6_hdr
);
660 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
);
662 MCLAIM(m
, &tcp_tx_mowner
);
663 MCLGET(m
, M_DONTWAIT
);
664 if ((m
->m_flags
& M_EXT
) == 0) {
677 m
->m_data
+= max_linkhdr
;
678 bcopy(mtod(template, void *), mtod(m
, void *),
682 ip
= mtod(m
, struct ip
*);
683 th
= (struct tcphdr
*)(ip
+ 1);
687 ip6
= mtod(m
, struct ip6_hdr
*);
688 th
= (struct tcphdr
*)(ip6
+ 1);
693 /* no one will visit here */
701 if ((m
->m_flags
& M_PKTHDR
) == 0) {
703 printf("non PKTHDR to tcp_respond\n");
710 panic("th0 == NULL in tcp_respond");
713 /* get family information from m */
714 switch (mtod(m
, struct ip
*)->ip_v
) {
717 hlen
= sizeof(struct ip
);
718 ip
= mtod(m
, struct ip
*);
723 hlen
= sizeof(struct ip6_hdr
);
724 ip6
= mtod(m
, struct ip6_hdr
*);
731 /* clear h/w csum flags inherited from rx packet */
732 m
->m_pkthdr
.csum_flags
= 0;
734 if ((flags
& TH_SYN
) == 0 || sizeof(*th0
) > (th0
->th_off
<< 2))
737 tlen
= th0
->th_off
<< 2;
739 if (m
->m_len
> hlen
+ tlen
&& (m
->m_flags
& M_EXT
) == 0 &&
740 mtod(m
, char *) + hlen
== (char *)th0
) {
741 m
->m_len
= hlen
+ tlen
;
748 if (max_linkhdr
+ hlen
+ tlen
> MCLBYTES
) {
753 MGETHDR(n
, M_DONTWAIT
, MT_HEADER
);
754 if (n
&& max_linkhdr
+ hlen
+ tlen
> MHLEN
) {
755 MCLGET(n
, M_DONTWAIT
);
756 if ((n
->m_flags
& M_EXT
) == 0) {
766 MCLAIM(n
, &tcp_tx_mowner
);
767 n
->m_data
+= max_linkhdr
;
768 n
->m_len
= hlen
+ tlen
;
769 m_copyback(n
, 0, hlen
, mtod(m
, void *));
770 m_copyback(n
, hlen
, tlen
, (void *)th0
);
777 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
780 ip
= mtod(m
, struct ip
*);
781 th
= (struct tcphdr
*)(ip
+ 1);
782 ip
->ip_p
= IPPROTO_TCP
;
783 xchg(ip
->ip_dst
, ip
->ip_src
, struct in_addr
);
784 ip
->ip_p
= IPPROTO_TCP
;
788 ip6
= mtod(m
, struct ip6_hdr
*);
789 th
= (struct tcphdr
*)(ip6
+ 1);
790 ip6
->ip6_nxt
= IPPROTO_TCP
;
791 xchg(ip6
->ip6_dst
, ip6
->ip6_src
, struct in6_addr
);
792 ip6
->ip6_nxt
= IPPROTO_TCP
;
797 /* no one will visit here */
802 xchg(th
->th_dport
, th
->th_sport
, u_int16_t
);
804 tlen
= 0; /*be friendly with the following code*/
806 th
->th_seq
= htonl(seq
);
807 th
->th_ack
= htonl(ack
);
809 if ((flags
& TH_SYN
) == 0) {
811 win
>>= tp
->rcv_scale
;
812 if (win
> TCP_MAXWIN
)
814 th
->th_win
= htons((u_int16_t
)win
);
815 th
->th_off
= sizeof (struct tcphdr
) >> 2;
818 tlen
+= th
->th_off
<< 2;
819 m
->m_len
= hlen
+ tlen
;
820 m
->m_pkthdr
.len
= hlen
+ tlen
;
821 m
->m_pkthdr
.rcvif
= (struct ifnet
*) 0;
822 th
->th_flags
= flags
;
829 struct ipovly
*ipov
= (struct ipovly
*)ip
;
830 memset(ipov
->ih_x1
, 0, sizeof ipov
->ih_x1
);
831 ipov
->ih_len
= htons((u_int16_t
)tlen
);
834 th
->th_sum
= in_cksum(m
, hlen
+ tlen
);
835 ip
->ip_len
= htons(hlen
+ tlen
);
836 ip
->ip_ttl
= ip_defttl
;
844 th
->th_sum
= in6_cksum(m
, IPPROTO_TCP
, sizeof(struct ip6_hdr
),
846 ip6
->ip6_plen
= htons(tlen
);
847 if (tp
&& tp
->t_in6pcb
) {
849 ro
= &tp
->t_in6pcb
->in6p_route
;
850 oifp
= (rt
= rtcache_validate(ro
)) != NULL
? rt
->rt_ifp
852 ip6
->ip6_hlim
= in6_selecthlim(tp
->t_in6pcb
, oifp
);
854 ip6
->ip6_hlim
= ip6_defhlim
;
855 ip6
->ip6_flow
&= ~IPV6_FLOWINFO_MASK
;
856 if (ip6_auto_flowlabel
) {
858 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK
);
865 if (tp
&& tp
->t_inpcb
)
866 so
= tp
->t_inpcb
->inp_socket
;
868 else if (tp
&& tp
->t_in6pcb
)
869 so
= tp
->t_in6pcb
->in6p_socket
;
874 if (tp
!= NULL
&& tp
->t_inpcb
!= NULL
) {
875 ro
= &tp
->t_inpcb
->inp_route
;
877 if (family
!= AF_INET
)
878 panic("tcp_respond: address family mismatch");
879 if (!in_hosteq(ip
->ip_dst
, tp
->t_inpcb
->inp_faddr
)) {
880 panic("tcp_respond: ip_dst %x != inp_faddr %x",
881 ntohl(ip
->ip_dst
.s_addr
),
882 ntohl(tp
->t_inpcb
->inp_faddr
.s_addr
));
887 else if (tp
!= NULL
&& tp
->t_in6pcb
!= NULL
) {
888 ro
= (struct route
*)&tp
->t_in6pcb
->in6p_route
;
890 if (family
== AF_INET
) {
891 if (!IN6_IS_ADDR_V4MAPPED(&tp
->t_in6pcb
->in6p_faddr
))
892 panic("tcp_respond: not mapped addr");
893 if (memcmp(&ip
->ip_dst
,
894 &tp
->t_in6pcb
->in6p_faddr
.s6_addr32
[3],
895 sizeof(ip
->ip_dst
)) != 0) {
896 panic("tcp_respond: ip_dst != in6p_faddr");
898 } else if (family
== AF_INET6
) {
899 if (!IN6_ARE_ADDR_EQUAL(&ip6
->ip6_dst
,
900 &tp
->t_in6pcb
->in6p_faddr
))
901 panic("tcp_respond: ip6_dst != in6p_faddr");
903 panic("tcp_respond: address family mismatch");
913 error
= ip_output(m
, NULL
, ro
,
914 (tp
&& tp
->t_mtudisc
? IP_MTUDISC
: 0),
915 (struct ip_moptions
*)0, so
);
920 error
= ip6_output(m
, NULL
, ro
, 0, NULL
, so
, NULL
);
924 error
= EAFNOSUPPORT
;
932 * Template TCPCB. Rather than zeroing a new TCPCB and initializing
933 * a bunch of members individually, we maintain this template for the
934 * static and mostly-static components of the TCPCB, and copy it into
935 * the new TCPCB instead.
937 static struct tcpcb tcpcb_template
= {
938 .t_srtt
= TCPTV_SRTTBASE
,
939 .t_rttmin
= TCPTV_MIN
,
941 .snd_cwnd
= TCP_MAXWIN
<< TCP_MAX_WINSHIFT
,
942 .snd_ssthresh
= TCP_MAXWIN
<< TCP_MAX_WINSHIFT
,
950 * Updates the TCPCB template whenever a parameter that would affect
951 * the template is changed.
954 tcp_tcpcb_template(void)
956 struct tcpcb
*tp
= &tcpcb_template
;
959 tp
->t_peermss
= tcp_mssdflt
;
960 tp
->t_ourmss
= tcp_mssdflt
;
961 tp
->t_segsz
= tcp_mssdflt
;
964 if (tcp_do_rfc1323
&& tcp_do_win_scale
)
965 flags
|= TF_REQ_SCALE
;
966 if (tcp_do_rfc1323
&& tcp_do_timestamps
)
967 flags
|= TF_REQ_TSTMP
;
971 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
972 * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives
973 * reasonable initial retransmit time.
975 tp
->t_rttvar
= tcp_rttdflt
* PR_SLOWHZ
<< (TCP_RTTVAR_SHIFT
+ 2 - 1);
976 TCPT_RANGESET(tp
->t_rxtcur
, TCP_REXMTVAL(tp
),
977 TCPTV_MIN
, TCPTV_REXMTMAX
);
980 tp
->t_keepinit
= tcp_keepinit
;
981 tp
->t_keepidle
= tcp_keepidle
;
982 tp
->t_keepintvl
= tcp_keepintvl
;
983 tp
->t_keepcnt
= tcp_keepcnt
;
984 tp
->t_maxidle
= tp
->t_keepcnt
* tp
->t_keepintvl
;
988 * Create a new TCP control block, making an
989 * empty reassembly queue and hooking it to the argument
990 * protocol control block.
992 /* family selects inpcb, or in6pcb */
994 tcp_newtcpcb(int family
, void *aux
)
1002 /* XXX Consider using a pool_cache for speed. */
1003 tp
= pool_get(&tcpcb_pool
, PR_NOWAIT
); /* splsoftnet via tcp_usrreq */
1006 memcpy(tp
, &tcpcb_template
, sizeof(*tp
));
1007 TAILQ_INIT(&tp
->segq
);
1008 TAILQ_INIT(&tp
->timeq
);
1009 tp
->t_family
= family
; /* may be overridden later on */
1010 TAILQ_INIT(&tp
->snd_holes
);
1011 LIST_INIT(&tp
->t_sc
); /* XXX can template this */
1013 /* Don't sweat this loop; hopefully the compiler will unroll it. */
1014 for (i
= 0; i
< TCPT_NTIMERS
; i
++) {
1015 callout_init(&tp
->t_timer
[i
], CALLOUT_MPSAFE
);
1016 TCP_TIMER_INIT(tp
, i
);
1018 callout_init(&tp
->t_delack_ch
, CALLOUT_MPSAFE
);
1023 struct inpcb
*inp
= (struct inpcb
*)aux
;
1025 inp
->inp_ip
.ip_ttl
= ip_defttl
;
1026 inp
->inp_ppcb
= (void *)tp
;
1029 tp
->t_mtudisc
= ip_mtudisc
;
1035 struct in6pcb
*in6p
= (struct in6pcb
*)aux
;
1037 in6p
->in6p_ip6
.ip6_hlim
= in6_selecthlim(in6p
,
1038 (rt
= rtcache_validate(&in6p
->in6p_route
)) != NULL
1041 in6p
->in6p_ppcb
= (void *)tp
;
1043 tp
->t_in6pcb
= in6p
;
1044 /* for IPv6, always try to run path MTU discovery */
1050 for (i
= 0; i
< TCPT_NTIMERS
; i
++)
1051 callout_destroy(&tp
->t_timer
[i
]);
1052 callout_destroy(&tp
->t_delack_ch
);
1053 pool_put(&tcpcb_pool
, tp
); /* splsoftnet via tcp_usrreq */
1058 * Initialize our timebase. When we send timestamps, we take
1059 * the delta from tcp_now -- this means each connection always
1060 * gets a timebase of 1, which makes it, among other things,
1061 * more difficult to determine how long a system has been up,
1062 * and thus how many TCP sequence increments have occurred.
1064 * We start with 1, because 0 doesn't work with linux, which
1065 * considers timestamp 0 in a SYN packet as a bug and disables
1068 tp
->ts_timebase
= tcp_now
- 1;
1070 tcp_congctl_select(tp
, tcp_congctl_global_name
);
1076 * Drop a TCP connection, reporting
1077 * the specified error. If connection is synchronized,
1078 * then send a RST to peer.
1081 tcp_drop(struct tcpcb
*tp
, int errno
)
1083 struct socket
*so
= NULL
;
1086 if (tp
->t_inpcb
&& tp
->t_in6pcb
)
1087 panic("tcp_drop: both t_inpcb and t_in6pcb are set");
1091 so
= tp
->t_inpcb
->inp_socket
;
1095 so
= tp
->t_in6pcb
->in6p_socket
;
1100 if (TCPS_HAVERCVDSYN(tp
->t_state
)) {
1101 tp
->t_state
= TCPS_CLOSED
;
1102 (void) tcp_output(tp
);
1103 TCP_STATINC(TCP_STAT_DROPS
);
1105 TCP_STATINC(TCP_STAT_CONNDROPS
);
1106 if (errno
== ETIMEDOUT
&& tp
->t_softerror
)
1107 errno
= tp
->t_softerror
;
1108 so
->so_error
= errno
;
1109 return (tcp_close(tp
));
1113 * Close a TCP control block:
1114 * discard all space held by the tcp
1115 * discard internet protocol block
1116 * wake up any sleepers
1119 tcp_close(struct tcpcb
*tp
)
1123 struct in6pcb
*in6p
;
1134 in6p
= tp
->t_in6pcb
;
1139 so
= inp
->inp_socket
;
1140 ro
= &inp
->inp_route
;
1144 so
= in6p
->in6p_socket
;
1145 ro
= (struct route
*)&in6p
->in6p_route
;
1151 * If we sent enough data to get some meaningful characteristics,
1152 * save them in the routing entry. 'Enough' is arbitrarily
1153 * defined as the sendpipesize (default 4K) * 16. This would
1154 * give us 16 rtt samples assuming we only get one sample per
1155 * window (the usual case on a long haul net). 16 samples is
1156 * enough for the srtt filter to converge to within 5% of the correct
1157 * value; fewer samples and we could save a very bogus rtt.
1159 * Don't update the default route's characteristics and don't
1160 * update anything that the user "locked".
1162 if (SEQ_LT(tp
->iss
+ so
->so_snd
.sb_hiwat
* 16, tp
->snd_max
) &&
1163 ro
&& (rt
= rtcache_validate(ro
)) != NULL
&&
1164 !in_nullhost(satocsin(rt_getkey(rt
))->sin_addr
)) {
1167 if ((rt
->rt_rmx
.rmx_locks
& RTV_RTT
) == 0) {
1169 ((RTM_RTTUNIT
/ PR_SLOWHZ
) >> (TCP_RTT_SHIFT
+ 2));
1170 if (rt
->rt_rmx
.rmx_rtt
&& i
)
1172 * filter this update to half the old & half
1173 * the new values, converting scale.
1174 * See route.h and tcp_var.h for a
1175 * description of the scaling constants.
1177 rt
->rt_rmx
.rmx_rtt
=
1178 (rt
->rt_rmx
.rmx_rtt
+ i
) / 2;
1180 rt
->rt_rmx
.rmx_rtt
= i
;
1182 if ((rt
->rt_rmx
.rmx_locks
& RTV_RTTVAR
) == 0) {
1184 ((RTM_RTTUNIT
/ PR_SLOWHZ
) >> (TCP_RTTVAR_SHIFT
+ 2));
1185 if (rt
->rt_rmx
.rmx_rttvar
&& i
)
1186 rt
->rt_rmx
.rmx_rttvar
=
1187 (rt
->rt_rmx
.rmx_rttvar
+ i
) / 2;
1189 rt
->rt_rmx
.rmx_rttvar
= i
;
1192 * update the pipelimit (ssthresh) if it has been updated
1193 * already or if a pipesize was specified & the threshold
1194 * got below half the pipesize. I.e., wait for bad news
1195 * before we start updating, then update on both good
1198 if (((rt
->rt_rmx
.rmx_locks
& RTV_SSTHRESH
) == 0 &&
1199 (i
= tp
->snd_ssthresh
) && rt
->rt_rmx
.rmx_ssthresh
) ||
1200 i
< (rt
->rt_rmx
.rmx_sendpipe
/ 2)) {
1202 * convert the limit from user data bytes to
1203 * packets then to packet data bytes.
1205 i
= (i
+ tp
->t_segsz
/ 2) / tp
->t_segsz
;
1208 i
*= (u_long
)(tp
->t_segsz
+ sizeof (struct tcpiphdr
));
1209 if (rt
->rt_rmx
.rmx_ssthresh
)
1210 rt
->rt_rmx
.rmx_ssthresh
=
1211 (rt
->rt_rmx
.rmx_ssthresh
+ i
) / 2;
1213 rt
->rt_rmx
.rmx_ssthresh
= i
;
1216 #endif /* RTV_RTT */
1217 /* free the reassembly queue, if any */
1219 (void) tcp_freeq(tp
);
1220 TCP_REASS_UNLOCK(tp
);
1222 /* free the SACK holes list. */
1223 tcp_free_sackholes(tp
);
1224 tcp_congctl_release(tp
);
1225 syn_cache_cleanup(tp
);
1227 if (tp
->t_template
) {
1228 m_free(tp
->t_template
);
1229 tp
->t_template
= NULL
;
1233 * Detaching the pcb will unlock the socket/tcpcb, and stopping
1234 * the timers can also drop the lock. We need to prevent access
1235 * to the tcpcb as it's half torn down. Flag the pcb as dead
1236 * (prevents access by timers) and only then detach it.
1238 tp
->t_flags
|= TF_DEAD
;
1241 soisdisconnected(so
);
1246 in6p
->in6p_ppcb
= 0;
1247 soisdisconnected(so
);
1248 in6_pcbdetach(in6p
);
1252 * pcb is no longer visible elsewhere, so we can safely release
1253 * the lock in callout_halt() if needed.
1255 TCP_STATINC(TCP_STAT_CLOSED
);
1256 for (j
= 0; j
< TCPT_NTIMERS
; j
++) {
1257 callout_halt(&tp
->t_timer
[j
], softnet_lock
);
1258 callout_destroy(&tp
->t_timer
[j
]);
1260 callout_halt(&tp
->t_delack_ch
, softnet_lock
);
1261 callout_destroy(&tp
->t_delack_ch
);
1262 pool_put(&tcpcb_pool
, tp
);
1264 return ((struct tcpcb
*)0);
1268 tcp_freeq(struct tcpcb
*tp
)
1272 #ifdef TCPREASS_DEBUG
1276 TCP_REASS_LOCK_CHECK(tp
);
1278 while ((qe
= TAILQ_FIRST(&tp
->segq
)) != NULL
) {
1279 #ifdef TCPREASS_DEBUG
1280 printf("tcp_freeq[%p,%d]: %u:%u(%u) 0x%02x\n",
1281 tp
, i
++, qe
->ipqe_seq
, qe
->ipqe_seq
+ qe
->ipqe_len
,
1282 qe
->ipqe_len
, qe
->ipqe_flags
& (TH_SYN
|TH_FIN
|TH_RST
));
1284 TAILQ_REMOVE(&tp
->segq
, qe
, ipqe_q
);
1285 TAILQ_REMOVE(&tp
->timeq
, qe
, ipqe_timeq
);
1286 m_freem(qe
->ipqe_m
);
1291 KASSERT(TAILQ_EMPTY(&tp
->timeq
));
1296 * Protocol drain routine. Called when memory is in short supply.
1297 * Don't acquire softnet_lock as can be called from hardware
1298 * interrupt handler.
1303 struct inpcb_hdr
*inph
;
1306 KERNEL_LOCK(1, NULL
);
1309 * Free the sequence queue of all TCP connections.
1311 CIRCLEQ_FOREACH(inph
, &tcbtable
.inpt_queue
, inph_queue
) {
1312 switch (inph
->inph_af
) {
1314 tp
= intotcpcb((struct inpcb
*)inph
);
1318 tp
= in6totcpcb((struct in6pcb
*)inph
);
1327 * We may be called from a device's interrupt
1328 * context. If the tcpcb is already busy,
1329 * just bail out now.
1331 if (tcp_reass_lock_try(tp
) == 0)
1334 TCP_STATINC(TCP_STAT_CONNSDRAINED
);
1335 TCP_REASS_UNLOCK(tp
);
1339 KERNEL_UNLOCK_ONE(NULL
);
1343 * Notify a tcp user of an asynchronous error;
1344 * store error as soft error, but wake up user
1345 * (for now, won't do anything until can select for soft error).
1348 tcp_notify(struct inpcb
*inp
, int error
)
1350 struct tcpcb
*tp
= (struct tcpcb
*)inp
->inp_ppcb
;
1351 struct socket
*so
= inp
->inp_socket
;
1354 * Ignore some errors if we are hooked up.
1355 * If connection hasn't completed, has retransmitted several times,
1356 * and receives a second error, give up now. This is better
1357 * than waiting a long time to establish a connection that
1358 * can never complete.
1360 if (tp
->t_state
== TCPS_ESTABLISHED
&&
1361 (error
== EHOSTUNREACH
|| error
== ENETUNREACH
||
1362 error
== EHOSTDOWN
)) {
1364 } else if (TCPS_HAVEESTABLISHED(tp
->t_state
) == 0 &&
1365 tp
->t_rxtshift
> 3 && tp
->t_softerror
)
1366 so
->so_error
= error
;
1368 tp
->t_softerror
= error
;
1369 cv_broadcast(&so
->so_cv
);
#ifdef INET6
/*
 * IPv6 counterpart of tcp_notify(): record an asynchronous error on the
 * connection attached to `in6p' and wake up anyone sleeping on the
 * socket.
 *
 * in6p:  IPv6 PCB whose in6p_ppcb is the TCP control block.
 * error: errno value describing the condition.
 */
void
tcp6_notify(struct in6pcb *in6p, int error)
{
	struct tcpcb *tp = (struct tcpcb *)in6p->in6p_ppcb;
	struct socket *so = in6p->in6p_socket;

	/*
	 * Ignore some errors if we are hooked up.
	 * If connection hasn't completed, has retransmitted several times,
	 * and receives a second error, give up now.  This is better
	 * than waiting a long time to establish a connection that
	 * can never complete.
	 */
	if (tp->t_state == TCPS_ESTABLISHED &&
	    (error == EHOSTUNREACH || error == ENETUNREACH ||
	     error == EHOSTDOWN)) {
		/* Transient reachability error on a live connection. */
		return;
	} else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
	    tp->t_rxtshift > 3 && tp->t_softerror)
		so->so_error = error;
	else
		tp->t_softerror = error;
	cv_broadcast(&so->so_cv);
	sorwakeup(so);
	sowwakeup(so);
}
#endif /* INET6 */
#ifdef INET6
/*
 * Control input for TCP over IPv6.
 *
 * cmd: PRC_* command code.
 * sa:  address the notification refers to; must be a sockaddr_in6.
 * d:   optional struct ip6ctlparam supplied by icmp6, or NULL.
 *
 * Source quench is ignored, redirects are routed to in6_rtchange(),
 * PRC_MSGSIZE is handed to icmp6_mtudisc_update() and everything else
 * with a non-zero inet6ctlerrmap[] entry is delivered through
 * in6_pcbnotify().  Always returns NULL.
 */
void *
tcp6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
{
	struct tcphdr th;
	void (*notify)(struct in6pcb *, int) = tcp6_notify;
	int nmatch;
	struct ip6_hdr *ip6;
	const struct sockaddr_in6 *sa6_src = NULL;
	const struct sockaddr_in6 *sa6 = (const struct sockaddr_in6 *)sa;
	struct mbuf *m;
	int off;

	if (sa->sa_family != AF_INET6 ||
	    sa->sa_len != sizeof(struct sockaddr_in6))
		return NULL;
	if ((unsigned)cmd >= PRC_NCMDS)
		return NULL;
	else if (cmd == PRC_QUENCH) {
		/*
		 * Don't honor ICMP Source Quench messages meant for
		 * TCP connections.
		 */
		return NULL;
	} else if (PRC_IS_REDIRECT(cmd))
		notify = in6_rtchange, d = NULL;
	else if (cmd == PRC_MSGSIZE)
		; /* special code is present, see below */
	else if (cmd == PRC_HOSTDEAD)
		d = NULL;
	else if (inet6ctlerrmap[cmd] == 0)
		return NULL;

	/* if the parameter is from icmp6, decode it. */
	if (d != NULL) {
		struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;

		m = ip6cp->ip6c_m;
		ip6 = ip6cp->ip6c_ip6;
		off = ip6cp->ip6c_off;
		sa6_src = ip6cp->ip6c_src;
	} else {
		m = NULL;
		ip6 = NULL;
		sa6_src = &sa6_any;
		off = 0;
	}

	if (ip6) {
		/*
		 * XXX: We assume that when ip6 is non NULL,
		 * M and OFF are valid.
		 */

		/* check if we can safely examine src and dst ports */
		if (m->m_pkthdr.len < off + sizeof(th)) {
			if (cmd == PRC_MSGSIZE)
				icmp6_mtudisc_update((struct ip6ctlparam *)d, 0);
			return NULL;
		}

		memset(&th, 0, sizeof(th));
		m_copydata(m, off, sizeof(th), (void *)&th);

		if (cmd == PRC_MSGSIZE) {
			int valid = 0;

			/*
			 * Check to see if we have a valid TCP connection
			 * corresponding to the address in the ICMPv6 message
			 * payload.
			 */
			if (in6_pcblookup_connect(&tcbtable, &sa6->sin6_addr,
			    th.th_dport,
			    (const struct in6_addr *)&sa6_src->sin6_addr,
			    th.th_sport, 0))
				valid++;

			/*
			 * Depending on the value of "valid" and routing table
			 * size (mtudisc_{hi,lo}wat), we will:
			 * - recalculate the new MTU and create the
			 *   corresponding routing entry, or
			 * - ignore the MTU change notification.
			 */
			icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);

			/*
			 * no need to call in6_pcbnotify, it should have been
			 * called via callback if necessary
			 */
			return NULL;
		}

		nmatch = in6_pcbnotify(&tcbtable, sa, th.th_dport,
		    (const struct sockaddr *)sa6_src, th.th_sport, cmd, NULL,
		    notify);
		if (nmatch == 0 && syn_cache_count &&
		    (inet6ctlerrmap[cmd] == EHOSTUNREACH ||
		     inet6ctlerrmap[cmd] == ENETUNREACH ||
		     inet6ctlerrmap[cmd] == EHOSTDOWN))
			syn_cache_unreach((const struct sockaddr *)sa6_src,
			    sa, &th);
	} else {
		(void) in6_pcbnotify(&tcbtable, sa, 0,
		    (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify);
	}

	return NULL;
}
#endif /* INET6 */
#ifdef INET
/*
 * Control input for TCP over IPv4.
 *
 * cmd: PRC_* command code.
 * sa:  address the notification refers to; must be a sockaddr_in.
 * v:   IP header quoted in the ICMP message, or NULL.
 *
 * Source quench is ignored and redirects go to in_rtchange().
 * PRC_MSGSIZE is validated against an existing connection and either
 * acted on at once through icmp_mtudisc() or recorded in the tcpcb
 * (TF_PMTUD_PEND) for later.  Other errors are delivered through
 * in_pcbnotify() / in_pcbnotifyall().  Always returns NULL.
 *
 * assumes that ip header and tcp header are contiguous on mbuf
 */
void *
tcp_ctlinput(int cmd, const struct sockaddr *sa, void *v)
{
	struct ip *ip = v;
	struct tcphdr *th;
	struct icmp *icp;
	extern const int inetctlerrmap[];
	void (*notify)(struct inpcb *, int) = tcp_notify;
	int errno;
	int nmatch;
	struct tcpcb *tp;
	u_int mtu;
	tcp_seq seq;
	struct inpcb *inp;
#ifdef INET6
	struct in6pcb *in6p;
	struct in6_addr src6, dst6;
#endif

	if (sa->sa_family != AF_INET ||
	    sa->sa_len != sizeof(struct sockaddr_in))
		return NULL;
	if ((unsigned)cmd >= PRC_NCMDS)
		return NULL;
	errno = inetctlerrmap[cmd];
	if (cmd == PRC_QUENCH)
		/*
		 * Don't honor ICMP Source Quench messages meant for
		 * TCP connections.
		 */
		return NULL;
	else if (PRC_IS_REDIRECT(cmd))
		notify = in_rtchange, ip = 0;
	else if (cmd == PRC_MSGSIZE && ip && ip->ip_v == 4) {
		/*
		 * Check to see if we have a valid TCP connection
		 * corresponding to the address in the ICMP message
		 * payload.
		 *
		 * Boundary check is made in icmp_input(), with ICMP_ADVLENMIN.
		 */
		th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
#ifdef INET6
		/* Build the IPv4-mapped forms for the in6pcb lookup. */
		memset(&src6, 0, sizeof(src6));
		memset(&dst6, 0, sizeof(dst6));
		src6.s6_addr16[5] = dst6.s6_addr16[5] = 0xffff;
		memcpy(&src6.s6_addr32[3], &ip->ip_src, sizeof(struct in_addr));
		memcpy(&dst6.s6_addr32[3], &ip->ip_dst, sizeof(struct in_addr));
#endif
		if ((inp = in_pcblookup_connect(&tcbtable, ip->ip_dst,
		    th->th_dport, ip->ip_src, th->th_sport)) != NULL)
#ifdef INET6
			in6p = NULL;
#else
			;
#endif
#ifdef INET6
		else if ((in6p = in6_pcblookup_connect(&tcbtable, &dst6,
		    th->th_dport, &src6, th->th_sport, 0)) != NULL)
			;
#endif
		else
			return NULL;

		/*
		 * Now that we've validated that we are actually communicating
		 * with the host indicated in the ICMP message, locate the
		 * ICMP header, recalculate the new MTU, and create the
		 * corresponding routing entry.
		 */
		icp = (struct icmp *)((char *)ip -
		    offsetof(struct icmp, icmp_ip));
		if (inp) {
			if ((tp = intotcpcb(inp)) == NULL)
				return NULL;
		}
#ifdef INET6
		else if (in6p) {
			if ((tp = in6totcpcb(in6p)) == NULL)
				return NULL;
		}
#endif
		else
			return NULL;
		/* The quoted segment must lie inside the send window. */
		seq = ntohl(th->th_seq);
		if (SEQ_LT(seq, tp->snd_una) || SEQ_GT(seq, tp->snd_max))
			return NULL;
		/*
		 * If the ICMP message advertises a Next-Hop MTU
		 * equal or larger than the maximum packet size we have
		 * ever sent, drop the message.
		 */
		mtu = (u_int)ntohs(icp->icmp_nextmtu);
		if (mtu >= tp->t_pmtud_mtu_sent)
			return NULL;
		if (mtu >= tcp_hdrsz(tp) + tp->t_pmtud_mss_acked) {
			/*
			 * Calculate new MTU, and create corresponding
			 * route (traditional PMTUD).
			 */
			tp->t_flags &= ~TF_PMTUD_PEND;
			icmp_mtudisc(icp, ip->ip_dst);
		} else {
			/*
			 * Record the information got in the ICMP
			 * message; act on it later.
			 * If we had already recorded an ICMP message,
			 * replace the old one only if the new message
			 * refers to an older TCP segment
			 */
			if (tp->t_flags & TF_PMTUD_PEND) {
				if (SEQ_LT(tp->t_pmtud_th_seq, seq))
					return NULL;
			} else
				tp->t_flags |= TF_PMTUD_PEND;
			tp->t_pmtud_th_seq = seq;
			tp->t_pmtud_nextmtu = icp->icmp_nextmtu;
			tp->t_pmtud_ip_len = icp->icmp_ip.ip_len;
			tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl;
		}
		return NULL;
	} else if (cmd == PRC_HOSTDEAD)
		ip = 0;
	else if (errno == 0)
		return NULL;
	if (ip && ip->ip_v == 4 && sa->sa_family == AF_INET) {
		th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
		nmatch = in_pcbnotify(&tcbtable, satocsin(sa)->sin_addr,
		    th->th_dport, ip->ip_src, th->th_sport, errno, notify);
		if (nmatch == 0 && syn_cache_count &&
		    (inetctlerrmap[cmd] == EHOSTUNREACH ||
		     inetctlerrmap[cmd] == ENETUNREACH ||
		     inetctlerrmap[cmd] == EHOSTDOWN)) {
			struct sockaddr_in sin;

			memset(&sin, 0, sizeof(sin));
			sin.sin_len = sizeof(sin);
			sin.sin_family = AF_INET;
			sin.sin_port = th->th_sport;
			sin.sin_addr = ip->ip_src;
			syn_cache_unreach((struct sockaddr *)&sin, sa, th);
		}

		/* XXX mapped address case */
	} else
		in_pcbnotifyall(&tcbtable, satocsin(sa)->sin_addr, errno,
		    notify);
	return NULL;
}
#endif /* INET */
#ifdef INET
/*
 * When a source quench is received, we are being notified of congestion.
 * Close the congestion window down to the Loss Window (one segment).
 * We will gradually open it again as we proceed.
 *
 * The second argument is unused; it exists so the function matches
 * the PCB notify callback signature.
 */
void
tcp_quench(struct inpcb *inp, int errno)
{
	struct tcpcb *tp = intotcpcb(inp);

	if (tp) {
		tp->snd_cwnd = tp->t_segsz;
		tp->t_bytes_acked = 0;
	}
}
#endif /* INET */
#ifdef INET6
/*
 * IPv6 counterpart of tcp_quench(): shrink the congestion window of
 * the connection attached to `in6p' to a single segment and reset the
 * acked-byte counter.  The second argument is unused.
 */
void
tcp6_quench(struct in6pcb *in6p, int errno)
{
	struct tcpcb *tp = in6totcpcb(in6p);

	if (tp) {
		tp->snd_cwnd = tp->t_segsz;
		tp->t_bytes_acked = 0;
	}
}
#endif /* INET6 */
#ifdef INET
/*
 * Path MTU Discovery handlers.
 *
 * Runs tcp_mtudisc() on every IPv4 PCB talking to `faddr'.  When INET6
 * is configured the address is also converted to its IPv4-mapped IPv6
 * form (::ffff:a.b.c.d) and handed to tcp6_mtudisc_callback().
 */
void
tcp_mtudisc_callback(struct in_addr faddr)
{
#ifdef INET6
	struct in6_addr in6;
#endif

	in_pcbnotifyall(&tcbtable, faddr, EMSGSIZE, tcp_mtudisc);
#ifdef INET6
	memset(&in6, 0, sizeof(in6));
	in6.s6_addr16[5] = 0xffff;
	memcpy(&in6.s6_addr32[3], &faddr, sizeof(struct in_addr));
	tcp6_mtudisc_callback(&in6);
#endif
}
#endif /* INET */
#ifdef INET
/*
 * On receipt of path MTU corrections, flush old route and replace it
 * with the new one.  Retransmit all unacknowledged packets, to ensure
 * that all packets will be received.
 *
 * inp:   PCB of the affected connection.
 * errno: error code, passed through to in_rtchange().
 */
void
tcp_mtudisc(struct inpcb *inp, int errno)
{
	struct tcpcb *tp = intotcpcb(inp);
	struct rtentry *rt = in_pcbrtentry(inp);

	if (tp != 0) {
		if (rt != 0) {
			/*
			 * If this was not a host route, remove and realloc.
			 */
			if ((rt->rt_flags & RTF_HOST) == 0) {
				in_rtchange(inp, errno);
				if ((rt = in_pcbrtentry(inp)) == 0)
					return;
			}

			/*
			 * Slow start out of the error condition.  We
			 * use the MTU because we know it's smaller
			 * than the previously transmitted segment.
			 *
			 * Note: This is more conservative than the
			 * suggestion in draft-floyd-incr-init-win-03.
			 */
			if (rt->rt_rmx.rmx_mtu != 0)
				tp->snd_cwnd =
				    TCP_INITIAL_WINDOW(tcp_init_win,
				    rt->rt_rmx.rmx_mtu);
		}

		/*
		 * Resend unacknowledged packets.
		 */
		tp->snd_nxt = tp->sack_newdata = tp->snd_una;
		tcp_output(tp);
	}
}
#endif /* INET */
#ifdef INET6
/*
 * Path MTU Discovery handlers.
 *
 * Wraps `faddr' in a sockaddr_in6 and runs tcp6_mtudisc() on every
 * matching PCB via in6_pcbnotify(), with wildcard ports and a wildcard
 * source address (sa6_any).
 */
void
tcp6_mtudisc_callback(struct in6_addr *faddr)
{
	struct sockaddr_in6 sin6;

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	sin6.sin6_addr = *faddr;
	(void) in6_pcbnotify(&tcbtable, (struct sockaddr *)&sin6, 0,
	    (const struct sockaddr *)&sa6_any, 0, PRC_MSGSIZE, NULL,
	    tcp6_mtudisc);
}
#endif /* INET6 */
#ifdef INET6
/*
 * IPv6 counterpart of tcp_mtudisc(): after a path MTU change, refresh
 * the cached route if it is not a host route, restart from the initial
 * window computed from the route MTU, and retransmit everything that
 * is still unacknowledged.
 *
 * in6p:  PCB of the affected connection.
 * errno: error code, passed through to in6_rtchange().
 */
void
tcp6_mtudisc(struct in6pcb *in6p, int errno)
{
	struct tcpcb *tp = in6totcpcb(in6p);
	struct rtentry *rt = in6_pcbrtentry(in6p);

	if (tp != 0) {
		if (rt != 0) {
			/*
			 * If this was not a host route, remove and realloc.
			 */
			if ((rt->rt_flags & RTF_HOST) == 0) {
				in6_rtchange(in6p, errno);
				if ((rt = in6_pcbrtentry(in6p)) == 0)
					return;
			}

			/*
			 * Slow start out of the error condition.  We
			 * use the MTU because we know it's smaller
			 * than the previously transmitted segment.
			 *
			 * Note: This is more conservative than the
			 * suggestion in draft-floyd-incr-init-win-03.
			 */
			if (rt->rt_rmx.rmx_mtu != 0)
				tp->snd_cwnd =
				    TCP_INITIAL_WINDOW(tcp_init_win,
				    rt->rt_rmx.rmx_mtu);
		}

		/*
		 * Resend unacknowledged packets.
		 */
		tp->snd_nxt = tp->sack_newdata = tp->snd_una;
		tcp_output(tp);
	}
}
#endif /* INET6 */
1817 * Compute the MSS to advertise to the peer. Called only during
1818 * the 3-way handshake. If we are the server (peer initiated
1819 * connection), we are called with a pointer to the interface
1820 * on which the SYN packet arrived. If we are the client (we
1821 * initiated connection), we are called with a pointer to the
1822 * interface out which this connection should go.
1824 * NOTE: Do not subtract IP option/extension header size nor IPsec
1825 * header size from MSS advertisement. MSS option must hold the maximum
1826 * segment size we can accept, so it must always be:
1827 * max(if mtu) - ip header - tcp header
1830 tcp_mss_to_advertise(const struct ifnet
*ifp
, int af
)
1832 extern u_long in_maxmtu
;
1837 * In order to avoid defeating path MTU discovery on the peer,
1838 * we advertise the max MTU of all attached networks as our MSS,
1839 * per RFC 1191, section 3.1.
1841 * We provide the option to advertise just the MTU of
1842 * the interface on which we hope this connection will
1843 * be receiving. If we are responding to a SYN, we
1844 * will have a pretty good idea about this, but when
1845 * initiating a connection there is a bit more doubt.
1847 * We also need to ensure that loopback has a large enough
1848 * MSS, as the loopback MTU is never included in in_maxmtu.
1858 mss
= IN6_LINKMTU(ifp
);
1863 if (tcp_mss_ifmtu
== 0)
1866 mss
= max(in_maxmtu
, mss
);
1870 mss
= max(in6_maxmtu
, mss
);
1877 hdrsiz
= sizeof(struct ip
);
1881 hdrsiz
= sizeof(struct ip6_hdr
);
1888 hdrsiz
+= sizeof(struct tcphdr
);
1892 mss
= max(tcp_mssdflt
, mss
);
1897 * Set connection variables based on the peer's advertised MSS.
1898 * We are passed the TCPCB for the actual connection. If we
1899 * are the server, we are called by the compressed state engine
1900 * when the 3-way handshake is complete. If we are the client,
1901 * we are called when we receive the SYN,ACK from the server.
1903 * NOTE: Our advertised MSS value must be initialized in the TCPCB
1904 * before this routine is called!
1907 tcp_mss_from_peer(struct tcpcb
*tp
, int offer
)
1910 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
1917 if (tp
->t_inpcb
&& tp
->t_in6pcb
)
1918 panic("tcp_mss_from_peer: both t_inpcb and t_in6pcb are set");
1924 so
= tp
->t_inpcb
->inp_socket
;
1925 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
1926 rt
= in_pcbrtentry(tp
->t_inpcb
);
1932 so
= tp
->t_in6pcb
->in6p_socket
;
1933 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
1934 rt
= in6_pcbrtentry(tp
->t_in6pcb
);
1940 * As per RFC1122, use the default MSS value, unless they
1941 * sent us an offer. Do not accept offers less than 256 bytes.
1946 mss
= max(mss
, 256); /* sanity */
1947 tp
->t_peermss
= mss
;
1948 mss
-= tcp_optlen(tp
);
1951 mss
-= ip_optlen(tp
->t_inpcb
);
1955 mss
-= ip6_optlen(tp
->t_in6pcb
);
1959 * If there's a pipesize, change the socket buffer to that size.
1960 * Make the socket buffer an integral number of MSS units. If
1961 * the MSS is larger than the socket buffer, artificially decrease
1965 if (rt
!= NULL
&& rt
->rt_rmx
.rmx_sendpipe
!= 0)
1966 bufsize
= rt
->rt_rmx
.rmx_sendpipe
;
1970 KASSERT(so
!= NULL
);
1971 bufsize
= so
->so_snd
.sb_hiwat
;
1976 bufsize
= roundup(bufsize
, mss
);
1977 if (bufsize
> sb_max
)
1979 (void) sbreserve(&so
->so_snd
, bufsize
, so
);
1984 if (rt
!= NULL
&& rt
->rt_rmx
.rmx_ssthresh
) {
1986 * There's some sort of gateway or interface buffer
1987 * limit on the path. Use this to set the slow
1988 * start threshold, but set the threshold to no less
1991 tp
->snd_ssthresh
= max(2 * mss
, rt
->rt_rmx
.rmx_ssthresh
);
1997 * Processing necessary when a TCP connection is established.
2000 tcp_established(struct tcpcb
*tp
)
2009 if (tp
->t_inpcb
&& tp
->t_in6pcb
)
2010 panic("tcp_established: both t_inpcb and t_in6pcb are set");
2016 so
= tp
->t_inpcb
->inp_socket
;
2017 #if defined(RTV_RPIPE)
2018 rt
= in_pcbrtentry(tp
->t_inpcb
);
2024 so
= tp
->t_in6pcb
->in6p_socket
;
2025 #if defined(RTV_RPIPE)
2026 rt
= in6_pcbrtentry(tp
->t_in6pcb
);
2031 tp
->t_state
= TCPS_ESTABLISHED
;
2032 TCP_TIMER_ARM(tp
, TCPT_KEEP
, tp
->t_keepidle
);
2035 if (rt
!= NULL
&& rt
->rt_rmx
.rmx_recvpipe
!= 0)
2036 bufsize
= rt
->rt_rmx
.rmx_recvpipe
;
2040 KASSERT(so
!= NULL
);
2041 bufsize
= so
->so_rcv
.sb_hiwat
;
2043 if (bufsize
> tp
->t_ourmss
) {
2044 bufsize
= roundup(bufsize
, tp
->t_ourmss
);
2045 if (bufsize
> sb_max
)
2047 (void) sbreserve(&so
->so_rcv
, bufsize
, so
);
/*
 * Check if there's an initial rtt or rttvar.  Convert from the
 * route-table units to scaled multiples of the slow timeout timer.
 * Called only during the 3-way handshake.
 *
 * Does nothing when the connection has no route, already has a
 * smoothed RTT, or the route carries no RTT metric.  Otherwise seeds
 * t_srtt, t_rttvar and t_rxtcur, and also t_rttmin when the route's
 * RTT metric is locked.
 */
void
tcp_rmx_rtt(struct tcpcb *tp)
{
#ifdef RTV_RTT
	struct rtentry *rt = NULL;
	int rtt;

#ifdef DIAGNOSTIC
	if (tp->t_inpcb && tp->t_in6pcb)
		panic("tcp_rmx_rtt: both t_inpcb and t_in6pcb are set");
#endif
#ifdef INET
	if (tp->t_inpcb)
		rt = in_pcbrtentry(tp->t_inpcb);
#endif
#ifdef INET6
	if (tp->t_in6pcb)
		rt = in6_pcbrtentry(tp->t_in6pcb);
#endif
	if (rt == NULL)
		return;

	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
		/*
		 * XXX The lock bit for MTU indicates that the value
		 * is also a minimum value; this is subject to time.
		 */
		if (rt->rt_rmx.rmx_locks & RTV_RTT)
			TCPT_RANGESET(tp->t_rttmin,
			    rtt / (RTM_RTTUNIT / PR_SLOWHZ),
			    TCPTV_MIN, TCPTV_REXMTMAX);
		tp->t_srtt = rtt /
		    ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
		if (rt->rt_rmx.rmx_rttvar) {
			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
			    ((RTM_RTTUNIT / PR_SLOWHZ) >>
				(TCP_RTTVAR_SHIFT + 2));
		} else {
			/* Default variation is +- 1 rtt */
			tp->t_rttvar =
			    tp->t_srtt >> (TCP_RTT_SHIFT - TCP_RTTVAR_SHIFT);
		}
		TCPT_RANGESET(tp->t_rxtcur,
		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2),
		    tp->t_rttmin, TCPTV_REXMTMAX);
	}
#endif
}
2105 tcp_seq tcp_iss_seq
= 0; /* tcp initial seq # */
2107 u_int8_t tcp_iss_secret
[16]; /* 128 bits; should be plenty */
2111 * Get a new sequence value given a tcp control block
2114 tcp_new_iss(struct tcpcb
*tp
, tcp_seq addin
)
2118 if (tp
->t_inpcb
!= NULL
) {
2119 return (tcp_new_iss1(&tp
->t_inpcb
->inp_laddr
,
2120 &tp
->t_inpcb
->inp_faddr
, tp
->t_inpcb
->inp_lport
,
2121 tp
->t_inpcb
->inp_fport
, sizeof(tp
->t_inpcb
->inp_laddr
),
2126 if (tp
->t_in6pcb
!= NULL
) {
2127 return (tcp_new_iss1(&tp
->t_in6pcb
->in6p_laddr
,
2128 &tp
->t_in6pcb
->in6p_faddr
, tp
->t_in6pcb
->in6p_lport
,
2129 tp
->t_in6pcb
->in6p_fport
, sizeof(tp
->t_in6pcb
->in6p_laddr
),
2134 panic("tcp_new_iss");
2138 * This routine actually generates a new TCP initial sequence number.
2141 tcp_new_iss1(void *laddr
, void *faddr
, u_int16_t lport
, u_int16_t fport
,
2142 size_t addrsz
, tcp_seq addin
)
2147 static bool tcp_iss_gotten_secret
;
2150 * If we haven't been here before, initialize our cryptographic
2153 if (tcp_iss_gotten_secret
== false) {
2154 rnd_extract_data(tcp_iss_secret
, sizeof(tcp_iss_secret
),
2156 tcp_iss_gotten_secret
= true;
2159 if (tcp_do_rfc1948
) {
2161 u_int8_t hash
[16]; /* XXX MD5 knowledge */
2164 * Compute the base value of the ISS. It is a hash
2165 * of (saddr, sport, daddr, dport, secret).
2169 MD5Update(&ctx
, (u_char
*) laddr
, addrsz
);
2170 MD5Update(&ctx
, (u_char
*) &lport
, sizeof(lport
));
2172 MD5Update(&ctx
, (u_char
*) faddr
, addrsz
);
2173 MD5Update(&ctx
, (u_char
*) &fport
, sizeof(fport
));
2175 MD5Update(&ctx
, tcp_iss_secret
, sizeof(tcp_iss_secret
));
2177 MD5Final(hash
, &ctx
);
2179 memcpy(&tcp_iss
, hash
, sizeof(tcp_iss
));
2182 * Now increment our "timer", and add it in to
2183 * the computed value.
2186 * XXX TCP_ISSINCR too large to use?
2188 tcp_iss_seq
+= TCP_ISSINCR
;
2190 printf("ISS hash 0x%08x, ", tcp_iss
);
2192 tcp_iss
+= tcp_iss_seq
+ addin
;
2194 printf("new ISS 0x%08x\n", tcp_iss
);
2197 #endif /* NRND > 0 */
2203 rnd_extract_data(&tcp_iss
, sizeof(tcp_iss
), RND_EXTRACT_ANY
);
2205 tcp_iss
= arc4random();
2209 * If we were asked to add some amount to a known value,
2210 * we will take a random value obtained above, mask off
2211 * the upper bits, and add in the known value. We also
2212 * add in a constant to ensure that we are at least a
2213 * certain distance from the original value.
2215 * This is used when an old connection is in timed wait
2216 * and we have a new one coming in, for instance.
2220 printf("Random %08x, ", tcp_iss
);
2222 tcp_iss
&= TCP_ISS_RANDOM_MASK
;
2223 tcp_iss
+= addin
+ TCP_ISSINCR
;
2225 printf("Old ISS %08x, ISS %08x\n", addin
, tcp_iss
);
2228 tcp_iss
&= TCP_ISS_RANDOM_MASK
;
2229 tcp_iss
+= tcp_iss_seq
;
2230 tcp_iss_seq
+= TCP_ISSINCR
;
2232 printf("ISS %08x\n", tcp_iss
);
2237 if (tcp_compat_42
) {
2239 * Limit it to the positive range for really old TCP
2241 * Just AND off the top bit instead of checking if
2242 * is set first - saves a branch 50% of the time.
2244 tcp_iss
&= 0x7fffffff; /* XXX */
#if defined(IPSEC) || defined(FAST_IPSEC)
/*
 * compute ESP/AH header size for TCP, including outer IP header.
 *
 * Returns 0 when `tp' is NULL, has no header template, has no IPv4
 * PCB, or is not an AF_INET connection.
 */
size_t
ipsec4_hdrsiz_tcp(struct tcpcb *tp)
{
	struct inpcb *inp;
	size_t hdrsiz;

	/* XXX mapped addr case (tp->t_in6pcb) */
	if (!tp || !tp->t_template || !(inp = tp->t_inpcb))
		return 0;
	switch (tp->t_family) {
	case AF_INET:
		/* XXX: should use correct direction. */
		hdrsiz = ipsec4_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, inp);
		break;
	default:
		hdrsiz = 0;
		break;
	}

	return hdrsiz;
}

#ifdef INET6
/*
 * IPv6 counterpart of ipsec4_hdrsiz_tcp().
 *
 * Returns 0 when `tp' is NULL, has no header template, has no IPv6
 * PCB, or is not an AF_INET6 connection (the IPv4-mapped case is not
 * handled).
 */
size_t
ipsec6_hdrsiz_tcp(struct tcpcb *tp)
{
	struct in6pcb *in6p;
	size_t hdrsiz;

	if (!tp || !tp->t_template || !(in6p = tp->t_in6pcb))
		return 0;
	switch (tp->t_family) {
	case AF_INET6:
		/* XXX: should use correct direction. */
		hdrsiz = ipsec6_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, in6p);
		break;
	case AF_INET:
		/* mapped address case - tricky */
	default:
		hdrsiz = 0;
		break;
	}

	return hdrsiz;
}
#endif
#endif /*IPSEC*/
2301 * Determine the length of the TCP options for this connection.
2303 * XXX: What do we do for SACK, when we add that? Just reserve
2304 * all of the space? Otherwise we can't exactly be incrementing
2305 * cwnd by an amount that varies depending on the amount we last
2310 tcp_optlen(struct tcpcb
*tp
)
2315 if ((tp
->t_flags
& (TF_REQ_TSTMP
|TF_RCVD_TSTMP
|TF_NOOPT
)) ==
2316 (TF_REQ_TSTMP
| TF_RCVD_TSTMP
))
2317 optlen
+= TCPOLEN_TSTAMP_APPA
;
2319 #ifdef TCP_SIGNATURE
2320 if (tp
->t_flags
& TF_SIGNATURE
)
2321 optlen
+= TCPOLEN_SIGNATURE
+ 2;
2322 #endif /* TCP_SIGNATURE */
2328 tcp_hdrsz(struct tcpcb
*tp
)
2332 switch (tp
->t_family
) {
2335 hlen
= sizeof(struct ip6_hdr
);
2339 hlen
= sizeof(struct ip
);
2345 hlen
+= sizeof(struct tcphdr
);
2347 if ((tp
->t_flags
& (TF_REQ_TSTMP
|TF_NOOPT
)) == TF_REQ_TSTMP
&&
2348 (tp
->t_flags
& TF_RCVD_TSTMP
) == TF_RCVD_TSTMP
)
2349 hlen
+= TCPOLEN_TSTAMP_APPA
;
2350 #ifdef TCP_SIGNATURE
2351 if (tp
->t_flags
& TF_SIGNATURE
)
2352 hlen
+= TCPOLEN_SIGLEN
;
2358 tcp_statinc(u_int stat
)
2361 KASSERT(stat
< TCP_NSTATS
);
2366 tcp_statadd(u_int stat
, uint64_t val
)
2369 KASSERT(stat
< TCP_NSTATS
);
2370 TCP_STATADD(stat
, val
);