1 /* $NetBSD: tcp_timer.c,v 1.83 2008/11/09 17:32:38 bouyer Exp $ */
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * Copyright (c) 1997, 1998, 2001, 2005 The NetBSD Foundation, Inc.
34 * All rights reserved.
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
38 * Facility, NASA Ames Research Center.
39 * This code is derived from software contributed to The NetBSD Foundation
40 * by Charles M. Hannum.
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
51 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
52 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
53 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
54 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
55 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
56 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
57 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
58 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
59 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
60 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
61 * POSSIBILITY OF SUCH DAMAGE.
65 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
66 * The Regents of the University of California. All rights reserved.
68 * Redistribution and use in source and binary forms, with or without
69 * modification, are permitted provided that the following conditions
71 * 1. Redistributions of source code must retain the above copyright
72 * notice, this list of conditions and the following disclaimer.
73 * 2. Redistributions in binary form must reproduce the above copyright
74 * notice, this list of conditions and the following disclaimer in the
75 * documentation and/or other materials provided with the distribution.
76 * 3. Neither the name of the University nor the names of its contributors
77 * may be used to endorse or promote products derived from this software
78 * without specific prior written permission.
80 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
81 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
82 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
83 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
84 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
85 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
86 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
87 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
88 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
89 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
92 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
95 #include <sys/cdefs.h>
96 __KERNEL_RCSID(0, "$NetBSD: tcp_timer.c,v 1.83 2008/11/09 17:32:38 bouyer Exp $");
99 #include "opt_tcp_debug.h"
101 #include <sys/param.h>
102 #include <sys/systm.h>
103 #include <sys/malloc.h>
104 #include <sys/mbuf.h>
105 #include <sys/socket.h>
106 #include <sys/socketvar.h>
107 #include <sys/protosw.h>
108 #include <sys/errno.h>
109 #include <sys/kernel.h>
112 #include <net/route.h>
114 #include <netinet/in.h>
115 #include <netinet/in_systm.h>
116 #include <netinet/ip.h>
117 #include <netinet/in_pcb.h>
118 #include <netinet/ip_var.h>
119 #include <netinet/ip_icmp.h>
123 #include <netinet/in.h>
125 #include <netinet/ip6.h>
126 #include <netinet6/in6_pcb.h>
129 #include <netinet/tcp.h>
130 #include <netinet/tcp_fsm.h>
131 #include <netinet/tcp_seq.h>
132 #include <netinet/tcp_timer.h>
133 #include <netinet/tcp_var.h>
134 #include <netinet/tcp_private.h>
135 #include <netinet/tcp_congctl.h>
136 #include <netinet/tcpip.h>
138 #include <netinet/tcp_debug.h>
142 * Various tunable timer parameters. These are initialized in tcp_init(),
143 * unless they are patched.
145 u_int tcp_keepinit
= 0;
146 u_int tcp_keepidle
= 0;
147 u_int tcp_keepintvl
= 0;
148 u_int tcp_keepcnt
= 0; /* max idle probes */
150 int tcp_maxpersistidle
= 0; /* max idle time in persist */
153 * Time to delay the ACK. This is initialized in tcp_init(), unless
156 int tcp_delack_ticks
= 0;
/*
 * Forward declarations of the four per-connection timer handlers defined
 * later in this file.  Each takes a single opaque pointer, which the
 * handler bodies assign to a struct tcpcb pointer.
 */
158 void tcp_timer_rexmt(void *);
159 void tcp_timer_persist(void *);
160 void tcp_timer_keep(void *);
161 void tcp_timer_2msl(void *);
/*
 * Handler table with TCPT_NTIMERS slots.
 * NOTE(review): the initializer entries and the closing brace (original
 * lines 164-170) are absent from this listing.  Presumably the entries
 * are the four handlers declared above, one per TCPT_* index -- confirm
 * against the pristine file.
 */
163 const tcp_timer_func_t tcp_timer_funcs
[TCPT_NTIMERS
] = {
/*
 * Gives every timer tunable its compiled-in default, but only while the
 * tunable still holds zero; a value already set to non-zero is kept.
 * NOTE(review): the function header and opening brace (original lines
 * 172-176) and the closing brace are absent from this listing, so the
 * function name cannot be read from here.
 */
171 * Timer state initialization, called from tcp_init().
177 if (tcp_keepinit
== 0)
178 tcp_keepinit
= TCPTV_KEEP_INIT
;
180 if (tcp_keepidle
== 0)
181 tcp_keepidle
= TCPTV_KEEP_IDLE
;
183 if (tcp_keepintvl
== 0)
184 tcp_keepintvl
= TCPTV_KEEPINTVL
;
186 if (tcp_keepcnt
== 0)
187 tcp_keepcnt
= TCPTV_KEEPCNT
;
/* The persist idle limit falls back to the same constant as tcp_keepidle. */
189 if (tcp_maxpersistidle
== 0)
190 tcp_maxpersistidle
= TCPTV_KEEP_IDLE
;
192 if (tcp_delack_ticks
== 0)
193 tcp_delack_ticks
= TCP_DELACK_TICKS
;
197 * Callout to process delayed ACKs for a TCPCB.
200 tcp_delack(void *arg
)
202 struct tcpcb
*tp
= arg
;
205 * If tcp_output() wasn't able to transmit the ACK
206 * for whatever reason, it will restart the delayed
210 mutex_enter(softnet_lock
);
211 if ((tp
->t_flags
& (TF_DEAD
| TF_DELACK
)) != TF_DELACK
) {
212 mutex_exit(softnet_lock
);
215 if (!callout_expired(&tp
->t_delack_ch
)) {
216 mutex_exit(softnet_lock
);
220 tp
->t_flags
|= TF_ACKNOW
;
221 KERNEL_LOCK(1, NULL
);
222 (void) tcp_output(tp
);
223 KERNEL_UNLOCK_ONE(NULL
);
224 mutex_exit(softnet_lock
);
228 * Tcp protocol timeout routine called every 500 ms.
229 * Updates the timers in all active tcb's and
230 * causes finite state machine actions if timers expire.
/*
 * With softnet_lock held, the visible body advances tcp_iss_seq by
 * TCP_ISSINCR and increments tcp_now, the counter the timer handlers in
 * this file subtract t_rcvtime from.
 * NOTE(review): the function header (original lines 231-235) is absent
 * from this listing.  The original lines 236-239 are contiguous and only
 * bump these two globals, so the description above about updating timers
 * in every tcb does not match the visible statements -- confirm whether
 * that text is stale.
 */
236 mutex_enter(softnet_lock
);
237 tcp_iss_seq
+= TCP_ISSINCR
; /* increment iss */
238 tcp_now
++; /* for timestamps */
239 mutex_exit(softnet_lock
);
243 * Cancel all timers for TCP tp.
246 tcp_canceltimers(struct tcpcb
*tp
)
250 for (i
= 0; i
< TCPT_NTIMERS
; i
++)
251 TCP_TIMER_DISARM(tp
, i
);
254 const int tcp_backoff
[TCP_MAXRXTSHIFT
+ 1] =
255 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
257 const int tcp_totbackoff
= 511; /* sum of tcp_backoff[] */
/*
 * Retransmit-timer handler.  arg is the struct tcpcb whose TCPT_REXMT
 * timer fired.
 *
 * NOTE(review): this listing carries the original line numbers as a
 * prefix, and gaps in that numbering mark dropped lines (return type,
 * braces, early exits, preprocessor conditionals, some assignments).
 * Confirm control flow against the pristine file before relying on the
 * notes added below.
 */
260 * TCP timer processing.
264 tcp_timer_rexmt(void *arg
)
266 struct tcpcb
*tp
= arg
;
269 struct socket
*so
= NULL
;
/*
 * Work is done under softnet_lock.  Both guards release the lock again:
 * one for a tcpcb flagged TF_DEAD, one for a callout that has not expired.
 */
273 mutex_enter(softnet_lock
);
274 if ((tp
->t_flags
& TF_DEAD
) != 0) {
275 mutex_exit(softnet_lock
);
278 if (!callout_expired(&tp
->t_timer
[TCPT_REXMT
])) {
279 mutex_exit(softnet_lock
);
283 KERNEL_LOCK(1, NULL
);
/*
 * Deferred path-MTU update: taken when TF_PMTUD_PEND is set, t_inpcb is
 * non-NULL and t_pmtud_th_seq lies in [snd_una, snd_una + t_ourmss).
 * The flag is cleared, a synthetic ICMP record is filled from the saved
 * t_pmtud_* fields and passed to icmp_mtudisc(), and every PCB in
 * tcbtable for that peer is notified with EMSGSIZE.  This path releases
 * both locks itself.  The declaration of icmp and the final argument of
 * in_pcbnotifyall (original line 304) are absent from this listing.
 */
284 if ((tp
->t_flags
& TF_PMTUD_PEND
) && tp
->t_inpcb
&&
285 SEQ_GEQ(tp
->t_pmtud_th_seq
, tp
->snd_una
) &&
286 SEQ_LT(tp
->t_pmtud_th_seq
, (int)(tp
->snd_una
+ tp
->t_ourmss
))) {
287 extern struct sockaddr_in icmpsrc
;
290 tp
->t_flags
&= ~TF_PMTUD_PEND
;
292 /* XXX create fake icmp message with relevant entries */
293 icmp
.icmp_nextmtu
= tp
->t_pmtud_nextmtu
;
294 icmp
.icmp_ip
.ip_len
= tp
->t_pmtud_ip_len
;
295 icmp
.icmp_ip
.ip_hl
= tp
->t_pmtud_ip_hl
;
296 icmpsrc
.sin_addr
= tp
->t_inpcb
->inp_faddr
;
297 icmp_mtudisc(&icmp
, icmpsrc
.sin_addr
);
300 * Notify all connections to the same peer about
301 * new mss and trigger retransmit.
303 in_pcbnotifyall(&tcbtable
, icmpsrc
.sin_addr
, EMSGSIZE
,
305 KERNEL_UNLOCK_ONE(NULL
);
306 mutex_exit(softnet_lock
);
/*
 * Trace bookkeeping, closed by the TCP_DEBUG endif below: remember the
 * owning socket (via t_inpcb or t_in6pcb) and the state before the
 * timeout is processed.  The conditions guarding the two assignments are
 * absent from this listing.
 */
312 so
= tp
->t_inpcb
->inp_socket
;
316 so
= tp
->t_in6pcb
->in6p_socket
;
318 ostate
= tp
->t_state
;
319 #endif /* TCP_DEBUG */
322 * Clear the SACK scoreboard, reset FACK estimate.
324 tcp_free_sackholes(tp
);
325 tp
->snd_fack
= tp
->snd_una
;
328 * Retransmission timer went off. Message has not
329 * been acked within retransmit interval. Back off
330 * to a longer retransmit interval and retransmit one segment.
/*
 * The shift count is incremented first.  Once it exceeds
 * TCP_MAXRXTSHIFT the connection is dropped with t_softerror if that is
 * non-zero, otherwise ETIMEDOUT.  The result of tcp_drop() is stored back
 * into tp, and the trace call near the end tests tp before using it.
 */
333 if (++tp
->t_rxtshift
> TCP_MAXRXTSHIFT
) {
334 tp
->t_rxtshift
= TCP_MAXRXTSHIFT
;
335 TCP_STATINC(TCP_STAT_TIMEOUTDROP
);
336 tp
= tcp_drop(tp
, tp
->t_softerror
?
337 tp
->t_softerror
: ETIMEDOUT
);
340 TCP_STATINC(TCP_STAT_REXMTTIMEO
);
/*
 * New timeout: the TCP_REXMTVAL base scaled by tcp_backoff[t_rxtshift],
 * range-limited between t_rttmin and TCPTV_REXMTMAX, then the timer is
 * re-armed with it.
 * NOTE(review): the statement controlled by the rto test (original line
 * 343) is absent; presumably it raises rto to t_rttmin -- confirm.
 */
341 rto
= TCP_REXMTVAL(tp
);
342 if (rto
< tp
->t_rttmin
)
344 TCPT_RANGESET(tp
->t_rxtcur
, rto
* tcp_backoff
[tp
->t_rxtshift
],
345 tp
->t_rttmin
, TCPTV_REXMTMAX
);
346 TCP_TIMER_ARM(tp
, TCPT_REXMT
, tp
->t_rxtcur
);
349 * If we are losing and we are trying path MTU discovery,
350 * try turning it off. This will avoid black holes in
351 * the network which suppress or fail to send "packet
352 * too big" ICMP messages. We should ideally do
353 * lots more sophisticated searching to find the right
/*
 * Counted once t_mtudisc is set and the shift exceeds
 * TCP_MAXRXTSHIFT / 6.  The recovery statements that follow each of the
 * two short comments below (original lines 361-364 and 366-369) are
 * absent from this listing.
 */
356 if (tp
->t_mtudisc
&& tp
->t_rxtshift
> TCP_MAXRXTSHIFT
/ 6) {
357 TCP_STATINC(TCP_STAT_PMTUBLACKHOLE
);
360 /* try turning PMTUD off */
365 /* try using IPv6 minimum MTU */
370 /* XXX: more sophisticated Black hole recovery code? */
374 * If losing, let the lower level know and try for
375 * a better route. Also, if we backed off this far,
376 * our srtt estimate is probably bogus. Clobber it
377 * so we'll take the next rtt measurement as our srtt;
378 * move the current srtt into rttvar to keep the current
379 * retransmit times until then.
381 if (tp
->t_rxtshift
> TCP_MAXRXTSHIFT
/ 4) {
384 in_losing(tp
->t_inpcb
);
388 in6_losing(tp
->t_in6pcb
);
390 tp
->t_rttvar
+= (tp
->t_srtt
>> TCP_RTT_SHIFT
);
/* Rewind snd_nxt to the oldest unacknowledged byte; record snd_max in snd_high. */
393 tp
->snd_nxt
= tp
->snd_una
;
394 tp
->snd_high
= tp
->snd_max
;
396 * If timing a segment in this window, stop the timer.
400 * Remember if we are retransmitting a SYN, because if
401 * we do, set the initial congestion window must be set
404 if (tp
->t_state
== TCPS_SYN_SENT
)
405 tp
->t_flags
|= TF_SYN_REXMT
;
408 * Adjust congestion control parameters.
/* Delegated to the slow_retransmit hook of the congestion-control module attached to tp. */
410 tp
->t_congctl
->slow_retransmit(tp
);
412 (void) tcp_output(tp
);
/* Trace only when tp is still non-NULL and the socket has SO_DEBUG set. */
416 if (tp
&& so
->so_options
& SO_DEBUG
)
417 tcp_trace(TA_USER
, ostate
, tp
, NULL
,
418 PRU_SLOWTIMO
| (TCPT_REXMT
<< 8));
/* Locks are released in reverse order of acquisition. */
420 KERNEL_UNLOCK_ONE(NULL
);
421 mutex_exit(softnet_lock
);
/*
 * Persist-timer handler.  arg is the struct tcpcb whose TCPT_PERSIST
 * timer fired.
 *
 * NOTE(review): this listing carries the original line numbers as a
 * prefix, and gaps in that numbering mark dropped lines (return type,
 * braces, early exits, preprocessor conditionals, some statements).
 * Confirm control flow against the pristine file.
 */
425 tcp_timer_persist(void *arg
)
427 struct tcpcb
*tp
= arg
;
430 struct socket
*so
= NULL
;
/*
 * Work is done under softnet_lock.  Both guards release the lock again:
 * one for a tcpcb flagged TF_DEAD, one for a callout that has not expired.
 */
434 mutex_enter(softnet_lock
);
435 if ((tp
->t_flags
& TF_DEAD
) != 0) {
436 mutex_exit(softnet_lock
);
439 if (!callout_expired(&tp
->t_timer
[TCPT_PERSIST
])) {
440 mutex_exit(softnet_lock
);
444 KERNEL_LOCK(1, NULL
);
/*
 * Trace bookkeeping, closed by the TCP_DEBUG endif below: remember the
 * owning socket (via t_inpcb or t_in6pcb) and the current state.  The
 * conditions guarding the two assignments are absent from this listing.
 */
448 so
= tp
->t_inpcb
->inp_socket
;
452 so
= tp
->t_in6pcb
->in6p_socket
;
455 ostate
= tp
->t_state
;
456 #endif /* TCP_DEBUG */
459 * Persistance timer into zero window.
460 * Force a byte to be output, if possible.
464 * Hack: if the peer is dead/unreachable, we do not
465 * time out if the window is closed. After a full
466 * backoff, drop the connection if the idle time
467 * (no responses to probes) reaches the maximum
468 * backoff that we would use if retransmitting.
/*
 * NOTE(review): the statement controlled by the rto test (original line
 * 472) is absent; presumably it raises rto to t_rttmin -- confirm.
 */
470 rto
= TCP_REXMTVAL(tp
);
471 if (rto
< tp
->t_rttmin
)
/*
 * Drop only at maximum backoff (t_rxtshift == TCP_MAXRXTSHIFT), and only
 * when the time since t_rcvtime has reached either tcp_maxpersistidle or
 * rto * tcp_totbackoff.  The result of tcp_drop() is stored back into tp.
 */
473 if (tp
->t_rxtshift
== TCP_MAXRXTSHIFT
&&
474 ((tcp_now
- tp
->t_rcvtime
) >= tcp_maxpersistidle
||
475 (tcp_now
- tp
->t_rcvtime
) >= rto
* tcp_totbackoff
)) {
476 TCP_STATINC(TCP_STAT_PERSISTDROPS
);
477 tp
= tcp_drop(tp
, ETIMEDOUT
);
/*
 * Otherwise count the timeout and run the output path.  Original lines
 * 481-482 and 484 around the tcp_output call are absent from this
 * listing.
 */
480 TCP_STATINC(TCP_STAT_PERSISTTIMEO
);
483 (void) tcp_output(tp
);
/* Trace only when tp is still non-NULL and the socket has SO_DEBUG set. */
488 if (tp
&& so
->so_options
& SO_DEBUG
)
489 tcp_trace(TA_USER
, ostate
, tp
, NULL
,
490 PRU_SLOWTIMO
| (TCPT_PERSIST
<< 8));
/* Locks are released in reverse order of acquisition. */
492 KERNEL_UNLOCK_ONE(NULL
);
493 mutex_exit(softnet_lock
);
/*
 * Keepalive-timer handler.  arg is the struct tcpcb whose TCPT_KEEP
 * timer fired.
 *
 * NOTE(review): this listing carries the original line numbers as a
 * prefix, and gaps in that numbering mark dropped lines (return type,
 * braces, early exits, branch targets, preprocessor conditionals).
 * Confirm control flow against the pristine file.
 */
497 tcp_timer_keep(void *arg
)
499 struct tcpcb
*tp
= arg
;
500 struct socket
*so
= NULL
; /* Quell compiler warning */
/*
 * Work is done under softnet_lock.  Both guards release the lock again:
 * one for a tcpcb flagged TF_DEAD, one for a callout that has not expired.
 */
505 mutex_enter(softnet_lock
);
506 if ((tp
->t_flags
& TF_DEAD
) != 0) {
507 mutex_exit(softnet_lock
);
510 if (!callout_expired(&tp
->t_timer
[TCPT_KEEP
])) {
511 mutex_exit(softnet_lock
);
515 KERNEL_LOCK(1, NULL
);
518 ostate
= tp
->t_state
;
519 #endif /* TCP_DEBUG */
522 * Keep-alive timer went off; send something
523 * or drop connection if idle for too long.
526 TCP_STATINC(TCP_STAT_KEEPTIMEO
);
/*
 * Test for a connection that has not reached an established state.
 * NOTE(review): the statement this test controls (original line 528) is
 * absent; presumably it transfers to the drop path at the end -- confirm.
 */
527 if (TCPS_HAVEESTABLISHED(tp
->t_state
) == 0)
/*
 * Look up the owning socket via t_inpcb or t_in6pcb.  The conditions
 * guarding the two assignments are absent from this listing.
 */
531 so
= tp
->t_inpcb
->inp_socket
;
535 so
= tp
->t_in6pcb
->in6p_socket
;
/*
 * Probing applies only when the socket has SO_KEEPALIVE set and the
 * state is no later than TCPS_CLOSE_WAIT.
 */
538 if (so
->so_options
& SO_KEEPALIVE
&&
539 tp
->t_state
<= TCPS_CLOSE_WAIT
) {
/*
 * Idle-limit test: t_maxidle is non-zero and the time since t_rcvtime
 * has reached t_keepidle + t_maxidle.
 * NOTE(review): the controlled statement (original line 543) is absent;
 * presumably it transfers to the drop path at the end -- confirm.
 */
540 if ((tp
->t_maxidle
> 0) &&
541 ((tcp_now
- tp
->t_rcvtime
) >=
542 tp
->t_keepidle
+ tp
->t_maxidle
))
545 * Send a packet designed to force a response
546 * if the peer is up and reachable:
547 * either an ACK if the connection is still alive,
548 * or an RST if the peer has closed the connection
549 * due to timeout or reboot.
550 * Using sequence number tp->snd_una-1
551 * causes the transmitted zero-length segment
552 * to lie outside the receive window;
553 * by the protocol spec, this requires the
554 * correspondent TCP to respond.
556 TCP_STATINC(TCP_STAT_KEEPPROBE
);
559 * The keepalive packet must have nonzero
560 * length to get a 4.2 host to respond.
/*
 * Two alternative tcp_respond() calls follow, differing in the visible
 * arguments only by rcv_nxt - 1 versus rcv_nxt.  The preprocessor lines
 * selecting between them and the trailing arguments of each call are
 * absent from this listing.
 */
562 (void)tcp_respond(tp
, tp
->t_template
,
563 (struct mbuf
*)NULL
, NULL
, tp
->rcv_nxt
- 1,
566 (void)tcp_respond(tp
, tp
->t_template
,
567 (struct mbuf
*)NULL
, NULL
, tp
->rcv_nxt
,
/*
 * The timer is re-armed with t_keepintvl in one branch and t_keepidle in
 * the other; the else separating the two (original line 571) is absent.
 */
570 TCP_TIMER_ARM(tp
, TCPT_KEEP
, tp
->t_keepintvl
);
572 TCP_TIMER_ARM(tp
, TCPT_KEEP
, tp
->t_keepidle
);
/* Trace only when tp is non-NULL and the socket has SO_DEBUG set. */
575 if (tp
&& so
->so_options
& SO_DEBUG
)
576 tcp_trace(TA_USER
, ostate
, tp
, NULL
,
577 PRU_SLOWTIMO
| (TCPT_KEEP
<< 8));
/* Normal exit: locks are released in reverse order of acquisition. */
579 KERNEL_UNLOCK_ONE(NULL
);
580 mutex_exit(softnet_lock
);
/*
 * Drop path: count the drop, tear the connection down with ETIMEDOUT and
 * release both locks.  The label introducing it is absent from this
 * listing.
 */
584 TCP_STATINC(TCP_STAT_KEEPDROPS
);
585 (void) tcp_drop(tp
, ETIMEDOUT
);
586 KERNEL_UNLOCK_ONE(NULL
);
587 mutex_exit(softnet_lock
);
/*
 * 2MSL-timer handler.  arg is the struct tcpcb whose TCPT_2MSL timer
 * fired.
 *
 * NOTE(review): this listing carries the original line numbers as a
 * prefix, and gaps in that numbering mark dropped lines (return type,
 * braces, early exits, the alternative branch, preprocessor
 * conditionals).  Confirm control flow against the pristine file.
 */
591 tcp_timer_2msl(void *arg
)
593 struct tcpcb
*tp
= arg
;
595 struct socket
*so
= NULL
;
/*
 * Work is done under softnet_lock.  Both guards release the lock again:
 * one for a tcpcb flagged TF_DEAD, one for a callout that has not expired.
 */
599 mutex_enter(softnet_lock
);
600 if ((tp
->t_flags
& TF_DEAD
) != 0) {
601 mutex_exit(softnet_lock
);
604 if (!callout_expired(&tp
->t_timer
[TCPT_2MSL
])) {
605 mutex_exit(softnet_lock
);
610 * 2 MSL timeout went off, clear the SACK scoreboard, reset
613 KERNEL_LOCK(1, NULL
);
614 tcp_free_sackholes(tp
);
615 tp
->snd_fack
= tp
->snd_una
;
/*
 * Trace bookkeeping, closed by the TCP_DEBUG endif below: remember the
 * owning socket (via t_inpcb or t_in6pcb) and the current state.  The
 * conditions guarding the two assignments are absent from this listing.
 */
620 so
= tp
->t_inpcb
->inp_socket
;
624 so
= tp
->t_in6pcb
->in6p_socket
;
627 ostate
= tp
->t_state
;
628 #endif /* TCP_DEBUG */
631 * 2 MSL timeout in shutdown went off. If we're closed but
632 * still waiting for peer to close and connection has been idle
633 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
634 * control block. Otherwise, check again in a bit.
/*
 * Re-arm for another t_keepintvl when the state is not TCPS_TIME_WAIT
 * and either t_maxidle is zero or the time since t_rcvtime is still
 * within t_maxidle.  The alternative branch (original lines 640-641) is
 * absent from this listing.
 */
636 if (tp
->t_state
!= TCPS_TIME_WAIT
&&
637 ((tp
->t_maxidle
== 0) ||
638 ((tcp_now
- tp
->t_rcvtime
) <= tp
->t_maxidle
)))
639 TCP_TIMER_ARM(tp
, TCPT_2MSL
, tp
->t_keepintvl
);
/* Trace only when tp is still non-NULL and the socket has SO_DEBUG set. */
644 if (tp
&& so
->so_options
& SO_DEBUG
)
645 tcp_trace(TA_USER
, ostate
, tp
, NULL
,
646 PRU_SLOWTIMO
| (TCPT_2MSL
<< 8));
/*
 * NOTE(review): softnet_lock is released before the kernel lock here,
 * the reverse of the rexmt, persist and keep handlers in this file.
 * Presumably harmless, but confirm the ordering is intentional.
 */
648 mutex_exit(softnet_lock
);
649 KERNEL_UNLOCK_ONE(NULL
);