1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* SCTP kernel implementation
3 * Copyright (c) 1999-2000 Cisco, Inc.
4 * Copyright (c) 1999-2001 Motorola, Inc.
5 * Copyright (c) 2001-2003 International Business Machines Corp.
6 * Copyright (c) 2001 Intel Corp.
7 * Copyright (c) 2001 La Monte H.P. Yarroll
9 * This file is part of the SCTP kernel implementation
11 * This module provides the abstraction for an SCTP transport representing
12 * a remote transport address. For local transport addresses, we just use
15 * Please send any bug reports or fixes you make to the
17 * lksctp developers <linux-sctp@vger.kernel.org>
19 * Written or modified by:
20 * La Monte H.P. Yarroll <piggy@acm.org>
21 * Karl Knutson <karl@athena.chicago.il.us>
22 * Jon Grimm <jgrimm@us.ibm.com>
23 * Xingang Guo <xingang.guo@intel.com>
24 * Hui Huang <hui.huang@nokia.com>
25 * Sridhar Samudrala <sri@us.ibm.com>
26 * Ardelle Fan <ardelle.fan@intel.com>
29 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31 #include <linux/slab.h>
32 #include <linux/types.h>
33 #include <linux/random.h>
34 #include <net/sctp/sctp.h>
35 #include <net/sctp/sm.h>
37 /* 1st Level Abstractions. */
39 /* Initialize a new transport from provided memory. */
40 static struct sctp_transport
*sctp_transport_init(struct net
*net
,
41 struct sctp_transport
*peer
,
42 const union sctp_addr
*addr
,
45 /* Copy in the address. */
46 peer
->af_specific
= sctp_get_af_specific(addr
->sa
.sa_family
);
47 memcpy(&peer
->ipaddr
, addr
, peer
->af_specific
->sockaddr_len
);
48 memset(&peer
->saddr
, 0, sizeof(union sctp_addr
));
50 peer
->sack_generation
= 0;
52 /* From 6.3.1 RTO Calculation:
54 * C1) Until an RTT measurement has been made for a packet sent to the
55 * given destination transport address, set RTO to the protocol
56 * parameter 'RTO.Initial'.
58 peer
->rto
= msecs_to_jiffies(net
->sctp
.rto_initial
);
60 peer
->last_time_heard
= 0;
61 peer
->last_time_ecne_reduced
= jiffies
;
63 peer
->param_flags
= SPP_HB_DISABLE
|
67 /* Initialize the default path max_retrans. */
68 peer
->pathmaxrxt
= net
->sctp
.max_retrans_path
;
69 peer
->pf_retrans
= net
->sctp
.pf_retrans
;
71 INIT_LIST_HEAD(&peer
->transmitted
);
72 INIT_LIST_HEAD(&peer
->send_ready
);
73 INIT_LIST_HEAD(&peer
->transports
);
75 timer_setup(&peer
->T3_rtx_timer
, sctp_generate_t3_rtx_event
, 0);
76 timer_setup(&peer
->hb_timer
, sctp_generate_heartbeat_event
, 0);
77 timer_setup(&peer
->reconf_timer
, sctp_generate_reconf_event
, 0);
78 timer_setup(&peer
->probe_timer
, sctp_generate_probe_event
, 0);
79 timer_setup(&peer
->proto_unreach_timer
,
80 sctp_generate_proto_unreach_event
, 0);
82 /* Initialize the 64-bit random nonce sent with heartbeat. */
83 get_random_bytes(&peer
->hb_nonce
, sizeof(peer
->hb_nonce
));
85 refcount_set(&peer
->refcnt
, 1);
90 /* Allocate and initialize a new transport. */
91 struct sctp_transport
*sctp_transport_new(struct net
*net
,
92 const union sctp_addr
*addr
,
95 struct sctp_transport
*transport
;
97 transport
= kzalloc(sizeof(*transport
), gfp
);
101 if (!sctp_transport_init(net
, transport
, addr
, gfp
))
104 SCTP_DBG_OBJCNT_INC(transport
);
115 /* This transport is no longer needed. Free up if possible, or
116 * delay until it last reference count.
118 void sctp_transport_free(struct sctp_transport
*transport
)
120 /* Try to delete the heartbeat timer. */
121 if (del_timer(&transport
->hb_timer
))
122 sctp_transport_put(transport
);
124 /* Delete the T3_rtx timer if it's active.
125 * There is no point in not doing this now and letting
126 * structure hang around in memory since we know
127 * the transport is going away.
129 if (del_timer(&transport
->T3_rtx_timer
))
130 sctp_transport_put(transport
);
132 if (del_timer(&transport
->reconf_timer
))
133 sctp_transport_put(transport
);
135 if (del_timer(&transport
->probe_timer
))
136 sctp_transport_put(transport
);
138 /* Delete the ICMP proto unreachable timer if it's active. */
139 if (del_timer(&transport
->proto_unreach_timer
))
140 sctp_transport_put(transport
);
142 sctp_transport_put(transport
);
145 static void sctp_transport_destroy_rcu(struct rcu_head
*head
)
147 struct sctp_transport
*transport
;
149 transport
= container_of(head
, struct sctp_transport
, rcu
);
151 dst_release(transport
->dst
);
153 SCTP_DBG_OBJCNT_DEC(transport
);
156 /* Destroy the transport data structure.
157 * Assumes there are no more users of this structure.
159 static void sctp_transport_destroy(struct sctp_transport
*transport
)
161 if (unlikely(refcount_read(&transport
->refcnt
))) {
162 WARN(1, "Attempt to destroy undead transport %p!\n", transport
);
166 sctp_packet_free(&transport
->packet
);
169 sctp_association_put(transport
->asoc
);
171 call_rcu(&transport
->rcu
, sctp_transport_destroy_rcu
);
174 /* Start T3_rtx timer if it is not already running and update the heartbeat
175 * timer. This routine is called every time a DATA chunk is sent.
177 void sctp_transport_reset_t3_rtx(struct sctp_transport
*transport
)
179 /* RFC 2960 6.3.2 Retransmission Timer Rules
181 * R1) Every time a DATA chunk is sent to any address(including a
182 * retransmission), if the T3-rtx timer of that address is not running
183 * start it running so that it will expire after the RTO of that
187 if (!timer_pending(&transport
->T3_rtx_timer
))
188 if (!mod_timer(&transport
->T3_rtx_timer
,
189 jiffies
+ transport
->rto
))
190 sctp_transport_hold(transport
);
193 void sctp_transport_reset_hb_timer(struct sctp_transport
*transport
)
195 unsigned long expires
;
197 /* When a data chunk is sent, reset the heartbeat interval. */
198 expires
= jiffies
+ sctp_transport_timeout(transport
);
199 if (!mod_timer(&transport
->hb_timer
,
200 expires
+ get_random_u32_below(transport
->rto
)))
201 sctp_transport_hold(transport
);
204 void sctp_transport_reset_reconf_timer(struct sctp_transport
*transport
)
206 if (!timer_pending(&transport
->reconf_timer
))
207 if (!mod_timer(&transport
->reconf_timer
,
208 jiffies
+ transport
->rto
))
209 sctp_transport_hold(transport
);
212 void sctp_transport_reset_probe_timer(struct sctp_transport
*transport
)
214 if (!mod_timer(&transport
->probe_timer
,
215 jiffies
+ transport
->probe_interval
))
216 sctp_transport_hold(transport
);
219 void sctp_transport_reset_raise_timer(struct sctp_transport
*transport
)
221 if (!mod_timer(&transport
->probe_timer
,
222 jiffies
+ transport
->probe_interval
* 30))
223 sctp_transport_hold(transport
);
226 /* This transport has been assigned to an association.
227 * Initialize fields from the association or from the sock itself.
228 * Register the reference count in the association.
230 void sctp_transport_set_owner(struct sctp_transport
*transport
,
231 struct sctp_association
*asoc
)
233 transport
->asoc
= asoc
;
234 sctp_association_hold(asoc
);
237 /* Initialize the pmtu of a transport. */
238 void sctp_transport_pmtu(struct sctp_transport
*transport
, struct sock
*sk
)
240 /* If we don't have a fresh route, look one up */
241 if (!transport
->dst
|| transport
->dst
->obsolete
) {
242 sctp_transport_dst_release(transport
);
243 transport
->af_specific
->get_dst(transport
, &transport
->saddr
,
247 if (transport
->param_flags
& SPP_PMTUD_DISABLE
) {
248 struct sctp_association
*asoc
= transport
->asoc
;
250 if (!transport
->pathmtu
&& asoc
&& asoc
->pathmtu
)
251 transport
->pathmtu
= asoc
->pathmtu
;
252 if (transport
->pathmtu
)
257 transport
->pathmtu
= sctp_dst_mtu(transport
->dst
);
259 transport
->pathmtu
= SCTP_DEFAULT_MAXSEGMENT
;
261 sctp_transport_pl_update(transport
);
264 void sctp_transport_pl_send(struct sctp_transport
*t
)
266 if (t
->pl
.probe_count
< SCTP_MAX_PROBES
)
269 t
->pl
.probe_count
= 0;
270 if (t
->pl
.state
== SCTP_PL_BASE
) {
271 if (t
->pl
.probe_size
== SCTP_BASE_PLPMTU
) { /* BASE_PLPMTU Confirmation Failed */
272 t
->pl
.state
= SCTP_PL_ERROR
; /* Base -> Error */
274 t
->pl
.pmtu
= SCTP_BASE_PLPMTU
;
275 t
->pathmtu
= t
->pl
.pmtu
+ sctp_transport_pl_hlen(t
);
276 sctp_assoc_sync_pmtu(t
->asoc
);
278 } else if (t
->pl
.state
== SCTP_PL_SEARCH
) {
279 if (t
->pl
.pmtu
== t
->pl
.probe_size
) { /* Black Hole Detected */
280 t
->pl
.state
= SCTP_PL_BASE
; /* Search -> Base */
281 t
->pl
.probe_size
= SCTP_BASE_PLPMTU
;
282 t
->pl
.probe_high
= 0;
284 t
->pl
.pmtu
= SCTP_BASE_PLPMTU
;
285 t
->pathmtu
= t
->pl
.pmtu
+ sctp_transport_pl_hlen(t
);
286 sctp_assoc_sync_pmtu(t
->asoc
);
287 } else { /* Normal probe failure. */
288 t
->pl
.probe_high
= t
->pl
.probe_size
;
289 t
->pl
.probe_size
= t
->pl
.pmtu
;
291 } else if (t
->pl
.state
== SCTP_PL_COMPLETE
) {
292 if (t
->pl
.pmtu
== t
->pl
.probe_size
) { /* Black Hole Detected */
293 t
->pl
.state
= SCTP_PL_BASE
; /* Search Complete -> Base */
294 t
->pl
.probe_size
= SCTP_BASE_PLPMTU
;
296 t
->pl
.pmtu
= SCTP_BASE_PLPMTU
;
297 t
->pathmtu
= t
->pl
.pmtu
+ sctp_transport_pl_hlen(t
);
298 sctp_assoc_sync_pmtu(t
->asoc
);
303 pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
304 __func__
, t
, t
->pl
.state
, t
->pl
.pmtu
, t
->pl
.probe_size
, t
->pl
.probe_high
);
308 bool sctp_transport_pl_recv(struct sctp_transport
*t
)
310 pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
311 __func__
, t
, t
->pl
.state
, t
->pl
.pmtu
, t
->pl
.probe_size
, t
->pl
.probe_high
);
313 t
->pl
.pmtu
= t
->pl
.probe_size
;
314 t
->pl
.probe_count
= 0;
315 if (t
->pl
.state
== SCTP_PL_BASE
) {
316 t
->pl
.state
= SCTP_PL_SEARCH
; /* Base -> Search */
317 t
->pl
.probe_size
+= SCTP_PL_BIG_STEP
;
318 } else if (t
->pl
.state
== SCTP_PL_ERROR
) {
319 t
->pl
.state
= SCTP_PL_SEARCH
; /* Error -> Search */
321 t
->pl
.pmtu
= t
->pl
.probe_size
;
322 t
->pathmtu
= t
->pl
.pmtu
+ sctp_transport_pl_hlen(t
);
323 sctp_assoc_sync_pmtu(t
->asoc
);
324 t
->pl
.probe_size
+= SCTP_PL_BIG_STEP
;
325 } else if (t
->pl
.state
== SCTP_PL_SEARCH
) {
326 if (!t
->pl
.probe_high
) {
327 if (t
->pl
.probe_size
< SCTP_MAX_PLPMTU
) {
328 t
->pl
.probe_size
= min(t
->pl
.probe_size
+ SCTP_PL_BIG_STEP
,
332 t
->pl
.probe_high
= SCTP_MAX_PLPMTU
;
334 t
->pl
.probe_size
+= SCTP_PL_MIN_STEP
;
335 if (t
->pl
.probe_size
>= t
->pl
.probe_high
) {
336 t
->pl
.probe_high
= 0;
337 t
->pl
.state
= SCTP_PL_COMPLETE
; /* Search -> Search Complete */
339 t
->pl
.probe_size
= t
->pl
.pmtu
;
340 t
->pathmtu
= t
->pl
.pmtu
+ sctp_transport_pl_hlen(t
);
341 sctp_assoc_sync_pmtu(t
->asoc
);
342 sctp_transport_reset_raise_timer(t
);
344 } else if (t
->pl
.state
== SCTP_PL_COMPLETE
) {
345 /* Raise probe_size again after 30 * interval in Search Complete */
346 t
->pl
.state
= SCTP_PL_SEARCH
; /* Search Complete -> Search */
347 t
->pl
.probe_size
= min(t
->pl
.probe_size
+ SCTP_PL_MIN_STEP
, SCTP_MAX_PLPMTU
);
350 return t
->pl
.state
== SCTP_PL_COMPLETE
;
353 static bool sctp_transport_pl_toobig(struct sctp_transport
*t
, u32 pmtu
)
355 pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n",
356 __func__
, t
, t
->pl
.state
, t
->pl
.pmtu
, t
->pl
.probe_size
, pmtu
);
358 if (pmtu
< SCTP_MIN_PLPMTU
|| pmtu
>= t
->pl
.probe_size
)
361 if (t
->pl
.state
== SCTP_PL_BASE
) {
362 if (pmtu
>= SCTP_MIN_PLPMTU
&& pmtu
< SCTP_BASE_PLPMTU
) {
363 t
->pl
.state
= SCTP_PL_ERROR
; /* Base -> Error */
365 t
->pl
.pmtu
= SCTP_BASE_PLPMTU
;
366 t
->pathmtu
= t
->pl
.pmtu
+ sctp_transport_pl_hlen(t
);
369 } else if (t
->pl
.state
== SCTP_PL_SEARCH
) {
370 if (pmtu
>= SCTP_BASE_PLPMTU
&& pmtu
< t
->pl
.pmtu
) {
371 t
->pl
.state
= SCTP_PL_BASE
; /* Search -> Base */
372 t
->pl
.probe_size
= SCTP_BASE_PLPMTU
;
373 t
->pl
.probe_count
= 0;
375 t
->pl
.probe_high
= 0;
376 t
->pl
.pmtu
= SCTP_BASE_PLPMTU
;
377 t
->pathmtu
= t
->pl
.pmtu
+ sctp_transport_pl_hlen(t
);
379 } else if (pmtu
> t
->pl
.pmtu
&& pmtu
< t
->pl
.probe_size
) {
380 t
->pl
.probe_size
= pmtu
;
381 t
->pl
.probe_count
= 0;
383 } else if (t
->pl
.state
== SCTP_PL_COMPLETE
) {
384 if (pmtu
>= SCTP_BASE_PLPMTU
&& pmtu
< t
->pl
.pmtu
) {
385 t
->pl
.state
= SCTP_PL_BASE
; /* Complete -> Base */
386 t
->pl
.probe_size
= SCTP_BASE_PLPMTU
;
387 t
->pl
.probe_count
= 0;
389 t
->pl
.probe_high
= 0;
390 t
->pl
.pmtu
= SCTP_BASE_PLPMTU
;
391 t
->pathmtu
= t
->pl
.pmtu
+ sctp_transport_pl_hlen(t
);
392 sctp_transport_reset_probe_timer(t
);
400 bool sctp_transport_update_pmtu(struct sctp_transport
*t
, u32 pmtu
)
402 struct sock
*sk
= t
->asoc
->base
.sk
;
403 struct dst_entry
*dst
;
406 if (unlikely(pmtu
< SCTP_DEFAULT_MINSEGMENT
)) {
407 pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
408 __func__
, pmtu
, SCTP_DEFAULT_MINSEGMENT
);
409 /* Use default minimum segment instead */
410 pmtu
= SCTP_DEFAULT_MINSEGMENT
;
412 pmtu
= SCTP_TRUNC4(pmtu
);
414 if (sctp_transport_pl_enabled(t
))
415 return sctp_transport_pl_toobig(t
, pmtu
- sctp_transport_pl_hlen(t
));
417 dst
= sctp_transport_dst_check(t
);
419 struct sctp_pf
*pf
= sctp_get_pf_specific(dst
->ops
->family
);
420 union sctp_addr addr
;
422 pf
->af
->from_sk(&addr
, sk
);
423 pf
->to_sk_daddr(&t
->ipaddr
, sk
);
424 dst
->ops
->update_pmtu(dst
, sk
, NULL
, pmtu
, true);
425 pf
->to_sk_daddr(&addr
, sk
);
427 dst
= sctp_transport_dst_check(t
);
431 t
->af_specific
->get_dst(t
, &t
->saddr
, &t
->fl
, sk
);
436 /* Re-fetch, as under layers may have a higher minimum size */
437 pmtu
= sctp_dst_mtu(dst
);
438 change
= t
->pathmtu
!= pmtu
;
445 /* Caches the dst entry and source address for a transport's destination
448 void sctp_transport_route(struct sctp_transport
*transport
,
449 union sctp_addr
*saddr
, struct sctp_sock
*opt
)
451 struct sctp_association
*asoc
= transport
->asoc
;
452 struct sctp_af
*af
= transport
->af_specific
;
454 sctp_transport_dst_release(transport
);
455 af
->get_dst(transport
, saddr
, &transport
->fl
, sctp_opt2sk(opt
));
458 memcpy(&transport
->saddr
, saddr
, sizeof(union sctp_addr
));
460 af
->get_saddr(opt
, transport
, &transport
->fl
);
462 sctp_transport_pmtu(transport
, sctp_opt2sk(opt
));
464 /* Initialize sk->sk_rcv_saddr, if the transport is the
465 * association's active path for getsockname().
467 if (transport
->dst
&& asoc
&&
468 (!asoc
->peer
.primary_path
|| transport
== asoc
->peer
.active_path
))
469 opt
->pf
->to_sk_saddr(&transport
->saddr
, asoc
->base
.sk
);
472 /* Hold a reference to a transport. */
473 int sctp_transport_hold(struct sctp_transport
*transport
)
475 return refcount_inc_not_zero(&transport
->refcnt
);
478 /* Release a reference to a transport and clean up
479 * if there are no more references.
481 void sctp_transport_put(struct sctp_transport
*transport
)
483 if (refcount_dec_and_test(&transport
->refcnt
))
484 sctp_transport_destroy(transport
);
487 /* Update transport's RTO based on the newly calculated RTT. */
488 void sctp_transport_update_rto(struct sctp_transport
*tp
, __u32 rtt
)
490 if (unlikely(!tp
->rto_pending
))
491 /* We should not be doing any RTO updates unless rto_pending is set. */
492 pr_debug("%s: rto_pending not set on transport %p!\n", __func__
, tp
);
494 if (tp
->rttvar
|| tp
->srtt
) {
495 struct net
*net
= tp
->asoc
->base
.net
;
496 /* 6.3.1 C3) When a new RTT measurement R' is made, set
497 * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
498 * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
501 /* Note: The above algorithm has been rewritten to
502 * express rto_beta and rto_alpha as inverse powers
504 * For example, assuming the default value of RTO.Alpha of
505 * 1/8, rto_alpha would be expressed as 3.
507 tp
->rttvar
= tp
->rttvar
- (tp
->rttvar
>> net
->sctp
.rto_beta
)
508 + (((__u32
)abs((__s64
)tp
->srtt
- (__s64
)rtt
)) >> net
->sctp
.rto_beta
);
509 tp
->srtt
= tp
->srtt
- (tp
->srtt
>> net
->sctp
.rto_alpha
)
510 + (rtt
>> net
->sctp
.rto_alpha
);
512 /* 6.3.1 C2) When the first RTT measurement R is made, set
513 * SRTT <- R, RTTVAR <- R/2.
516 tp
->rttvar
= rtt
>> 1;
519 /* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then
520 * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY.
523 tp
->rttvar
= SCTP_CLOCK_GRANULARITY
;
525 /* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */
526 tp
->rto
= tp
->srtt
+ (tp
->rttvar
<< 2);
528 /* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min
529 * seconds then it is rounded up to RTO.Min seconds.
531 if (tp
->rto
< tp
->asoc
->rto_min
)
532 tp
->rto
= tp
->asoc
->rto_min
;
534 /* 6.3.1 C7) A maximum value may be placed on RTO provided it is
535 * at least RTO.max seconds.
537 if (tp
->rto
> tp
->asoc
->rto_max
)
538 tp
->rto
= tp
->asoc
->rto_max
;
540 sctp_max_rto(tp
->asoc
, tp
);
543 /* Reset rto_pending so that a new RTT measurement is started when a
544 * new data chunk is sent.
548 pr_debug("%s: transport:%p, rtt:%d, srtt:%d rttvar:%d, rto:%ld\n",
549 __func__
, tp
, rtt
, tp
->srtt
, tp
->rttvar
, tp
->rto
);
552 /* This routine updates the transport's cwnd and partial_bytes_acked
553 * parameters based on the bytes acked in the received SACK.
555 void sctp_transport_raise_cwnd(struct sctp_transport
*transport
,
556 __u32 sack_ctsn
, __u32 bytes_acked
)
558 struct sctp_association
*asoc
= transport
->asoc
;
559 __u32 cwnd
, ssthresh
, flight_size
, pba
, pmtu
;
561 cwnd
= transport
->cwnd
;
562 flight_size
= transport
->flight_size
;
564 /* See if we need to exit Fast Recovery first */
565 if (asoc
->fast_recovery
&&
566 TSN_lte(asoc
->fast_recovery_exit
, sack_ctsn
))
567 asoc
->fast_recovery
= 0;
569 ssthresh
= transport
->ssthresh
;
570 pba
= transport
->partial_bytes_acked
;
571 pmtu
= transport
->asoc
->pathmtu
;
573 if (cwnd
<= ssthresh
) {
575 * o When cwnd is less than or equal to ssthresh, an SCTP
576 * endpoint MUST use the slow-start algorithm to increase
577 * cwnd only if the current congestion window is being fully
578 * utilized, an incoming SACK advances the Cumulative TSN
579 * Ack Point, and the data sender is not in Fast Recovery.
580 * Only when these three conditions are met can the cwnd be
581 * increased; otherwise, the cwnd MUST not be increased.
582 * If these conditions are met, then cwnd MUST be increased
583 * by, at most, the lesser of 1) the total size of the
584 * previously outstanding DATA chunk(s) acknowledged, and
585 * 2) the destination's path MTU. This upper bound protects
586 * against the ACK-Splitting attack outlined in [SAVAGE99].
588 if (asoc
->fast_recovery
)
591 /* The appropriate cwnd increase algorithm is performed
592 * if, and only if the congestion window is being fully
593 * utilized. Note that RFC4960 Errata 3.22 removed the
594 * other condition on ctsn moving.
596 if (flight_size
< cwnd
)
599 if (bytes_acked
> pmtu
)
604 pr_debug("%s: slow start: transport:%p, bytes_acked:%d, "
605 "cwnd:%d, ssthresh:%d, flight_size:%d, pba:%d\n",
606 __func__
, transport
, bytes_acked
, cwnd
, ssthresh
,
609 /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,
610 * upon each SACK arrival, increase partial_bytes_acked
611 * by the total number of bytes of all new chunks
612 * acknowledged in that SACK including chunks
613 * acknowledged by the new Cumulative TSN Ack and by Gap
614 * Ack Blocks. (updated by RFC4960 Errata 3.22)
616 * When partial_bytes_acked is greater than cwnd and
617 * before the arrival of the SACK the sender had less
618 * bytes of data outstanding than cwnd (i.e., before
619 * arrival of the SACK, flightsize was less than cwnd),
620 * reset partial_bytes_acked to cwnd. (RFC 4960 Errata
623 * When partial_bytes_acked is equal to or greater than
624 * cwnd and before the arrival of the SACK the sender
625 * had cwnd or more bytes of data outstanding (i.e.,
626 * before arrival of the SACK, flightsize was greater
627 * than or equal to cwnd), partial_bytes_acked is reset
628 * to (partial_bytes_acked - cwnd). Next, cwnd is
629 * increased by MTU. (RFC 4960 Errata 3.12)
632 if (pba
> cwnd
&& flight_size
< cwnd
)
634 if (pba
>= cwnd
&& flight_size
>= cwnd
) {
639 pr_debug("%s: congestion avoidance: transport:%p, "
640 "bytes_acked:%d, cwnd:%d, ssthresh:%d, "
641 "flight_size:%d, pba:%d\n", __func__
,
642 transport
, bytes_acked
, cwnd
, ssthresh
,
646 transport
->cwnd
= cwnd
;
647 transport
->partial_bytes_acked
= pba
;
650 /* This routine is used to lower the transport's cwnd when congestion is
653 void sctp_transport_lower_cwnd(struct sctp_transport
*transport
,
654 enum sctp_lower_cwnd reason
)
656 struct sctp_association
*asoc
= transport
->asoc
;
659 case SCTP_LOWER_CWND_T3_RTX
:
660 /* RFC 2960 Section 7.2.3, sctpimpguide
661 * When the T3-rtx timer expires on an address, SCTP should
662 * perform slow start by:
663 * ssthresh = max(cwnd/2, 4*MTU)
665 * partial_bytes_acked = 0
667 transport
->ssthresh
= max(transport
->cwnd
/2,
669 transport
->cwnd
= asoc
->pathmtu
;
671 /* T3-rtx also clears fast recovery */
672 asoc
->fast_recovery
= 0;
675 case SCTP_LOWER_CWND_FAST_RTX
:
676 /* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the
677 * destination address(es) to which the missing DATA chunks
678 * were last sent, according to the formula described in
681 * RFC 2960 7.2.3, sctpimpguide Upon detection of packet
682 * losses from SACK (see Section 7.2.4), An endpoint
683 * should do the following:
684 * ssthresh = max(cwnd/2, 4*MTU)
686 * partial_bytes_acked = 0
688 if (asoc
->fast_recovery
)
691 /* Mark Fast recovery */
692 asoc
->fast_recovery
= 1;
693 asoc
->fast_recovery_exit
= asoc
->next_tsn
- 1;
695 transport
->ssthresh
= max(transport
->cwnd
/2,
697 transport
->cwnd
= transport
->ssthresh
;
700 case SCTP_LOWER_CWND_ECNE
:
701 /* RFC 2481 Section 6.1.2.
702 * If the sender receives an ECN-Echo ACK packet
703 * then the sender knows that congestion was encountered in the
704 * network on the path from the sender to the receiver. The
705 * indication of congestion should be treated just as a
706 * congestion loss in non-ECN Capable TCP. That is, the TCP
707 * source halves the congestion window "cwnd" and reduces the
708 * slow start threshold "ssthresh".
709 * A critical condition is that TCP does not react to
710 * congestion indications more than once every window of
711 * data (or more loosely more than once every round-trip time).
713 if (time_after(jiffies
, transport
->last_time_ecne_reduced
+
715 transport
->ssthresh
= max(transport
->cwnd
/2,
717 transport
->cwnd
= transport
->ssthresh
;
718 transport
->last_time_ecne_reduced
= jiffies
;
722 case SCTP_LOWER_CWND_INACTIVE
:
723 /* RFC 2960 Section 7.2.1, sctpimpguide
724 * When the endpoint does not transmit data on a given
725 * transport address, the cwnd of the transport address
726 * should be adjusted to max(cwnd/2, 4*MTU) per RTO.
727 * NOTE: Although the draft recommends that this check needs
728 * to be done every RTO interval, we do it every heartbeat
731 transport
->cwnd
= max(transport
->cwnd
/2,
733 /* RFC 4960 Errata 3.27.2: also adjust ssthresh */
734 transport
->ssthresh
= transport
->cwnd
;
738 transport
->partial_bytes_acked
= 0;
740 pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d\n",
741 __func__
, transport
, reason
, transport
->cwnd
,
742 transport
->ssthresh
);
745 /* Apply Max.Burst limit to the congestion window:
746 * sctpimpguide-05 2.14.2
747 * D) When the time comes for the sender to
748 * transmit new DATA chunks, the protocol parameter Max.Burst MUST
749 * first be applied to limit how many new DATA chunks may be sent.
750 * The limit is applied by adjusting cwnd as follows:
751 * if ((flightsize + Max.Burst * MTU) < cwnd)
752 * cwnd = flightsize + Max.Burst * MTU
755 void sctp_transport_burst_limited(struct sctp_transport
*t
)
757 struct sctp_association
*asoc
= t
->asoc
;
758 u32 old_cwnd
= t
->cwnd
;
761 if (t
->burst_limited
|| asoc
->max_burst
== 0)
764 max_burst_bytes
= t
->flight_size
+ (asoc
->max_burst
* asoc
->pathmtu
);
765 if (max_burst_bytes
< old_cwnd
) {
766 t
->cwnd
= max_burst_bytes
;
767 t
->burst_limited
= old_cwnd
;
771 /* Restore the old cwnd congestion window, after the burst had it's
774 void sctp_transport_burst_reset(struct sctp_transport
*t
)
776 if (t
->burst_limited
) {
777 t
->cwnd
= t
->burst_limited
;
778 t
->burst_limited
= 0;
782 /* What is the next timeout value for this transport? */
783 unsigned long sctp_transport_timeout(struct sctp_transport
*trans
)
785 /* RTO + timer slack +/- 50% of RTO */
786 unsigned long timeout
= trans
->rto
>> 1;
788 if (trans
->state
!= SCTP_UNCONFIRMED
&&
789 trans
->state
!= SCTP_PF
)
790 timeout
+= trans
->hbinterval
;
792 return max_t(unsigned long, timeout
, HZ
/ 5);
795 /* Reset transport variables to their initial values */
796 void sctp_transport_reset(struct sctp_transport
*t
)
798 struct sctp_association
*asoc
= t
->asoc
;
800 /* RFC 2960 (bis), Section 5.2.4
801 * All the congestion control parameters (e.g., cwnd, ssthresh)
802 * related to this peer MUST be reset to their initial values
803 * (see Section 6.2.1)
805 t
->cwnd
= min(4*asoc
->pathmtu
, max_t(__u32
, 2*asoc
->pathmtu
, 4380));
806 t
->burst_limited
= 0;
807 t
->ssthresh
= asoc
->peer
.i
.a_rwnd
;
808 t
->rto
= asoc
->rto_initial
;
809 sctp_max_rto(asoc
, t
);
814 /* Reset these additional variables so that we have a clean slate. */
815 t
->partial_bytes_acked
= 0;
821 /* Initialize the state information for SFR-CACC */
822 t
->cacc
.changeover_active
= 0;
823 t
->cacc
.cycling_changeover
= 0;
824 t
->cacc
.next_tsn_at_change
= 0;
825 t
->cacc
.cacc_saw_newack
= 0;
828 /* Schedule retransmission on the given transport */
829 void sctp_transport_immediate_rtx(struct sctp_transport
*t
)
831 /* Stop pending T3_rtx_timer */
832 if (del_timer(&t
->T3_rtx_timer
))
833 sctp_transport_put(t
);
835 sctp_retransmit(&t
->asoc
->outqueue
, t
, SCTP_RTXR_T3_RTX
);
836 if (!timer_pending(&t
->T3_rtx_timer
)) {
837 if (!mod_timer(&t
->T3_rtx_timer
, jiffies
+ t
->rto
))
838 sctp_transport_hold(t
);
843 void sctp_transport_dst_release(struct sctp_transport
*t
)
847 t
->dst_pending_confirm
= 0;
850 /* Schedule neighbour confirm */
851 void sctp_transport_dst_confirm(struct sctp_transport
*t
)
853 t
->dst_pending_confirm
= 1;