/*
 * Connection oriented routing
 * Copyright (C) 2007-2023 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * neighbor congestion window:
 * increment by 8192 every round trip if more than 2/3 of cwin is used
 *
 * in case of packet loss decrease by 1/4:
 * - <= 1/8 immediately and
 * - <= 1/4 during the next round trip
 *
 * in case of multiple packet loss events, do not decrement more than once per
 * round trip
 *
 * when acks are received, do not free the entire window at once to avoid send
 * bursts
 */
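
/*
 * Illustrative arithmetic (an editor-added sketch, not from the original
 * source; NBCONGWIN_SHIFT == 16 is an assumed value): cwin is kept as a
 * fixed-point number shifted left by NBCONGWIN_SHIFT, so a 64kiB window is
 * stored as 65536 << 16.  In cor_nbcongwin_update_cwin() below each ack adds
 *	incrby = bytes_acked * INCR_PER_RTT * CWIN_MUL^2 / cwin_tmp
 * and since cwin_tmp >= bytes_acked * CWIN_MUL, incrby never exceeds
 * INCR_PER_RTT * CWIN_MUL.  Summed over one round trip (total bytes_acked
 * roughly equals the unshifted window), the window grows by about
 * INCR_PER_RTT == 8192 bytes, matching the description above.
 */

/*
 * On packet loss (a retransmit), shrink the window: if no shrink is pending
 * (cwin_shrinkto == cwin), cut cwin by 1/16 immediately and schedule a
 * further cut by setting cwin_shrinkto below the new cwin; repeated losses
 * while a shrink is still pending only update cwin_shrinkto, so the window
 * is decremented at most once per round trip.
 */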
void cor_nbcongwin_data_retransmitted(struct cor_neighbor *nb,
		__u64 bytes_sent)
{
	__u64 min_cwin = cor_mss_conndata(nb, 0) * 2 << NBCONGWIN_SHIFT;
	__u64 cwin;

	unsigned long iflags;

	spin_lock_irqsave(&nb->nbcongwin.lock, iflags);

#ifdef FIXED_NBCONGWIN

	atomic64_set(&nb->nbcongwin.cwin,
			FIXED_NBCONGWIN << NBCONGWIN_SHIFT);

#else

	cwin = atomic64_read(&nb->nbcongwin.cwin);

	/* printk(KERN_ERR "retrans %llu %llu\n", cwin >> NBCONGWIN_SHIFT,
			get_bufspace_used());
	print_conn_bufstats(nb); */

	BUG_ON(nb->nbcongwin.cwin_shrinkto > cwin);

	if (nb->nbcongwin.cwin_shrinkto == cwin) {
		cwin = max(min_cwin, cwin - cwin / 16);
		atomic64_set(&nb->nbcongwin.cwin, cwin);
	}

	nb->nbcongwin.cwin_shrinkto = max(min_cwin, cwin - cwin / 16);

#endif

	spin_unlock_irqrestore(&nb->nbcongwin.lock, iflags);
}

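/*
 * If in-flight data has fallen below the congestion window, put the
 * neighbor back on the device send queue (or mark it idle if no
 * connections are waiting).  Called with nbcongwin.lock held.
 */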
static void cor_nbcongwin_resume(struct cor_neighbor *nb_cwlocked)
{
	struct cor_dev *cd = nb_cwlocked->cd;
	__u64 data_intransit = atomic64_read(
			&nb_cwlocked->nbcongwin.data_intransit);
	__u64 cwin = atomic64_read(&nb_cwlocked->nbcongwin.cwin);

	if (data_intransit >= cwin >> NBCONGWIN_SHIFT)
		return;

	spin_lock(&cd->send_queue.qlock);
	if (nb_cwlocked->rb.in_queue == RB_INQUEUE_NBCONGWIN) {
		if (nb_cwlocked->conns_waiting.cnt == 0) {
			nb_cwlocked->rb.in_queue = RB_INQUEUE_FALSE;
		} else {
			_cor_dev_queue_enqueue(cd, &nb_cwlocked->rb, 0, ns_to_ktime(0),
					QOS_CALLER_NEIGHBOR, 1, 0);
		}
	}
	spin_unlock(&cd->send_queue.qlock);
}

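/*
 * Arm the delayed-ack timer (3ms) unless the neighbor has already been
 * killed.  Called with nbcongwin.lock held.
 */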
static void cor_nbcongwin_sched_timer(struct cor_neighbor *nb_cwlocked)
{
	if (unlikely(cor_get_neigh_state(nb_cwlocked) == NEIGHBOR_STATE_KILLED))
		return;

	hrtimer_start(&nb_cwlocked->nbcongwin.acked_timer, ms_to_ktime(3),
			HRTIMER_MODE_REL);
}

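/*
 * Credit delayed-acked bytes back to data_intransit in proportion to the
 * time elapsed since the last refresh; once acked_timeout_time has passed,
 * credit everything at once.  The halving loop pre-scales time_passed and
 * time_needed so the 64-bit multiplication below cannot overflow.  Returns
 * nonzero if any bytes were credited, i.e. if the caller may try to resume
 * sending.
 */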
static int cor_nbcongwin_update_intransit(struct cor_neighbor *nb_cwlocked,
		ktime_t now)
{
	__u64 bytes;

	__s64 time_passed = ktime_to_us(ktime_sub(now,
			nb_cwlocked->nbcongwin.acked_refresh_time));
	__s64 time_needed = ktime_to_us(ktime_sub(
			nb_cwlocked->nbcongwin.acked_timeout_time,
			nb_cwlocked->nbcongwin.acked_refresh_time));

	if (time_passed < 0 || time_needed <= 0 || time_passed >= time_needed) {
		bytes = nb_cwlocked->nbcongwin.data_acked;
		nb_cwlocked->nbcongwin.acked_timeout_time = now;
	} else if (unlikely(nb_cwlocked->nbcongwin.data_acked >= (1LL << 40))) {
		bytes = ((__u64) time_passed) * div64_u64(
				nb_cwlocked->nbcongwin.data_acked,
				(__u64) time_needed);
	} else {
		while (time_passed >= (1LL << 24)) {
			time_needed = time_needed >> 1;
			time_passed = time_passed >> 1;
		}

		bytes = div64_u64(nb_cwlocked->nbcongwin.data_acked *
				(__u64) time_passed, (__u64) time_needed);
	}

	nb_cwlocked->nbcongwin.acked_refresh_time = now;
	nb_cwlocked->nbcongwin.data_acked -= bytes;

	/* printk(KERN_ERR "ack delayed %llu %llu\n",
			atomic64_read(&nb_cwlocked->nbcongwin.data_intransit),
			bytes); */

	cor_update_atomic_sum(&nb_cwlocked->nbcongwin.data_intransit, bytes, 0);

	return bytes != 0; /* nonzero: caller may resume sending */
}

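/*
 * Expiry handler for nbcongwin.acked_timer: credit the next share of
 * delayed-acked bytes, re-arm the timer while data_acked remains, and
 * resume sending if the window allows it.
 */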
enum hrtimer_restart cor_nbcongwin_hrtimerfunc(struct hrtimer *nb_congwin_timer)
{
	struct cor_neighbor *nb = container_of(nb_congwin_timer,
			struct cor_neighbor, nbcongwin.acked_timer);
	unsigned long iflags;
	int resume;

	spin_lock_irqsave(&nb->nbcongwin.lock, iflags);

	resume = cor_nbcongwin_update_intransit(nb, ktime_get());

	if (nb->nbcongwin.data_acked > 0)
		cor_nbcongwin_sched_timer(nb);

	if (resume)
		cor_nbcongwin_resume(nb);

	spin_unlock_irqrestore(&nb->nbcongwin.lock, iflags);

	return HRTIMER_NORESTART;
}

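/*
 * Window update on ack: first work off a pending shrink towards
 * cwin_shrinkto (by at most a quarter of the acked bytes, in cwin units),
 * then, if at least 2/3 of the window is in use, grow cwin and
 * cwin_shrinkto so that the window gains about INCR_PER_RTT bytes per
 * round trip.  The three div64_u64() variants only differ in how they
 * pre-scale to avoid 64-bit overflow for very large bytes_acked.  Called
 * with nbcongwin.lock held.
 */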
static void cor_nbcongwin_update_cwin(struct cor_neighbor *nb_cwlocked,
		__u64 bytes_acked)
{
#ifdef FIXED_NBCONGWIN

	atomic64_set(&nb_cwlocked->nbcongwin.cwin,
			FIXED_NBCONGWIN << NBCONGWIN_SHIFT);

#else

	__u64 data_intransit = atomic64_read(
			&nb_cwlocked->nbcongwin.data_intransit);
	__u64 cwin = atomic64_read(&nb_cwlocked->nbcongwin.cwin);
	__u64 CWIN_MUL = (1 << NBCONGWIN_SHIFT);
	__u32 INCR_PER_RTT = 8192;

	__u64 cwin_tmp;
	__u64 incrby;

	if (nb_cwlocked->nbcongwin.cwin_shrinkto < cwin) {
		__u64 shrinkby = (bytes_acked << (NBCONGWIN_SHIFT - 2));

		BUILD_BUG_ON(NBCONGWIN_SHIFT < 2);

		if (unlikely(shrinkby > cwin))
			cwin = 0;
		else
			cwin -= shrinkby;

		if (cwin < nb_cwlocked->nbcongwin.cwin_shrinkto)
			cwin = nb_cwlocked->nbcongwin.cwin_shrinkto;
	}

	if (cwin * 2 > data_intransit * CWIN_MUL * 3)
		goto out;

	cwin_tmp = max(cwin, bytes_acked << NBCONGWIN_SHIFT);

	if (unlikely(bytes_acked >= U64_MAX / INCR_PER_RTT / CWIN_MUL))
		incrby = div64_u64(bytes_acked * INCR_PER_RTT,
				cwin_tmp / CWIN_MUL / CWIN_MUL);
	else if (unlikely(bytes_acked >=
			U64_MAX / INCR_PER_RTT / CWIN_MUL / CWIN_MUL))
		incrby = div64_u64(bytes_acked * INCR_PER_RTT * CWIN_MUL,
				cwin_tmp / CWIN_MUL);
	else
		incrby = div64_u64(bytes_acked * INCR_PER_RTT * CWIN_MUL *
				CWIN_MUL, cwin_tmp);

	BUG_ON(incrby > INCR_PER_RTT * CWIN_MUL);

	if (unlikely(cwin + incrby < cwin))
		cwin = U64_MAX;
	else
		cwin += incrby;

	if (unlikely(nb_cwlocked->nbcongwin.cwin_shrinkto + incrby <
			nb_cwlocked->nbcongwin.cwin_shrinkto))
		nb_cwlocked->nbcongwin.cwin_shrinkto = U64_MAX;
	else
		nb_cwlocked->nbcongwin.cwin_shrinkto += incrby;

out:
	atomic64_set(&nb_cwlocked->nbcongwin.cwin, cwin);

#endif
}

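/*
 * Process freshly acked data: update the window, remove ack_now bytes from
 * data_intransit immediately and spread the remaining ack_delayed bytes
 * over the remote ack-delay interval via the acked timer, then try to
 * resume sending.
 */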
void cor_nbcongwin_data_acked(struct cor_neighbor *nb, __u64 bytes_acked)
{
	unsigned long iflags;
	ktime_t now;
	__u64 delay_us = atomic_read(&nb->max_remote_ackconn_delay_us);
	__u64 ack_now;
	__u64 ack_delayed;

	if (delay_us == 0) {
		ack_now = bytes_acked;
	} else {
		__u32 mss = cor_mss_conndata(nb, 1);

		ack_now = mss + bytes_acked / 8;
		if (ack_now > bytes_acked)
			ack_now = bytes_acked;
	}

	ack_delayed = bytes_acked - ack_now;

	spin_lock_irqsave(&nb->nbcongwin.lock, iflags);

	now = ktime_get();

	cor_nbcongwin_update_intransit(nb, now);

	/* printk(KERN_ERR "ack now %llu %llu %llu\n",
			atomic64_read(&nb->nbcongwin.data_intransit),
			ack_now, ack_delayed); */

	cor_nbcongwin_update_cwin(nb, bytes_acked);

	cor_update_atomic_sum(&nb->nbcongwin.data_intransit, ack_now, 0);

	if (ack_delayed != 0) {
		nb->nbcongwin.acked_refresh_time = now;
		nb->nbcongwin.acked_timeout_time =
				ktime_add_us(now, delay_us);
		nb->nbcongwin.data_acked += ack_delayed;
		if (nb->nbcongwin.data_acked == ack_delayed)
			cor_nbcongwin_sched_timer(nb);
	}

	cor_nbcongwin_resume(nb);

	spin_unlock_irqrestore(&nb->nbcongwin.lock, iflags);
}

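/* Account newly sent bytes as in flight. */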
void cor_nbcongwin_data_sent(struct cor_neighbor *nb, __u32 bytes_sent)
{
	atomic64_add(bytes_sent, &nb->nbcongwin.data_intransit);
}

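/*
 * Return nonzero if the congestion window permits sending to this
 * neighbor.  Otherwise move the neighbor out of the regular device send
 * queue into the RB_INQUEUE_NBCONGWIN state; cor_nbcongwin_resume() will
 * re-enqueue it once enough data has been acked.
 */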
int cor_nbcongwin_send_allowed(struct cor_neighbor *nb)
{
	unsigned long iflags;
	int ret = 1;
	struct cor_dev *cd = nb->cd;
	int krefput_cd = 0; /* drop the cd reference after releasing qlock */

	if (atomic64_read(&nb->nbcongwin.data_intransit) <=
			atomic64_read(&nb->nbcongwin.cwin) >> NBCONGWIN_SHIFT)
		return 1;

	spin_lock_irqsave(&nb->nbcongwin.lock, iflags);

	/* retest with the lock held */
	if (atomic64_read(&nb->nbcongwin.data_intransit) <=
			atomic64_read(&nb->nbcongwin.cwin) >> NBCONGWIN_SHIFT)
		goto out;

	ret = 0;

	spin_lock(&cd->send_queue.qlock);
	if (nb->rb.in_queue == RB_INQUEUE_FALSE) {
		nb->rb.in_queue = RB_INQUEUE_NBCONGWIN;
	} else if (nb->rb.in_queue == RB_INQUEUE_TRUE) {
		list_del(&nb->rb.lh);
		cor_nb_kref_put_bug(nb, "qos_queue_nb");
		nb->rb.in_queue = RB_INQUEUE_NBCONGWIN;
		BUG_ON(nb->conns_waiting.cnt > cd->send_queue.numconns);
		cd->send_queue.numconns -= nb->conns_waiting.cnt;
		BUG_ON(nb->conns_waiting.priority_sum >
				cd->send_queue.priority_sum);
		cd->send_queue.priority_sum -= nb->conns_waiting.priority_sum;

		if (list_empty(&cd->send_queue.neighbors_waiting) &&
				list_empty(&cd->send_queue.neighbors_waiting_nextpass)) {
			BUG_ON(cd->send_queue.numconns != 0);
			BUG_ON(cd->send_queue.priority_sum != 0);
			krefput_cd = 1;
		}

		cor_dev_queue_set_congstatus(cd);
	} else if (nb->rb.in_queue == RB_INQUEUE_NBCONGWIN) {
		/* nothing to do, already waiting for the congestion window */
	}
	spin_unlock(&cd->send_queue.qlock);

	if (krefput_cd != 0)
		kref_put(&cd->ref, cor_dev_free);

out:
	spin_unlock_irqrestore(&nb->nbcongwin.lock, iflags);

	return ret;
}