net/cor/neigh_congwin.c
/**
 * Connection oriented routing
 * Copyright (C) 2007-2023  Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include "cor.h"

/**
 * neighbor congestion window:
 * increment by INCR_PER_RTT (8192) bytes every round trip if more than 2/3
 * of cwin is used
 *
 * in case of packet loss decrease by 1/4:
 * - <= 1/8 immediately and
 * - <= 1/4 during the next round trip
 *
 * in case of multiple packet loss events, do not decrement more than once per
 * round trip
 *
 * when acks are received, do not free the entire window at once to avoid send
 * bursts
 */

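/*
 * Illustrative example (the numbers are chosen for illustration only; the
 * formulas are the ones used further down in this file): cwin is stored as a
 * fixed point value shifted left by NBCONGWIN_SHIFT, so the usable window in
 * bytes is cwin >> NBCONGWIN_SHIFT. With a usable window of 64000 bytes and
 * 48000 bytes in transit, more than 2/3 of the window is in use, so an ack
 * of 16000 bytes grows the window by about
 * 16000 * INCR_PER_RTT / 64000 = 2048 bytes; acking one full window of data
 * therefore adds roughly INCR_PER_RTT bytes per round trip.
 */
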
void cor_nbcongwin_data_retransmitted(struct cor_neighbor *nb,
		__u64 bytes_sent)
{
	__u64 min_cwin = cor_mss_conndata(nb, 0) * 2 << NBCONGWIN_SHIFT;
	__u64 cwin;

	unsigned long iflags;

	spin_lock_irqsave(&nb->nbcongwin.lock, iflags);

#ifdef FIXED_NBCONGWIN

	atomic64_set(&nb->nbcongwin.cwin, FIXED_NBCONGWIN << NBCONGWIN_SHIFT);

#else

	cwin = atomic64_read(&nb->nbcongwin.cwin);

	/* printk(KERN_ERR "retrans %llu %llu\n", cwin >> NBCONGWIN_SHIFT,
			get_bufspace_used());
	print_conn_bufstats(nb); */

	BUG_ON(nb->nbcongwin.cwin_shrinkto > cwin);

	if (nb->nbcongwin.cwin_shrinkto == cwin) {
		cwin = max(min_cwin, cwin - cwin / 16);
		atomic64_set(&nb->nbcongwin.cwin, cwin);
	}

	nb->nbcongwin.cwin_shrinkto = max(min_cwin, cwin - cwin / 16);

#endif

	spin_unlock_irqrestore(&nb->nbcongwin.lock, iflags);
}

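/*
 * Once data_intransit has dropped back below the usable window
 * (cwin >> NBCONGWIN_SHIFT), take the neighbor out of the
 * RB_INQUEUE_NBCONGWIN state; if connections are still waiting, re-enqueue
 * it on the device send queue so transmission can resume. The caller holds
 * nbcongwin.lock (hence the _cwlocked suffix).
 */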
static void cor_nbcongwin_resume(struct cor_neighbor *nb_cwlocked)
{
	struct cor_dev *cd = nb_cwlocked->cd;

	__u64 data_intransit = atomic64_read(
			&nb_cwlocked->nbcongwin.data_intransit);
	__u64 cwin = atomic64_read(&nb_cwlocked->nbcongwin.cwin);

	if (data_intransit >= cwin >> NBCONGWIN_SHIFT)
		return;

	spin_lock(&cd->send_queue.qlock);
	if (nb_cwlocked->rb.in_queue == RB_INQUEUE_NBCONGWIN) {
		if (nb_cwlocked->conns_waiting.cnt == 0) {
			nb_cwlocked->rb.in_queue = RB_INQUEUE_FALSE;
		} else {
			_cor_dev_queue_enqueue(cd, &nb_cwlocked->rb, 0,
					ns_to_ktime(0),
					QOS_CALLER_NEIGHBOR, 1, 0);
		}
	}
	spin_unlock(&cd->send_queue.qlock);
}

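/*
 * Arm the ack release timer (3ms) so that bytes acked with a delay are
 * freed from data_intransit gradually instead of all at once. Skipped when
 * the neighbor has already been killed.
 */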
static void cor_nbcongwin_sched_timer(struct cor_neighbor *nb_cwlocked)
{
	if (unlikely(cor_get_neigh_state(nb_cwlocked) == NEIGHBOR_STATE_KILLED))
		return;

	hrtimer_start(&nb_cwlocked->nbcongwin.acked_timer, ms_to_ktime(3),
			HRTIMER_MODE_REL);
}

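/*
 * Release part of the delayed acked bytes from data_intransit,
 * proportionally to how much of the interval between acked_refresh_time and
 * acked_timeout_time has elapsed:
 *   bytes = data_acked * time_passed / time_needed
 * The two special cases only guard against overflow of the 64 bit
 * multiplication. Returns 1 if any bytes were released, 0 otherwise.
 */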
static int cor_nbcongwin_update_intransit(struct cor_neighbor *nb_cwlocked,
		ktime_t now)
{
	__u64 bytes;

	__s64 time_passed = ktime_to_us(ktime_sub(now,
			nb_cwlocked->nbcongwin.acked_refresh_time));
	__s64 time_needed = ktime_to_us(ktime_sub(
			nb_cwlocked->nbcongwin.acked_timeout_time,
			nb_cwlocked->nbcongwin.acked_refresh_time));

	if (time_passed < 0 || time_needed <= 0 || time_passed >= time_needed) {
		bytes = nb_cwlocked->nbcongwin.data_acked;
		nb_cwlocked->nbcongwin.acked_timeout_time = now;
	} else if (unlikely(nb_cwlocked->nbcongwin.data_acked >= (1LL << 40))) {
		bytes = ((__u64) time_passed) * div64_u64(
				nb_cwlocked->nbcongwin.data_acked,
				(__u64) time_needed);
	} else {
		while (time_passed >= (1LL << 24)) {
			time_needed = time_needed >> 1;
			time_passed = time_passed >> 1;
		}
		bytes = div64_u64(nb_cwlocked->nbcongwin.data_acked *
				(__u64) time_passed, (__u64) time_needed);
	}

	if (bytes == 0)
		return 0;

	nb_cwlocked->nbcongwin.acked_refresh_time = now;
	nb_cwlocked->nbcongwin.data_acked -= bytes;

	/* printk(KERN_ERR "ack delayed %llu %llu\n",
			atomic64_read(&nb_cwlocked->nbcongwin.data_intransit),
			bytes); */

	cor_update_atomic_sum(&nb_cwlocked->nbcongwin.data_intransit, bytes, 0);

	return 1;
}

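/*
 * hrtimer callback for nbcongwin.acked_timer: releases another portion of
 * the delayed acked bytes, re-arms the timer while data_acked remains and,
 * if anything was released, lets the neighbor resume sending.
 */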
enum hrtimer_restart cor_nbcongwin_hrtimerfunc(struct hrtimer *nb_congwin_timer)
{
	struct cor_neighbor *nb = container_of(nb_congwin_timer,
			struct cor_neighbor,
			nbcongwin.acked_timer);
	unsigned long iflags;
	int resume;

	spin_lock_irqsave(&nb->nbcongwin.lock, iflags);

	resume = cor_nbcongwin_update_intransit(nb, ktime_get());

	if (nb->nbcongwin.data_acked > 0)
		cor_nbcongwin_sched_timer(nb);

	if (resume)
		cor_nbcongwin_resume(nb);

	spin_unlock_irqrestore(&nb->nbcongwin.lock, iflags);

	return HRTIMER_NORESTART;
}

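/*
 * Adjust cwin after bytes_acked bytes were acknowledged. While a shrink is
 * pending (cwin_shrinkto < cwin), keep reducing cwin by bytes_acked / 4 per
 * ack until the shrink target is reached. Then, unless less than 2/3 of the
 * window is actually in use, grow both cwin and cwin_shrinkto by
 *   incrby = bytes_acked * INCR_PER_RTT * CWIN_MUL / (cwin_tmp / CWIN_MUL)
 * (the three branches below are only overflow-avoiding variants of the same
 * division), which works out to roughly INCR_PER_RTT bytes per round trip
 * when the window is fully used.
 */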
static void cor_nbcongwin_update_cwin(struct cor_neighbor *nb_cwlocked,
		__u64 bytes_acked)
{
#ifdef FIXED_NBCONGWIN

	atomic64_set(&nb_cwlocked->nbcongwin.cwin,
			FIXED_NBCONGWIN << NBCONGWIN_SHIFT);

#else

	__u64 data_intransit = atomic64_read(
			&nb_cwlocked->nbcongwin.data_intransit);
	__u64 cwin = atomic64_read(&nb_cwlocked->nbcongwin.cwin);
	__u64 CWIN_MUL = (1 << NBCONGWIN_SHIFT);
	__u32 INCR_PER_RTT = 8192;

	__u64 cwin_tmp;
	__u64 incrby;

	if (nb_cwlocked->nbcongwin.cwin_shrinkto < cwin) {
		__u64 shrinkby = (bytes_acked << (NBCONGWIN_SHIFT - 2));

		BUILD_BUG_ON(NBCONGWIN_SHIFT < 2);

		if (unlikely(shrinkby > cwin))
			cwin = 0;
		else
			cwin -= shrinkby;

		if (cwin < nb_cwlocked->nbcongwin.cwin_shrinkto)
			cwin = nb_cwlocked->nbcongwin.cwin_shrinkto;
	}

	if (cwin * 2 > data_intransit * CWIN_MUL * 3)
		goto out;

	cwin_tmp = max(cwin, bytes_acked << NBCONGWIN_SHIFT);

	if (unlikely(bytes_acked >= U64_MAX / INCR_PER_RTT / CWIN_MUL))
		incrby = div64_u64(bytes_acked * INCR_PER_RTT,
				cwin_tmp / CWIN_MUL / CWIN_MUL);
	else if (unlikely(bytes_acked >=
			U64_MAX / INCR_PER_RTT / CWIN_MUL / CWIN_MUL))
		incrby = div64_u64(bytes_acked * INCR_PER_RTT * CWIN_MUL,
				cwin_tmp / CWIN_MUL);
	else
		incrby = div64_u64(bytes_acked * INCR_PER_RTT * CWIN_MUL *
				CWIN_MUL, cwin_tmp);

	BUG_ON(incrby > INCR_PER_RTT * CWIN_MUL);

	if (unlikely(cwin + incrby < cwin))
		cwin = U64_MAX;
	else
		cwin += incrby;

	if (unlikely(nb_cwlocked->nbcongwin.cwin_shrinkto + incrby <
			nb_cwlocked->nbcongwin.cwin_shrinkto))
		nb_cwlocked->nbcongwin.cwin_shrinkto = U64_MAX;
	else
		nb_cwlocked->nbcongwin.cwin_shrinkto += incrby;

out:
	atomic64_set(&nb_cwlocked->nbcongwin.cwin, cwin);
#endif
}

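/*
 * Called when bytes_acked bytes have been acked by the neighbor. To avoid
 * send bursts, only part of the acked data (one mss plus 1/8 of the acked
 * bytes) is removed from data_intransit immediately when the remote side
 * delays its acks; the rest is released gradually via the acked_timer over
 * max_remote_ackconn_delay_us microseconds.
 */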
void cor_nbcongwin_data_acked(struct cor_neighbor *nb, __u64 bytes_acked)
{
	unsigned long iflags;
	ktime_t now;
	__u64 delay_us = atomic_read(&nb->max_remote_ackconn_delay_us);
	__u64 ack_now;
	__u64 ack_delayed;

	if (delay_us == 0) {
		ack_now = bytes_acked;
	} else {
		__u32 mss = cor_mss_conndata(nb, 1);
		ack_now = mss + bytes_acked / 8;
		if (ack_now > bytes_acked)
			ack_now = bytes_acked;
	}
	ack_delayed = bytes_acked - ack_now;

	spin_lock_irqsave(&nb->nbcongwin.lock, iflags);

	now = ktime_get();

	cor_nbcongwin_update_intransit(nb, now);

	/* printk(KERN_ERR "ack now %llu %llu %llu\n",
			atomic64_read(&nb->nbcongwin.data_intransit),
			ack_now, ack_delayed); */

	cor_nbcongwin_update_cwin(nb, bytes_acked);

	cor_update_atomic_sum(&nb->nbcongwin.data_intransit, ack_now, 0);

	if (ack_delayed != 0) {
		nb->nbcongwin.acked_refresh_time = now;
		nb->nbcongwin.acked_timeout_time =
				ktime_add_us(now, delay_us);
		nb->nbcongwin.data_acked += ack_delayed;
		if (nb->nbcongwin.data_acked == ack_delayed)
			cor_nbcongwin_sched_timer(nb);
	}

	cor_nbcongwin_resume(nb);

	spin_unlock_irqrestore(&nb->nbcongwin.lock, iflags);
}

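/* Account newly sent bytes as in transit towards the congestion window. */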
void cor_nbcongwin_data_sent(struct cor_neighbor *nb, __u32 bytes_sent)
{
	atomic64_add(bytes_sent, &nb->nbcongwin.data_intransit);
}

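/*
 * Returns 1 if the congestion window still has room for more data, 0
 * otherwise. When sending is not allowed, the neighbor is parked in the
 * RB_INQUEUE_NBCONGWIN state (and removed from the regular send queue if it
 * was on it) so that cor_nbcongwin_resume() can put it back once enough data
 * has been acked. With COR_NBCONGWIN disabled this check is compiled out and
 * sending is always allowed.
 */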
int cor_nbcongwin_send_allowed(struct cor_neighbor *nb)
{
	unsigned long iflags;
	int ret = 1;
	struct cor_dev *cd = nb->cd;
	int krefput_cd = 0;

#ifdef COR_NBCONGWIN
	if (atomic64_read(&nb->nbcongwin.data_intransit) <=
			atomic64_read(&nb->nbcongwin.cwin) >> NBCONGWIN_SHIFT)
		return 1;

	spin_lock_irqsave(&nb->nbcongwin.lock, iflags);

	if (atomic64_read(&nb->nbcongwin.data_intransit) <=
			atomic64_read(&nb->nbcongwin.cwin) >> NBCONGWIN_SHIFT)
		goto out_ok;

	ret = 0;

	spin_lock(&cd->send_queue.qlock);
	if (nb->rb.in_queue == RB_INQUEUE_FALSE) {
		nb->rb.in_queue = RB_INQUEUE_NBCONGWIN;
	} else if (nb->rb.in_queue == RB_INQUEUE_TRUE) {
		list_del(&nb->rb.lh);
		cor_nb_kref_put_bug(nb, "qos_queue_nb");
		nb->rb.in_queue = RB_INQUEUE_NBCONGWIN;

		BUG_ON(nb->conns_waiting.cnt > cd->send_queue.numconns);
		cd->send_queue.numconns -= nb->conns_waiting.cnt;
		BUG_ON(nb->conns_waiting.priority_sum >
				cd->send_queue.priority_sum);
		cd->send_queue.priority_sum -= nb->conns_waiting.priority_sum;

		if (list_empty(&cd->send_queue.neighbors_waiting) &&
				list_empty(&cd->send_queue.neighbors_waiting_nextpass)) {
			BUG_ON(cd->send_queue.numconns != 0);
			BUG_ON(cd->send_queue.priority_sum != 0);
		}

		krefput_cd = 1;

		cor_dev_queue_set_congstatus(cd);
	} else if (nb->rb.in_queue == RB_INQUEUE_NBCONGWIN) {
		/* already parked due to the congestion window, nothing to do */
	} else {
		BUG();
	}
	spin_unlock(&cd->send_queue.qlock);

	if (krefput_cd != 0)
		kref_put(&cd->ref, cor_dev_free);

out_ok:
	spin_unlock_irqrestore(&nb->nbcongwin.lock, iflags);

	return ret;
#else
	return 1;
#endif
}