/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size)
	Really, this limit will never be reached
	if RED works correctly.
 */
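
/*
 * A minimal configuration sketch (assumed iproute2 tc-red syntax; the device
 * name and all numbers are only illustrative), showing a limit chosen well
 * above qth_max plus burst headroom, as required above:
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 probability 0.02 bandwidth 10Mbit \
 *		ecn adaptative
 *
 * "ecn" and "adaptative" correspond to the TC_RED_ECN and TC_RED_ADAPTATIVE
 * flags handled below; "harddrop" would set TC_RED_HARDDROP.
 */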
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_* flags */
	struct timer_list	adapt_timer;	/* adaptive RED timer */
	struct Qdisc		*sch;		/* back-pointer for the timer */
	struct red_parms	parms;		/* configured parameters */
	struct red_vars		vars;		/* runtime EWMA state */
	struct red_stats	stats;
	struct Qdisc		*qdisc;		/* child (bfifo) qdisc */
};
static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);
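
	/*
	 * Classic RED decision, based on the averaged backlog: below
	 * qth_min nothing is marked, between qth_min and qth_max packets
	 * are marked (or dropped) with increasing probability, and above
	 * qth_max every packet is marked or dropped.
	 */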
	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}
static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}
static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}
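
/*
 * Push the current RED configuration down to the device through
 * ndo_setup_tc() (TC_SETUP_QDISC_RED), or tear it down when @enable is
 * false, so hardware-offloaded RED stays in sync with this qdisc.
 */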
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}
static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_destroy(q->qdisc);
}
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}
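
/*
 * Adaptive RED: while TC_RED_ADAPTATIVE is set, this timer re-runs
 * red_adaptative_algo() under the root qdisc lock roughly every 500ms
 * (HZ/2), letting the marking probability max_P track the observed
 * queue behaviour.
 */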
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}
static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}
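
/*
 * Pull device-maintained counters into the qdisc stats via a
 * TC_RED_STATS request; TCQ_F_OFFLOADED is set only when the driver
 * actually services the request.
 */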
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					    &hw_stats);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}
static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}
static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");