/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include <net/pkt_sched.h>
/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full-blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can also be loaded from a table, to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines. It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.

	 The simulator is limited by the Linux timer resolution
	 and will create packet bursts on the HZ boundary (1ms).
*/
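/*
 * Illustrative usage (added here for context; command syntax follows
 * tc(8) from iproute2 and may differ between versions):
 *
 *	# 100ms delay, 10ms jitter, 25% correlated with the previous delay
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *
 *	# 0.1% random packet loss
 *	tc qdisc change dev eth0 root netem loss 0.1%
 */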
struct netem_sched_data {
	struct Qdisc	*qdisc;
	struct sk_buff_head delayed;
	struct timer_list timer;

	u32 latency;
	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 jitter;
	u32 duplicate;

	/* Correlated random number generator state */
	struct crndstate {
		unsigned long last;
		unsigned long rho;
	} delay_cor, loss_cor, dup_cor;

	/* Optional table-driven delay distribution */
	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;
};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};
/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}
/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static unsigned long get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
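/*
 * Worked example (added for illustration, not in the original source):
 * with rho scaled so that 2^32 means full correlation, the update in
 * get_crandom() is a fixed-point blend
 *
 *	answer = (1 - rho') * value + rho' * last,   rho' = rho / 2^32
 *
 * e.g. rho = 2^31 (50% correlation) yields answer ~= (value + last)/2,
 * so successive samples drift around the previous one instead of
 * jumping independently; rho == 0 degenerates to plain net_random().
 */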
/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static long tabledist(unsigned long mu, long sigma,
		      struct crndstate *state, const struct disttable *dist)
{
	long t, x;
	unsigned long rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
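/*
 * Worked example (added for illustration, not in the original source):
 * sigma is split into quotient and remainder by NETEM_DIST_SCALE (the
 * scale factor from <linux/pkt_sched.h>) so that multiplying by the
 * table entry t cannot overflow a long, and the +/- NETEM_DIST_SCALE/2
 * term rounds the remainder part to nearest.  The net effect is
 *
 *	return mu + (sigma * t) / NETEM_DIST_SCALE;
 *
 * where t is a sample from a zero-mean, unit-deviation version of the
 * desired distribution, stored as a scaled 16-bit integer.
 */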
/* Put skb in the private delayed queue. */
static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	psched_tdiff_t td;
	psched_time_t now;

	PSCHED_GET_TIME(now);
	td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);

	/* Always queue at tail to keep packets in order */
	if (likely(q->delayed.qlen < q->limit)) {
		struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;

		PSCHED_TADD2(now, td, cb->time_to_send);

		pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb,
			 now, cb->time_to_send);

		__skb_queue_tail(&q->delayed, skb);
		return NET_XMIT_SUCCESS;
	}

	pr_debug("netem_delay: queue over limit %d\n", q->limit);
	sch->qstats.overlimits++;
	kfree_skb(skb);
	return NET_XMIT_DROP;
}
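/*
 * Note (added for illustration): the computed departure time lives in
 * skb->cb, the scratch area each skb carries for its current owner, so
 * delaying a packet needs no extra allocation; the cast through
 * struct netem_skb_cb must therefore stay within sizeof(skb->cb).
 */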
/*
 * Move a packet that is ready to send from the delay holding
 * list to the underlying qdisc.
 */
static int netem_run(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	psched_time_t now;

	PSCHED_GET_TIME(now);

	skb = skb_peek(&q->delayed);
	if (skb) {
		const struct netem_skb_cb *cb
			= (const struct netem_skb_cb *)skb->cb;
		long delay
			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));

		pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);

		/* more time remaining? */
		if (delay > 0) {
			mod_timer(&q->timer, jiffies + delay);
			return 1;
		}

		__skb_unlink(skb, &q->delayed);

		if (q->qdisc->enqueue(skb, q->qdisc)) {
			sch->q.qlen--;
			sch->qstats.drops++;
		}
	}

	return 0;
}
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	pr_debug("netem_enqueue skb=%p\n", skb);

	/* Random packet drop 0 => none, ~0 => all */
	if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
		pr_debug("netem_enqueue: random loss\n");
		sch->qstats.drops++;
		kfree_skb(skb);
		return 0;	/* lie about loss so TCP doesn't know */
	}

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) {
		struct sk_buff *skb2;

		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) {
			struct Qdisc *qp;

			/* Since one packet can generate two packets in the
			 * queue, the parent's qlen accounting gets confused,
			 * so fix it up.
			 */
			qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent));
			if (qp)
				qp->q.qlen++;

			sch->q.qlen++;
			sch->bstats.bytes += skb2->len;
			sch->bstats.packets++;
		} else
			sch->qstats.drops++;
	}

	/* If doing simple delay then gap == 0 so all packets
	 * go into the delayed holding queue.  Otherwise, if doing
	 * out-of-order emulation, only "1 out of gap" packets will
	 * be delayed.
	 */
	if (q->counter < q->gap) {
		++q->counter;
		ret = q->qdisc->enqueue(skb, q->qdisc);
	} else {
		q->counter = 0;
		ret = netem_delay(sch, skb);
		netem_run(sch);
	}

	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->q.qlen++;
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
	} else
		sch->qstats.drops++;

	pr_debug("netem: enqueue ret %d\n", ret);
	return ret;
}
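/*
 * Example trace (added for illustration) of the gap logic above, with
 * gap = 4 and a large latency: the first four packets are passed
 * straight to the inner qdisc and leave immediately, the fifth resets
 * the counter and sits in the delay queue, so it departs after packets
 * submitted later; that is what produces the reordering.
 */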
/* Requeue packets but don't change time stamp */
static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
	}

	return ret;
}
static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	int pending;

	pending = netem_run(sch);

	skb = q->qdisc->dequeue(q->qdisc);
	if (skb) {
		pr_debug("netem_dequeue: return skb=%p\n", skb);
		sch->q.qlen--;
		sch->flags &= ~TCQ_F_THROTTLED;
	} else if (pending) {
		pr_debug("netem_dequeue: throttling\n");
		sch->flags |= TCQ_F_THROTTLED;
	}

	return skb;
}
static void netem_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;

	pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}
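/*
 * Note (added for illustration): the throttling cycle works like this:
 * netem_run() arms q->timer when the head of the delay queue is not
 * yet due, netem_dequeue() then sets TCQ_F_THROTTLED so the device
 * stops polling an empty inner qdisc, and when the timer fires the
 * watchdog above clears the flag and netif_schedule() restarts
 * transmission.
 */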
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	skb_queue_purge(&q->delayed);

	sch->q.qlen = 0;
	sch->flags &= ~TCQ_F_THROTTLED;
	del_timer_sync(&q->timer);
}
static int set_fifo_limit(struct Qdisc *q, int limit)
{
	struct rtattr *rta;
	int ret = -ENOMEM;

	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
	if (rta) {
		rta->rta_type = RTM_NEWQDISC;
		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;

		ret = q->ops->change(q, rta);
		kfree(rta);
	}
	return ret;
}
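/*
 * Note (added for illustration): rather than poking at the inner
 * qdisc's private data, set_fifo_limit() builds the same rtattr that
 * a netlink "change" request for a fifo would carry and hands it to
 * the child's own ->change() handler.
 */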
/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
	const __s16 *data = RTA_DATA(attr);
	struct disttable *d;
	int i;

	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	spin_lock_bh(&sch->dev->queue_lock);
	d = xchg(&q->delay_dist, d);
	spin_unlock_bh(&sch->dev->queue_lock);

	kfree(d);
	return 0;
}
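/*
 * Note (added for illustration; the helper name is from iproute2 and
 * may vary by version): the kernel never computes a distribution
 * itself.  Userspace (e.g. iproute2's "maketable") converts a measured
 * or analytic CDF into this table of scaled 16-bit values, and
 * tabledist() above merely indexes it with a correlated random value.
 */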
static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = RTA_DATA(attr);

	if (RTA_PAYLOAD(attr) != sizeof(*c))
		return -EINVAL;

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
	return 0;
}
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
		return -EINVAL;

	qopt = RTA_DATA(opt);
	ret = set_fifo_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_debug("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* Handle nested options after initial queue options.
	 * Should have put all options in nested format but too late now.
	 */
	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
		struct rtattr *tb[TCA_NETEM_MAX];
		if (rtattr_parse(tb, TCA_NETEM_MAX,
				 RTA_DATA(opt) + sizeof(*qopt),
				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
			return -EINVAL;

		if (tb[TCA_NETEM_CORR-1]) {
			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
			if (ret)
				return ret;
		}

		if (tb[TCA_NETEM_DELAY_DIST-1]) {
			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
			if (ret)
				return ret;
		}
	}

	return 0;
}
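/*
 * Illustrative layout (added, not in the original source) of the
 * option blob parsed above: a struct tc_netem_qopt header, optionally
 * followed by nested attributes:
 *
 *	+----------------------+-----------------+----------------------+
 *	| tc_netem_qopt        | TCA_NETEM_CORR  | TCA_NETEM_DELAY_DIST |
 *	| latency, limit, loss | tc_netem_corr   | array of __s16       |
 *	+----------------------+-----------------+----------------------+
 *
 * which is why rtattr_parse() starts sizeof(*qopt) bytes into the
 * payload.
 */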
static int netem_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	skb_queue_head_init(&q->delayed);
	init_timer(&q->timer);
	q->timer.function = netem_watchdog;
	q->timer.data = (unsigned long) sch;
	q->counter = 0;

	q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
	if (!q->qdisc) {
		pr_debug("netem: qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_debug("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->timer);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
}
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct rtattr *rta = (struct rtattr *) b;
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
	rta->rta_len = skb->tail - b;

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
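/*
 * Note (added for illustration): netem_dump() emits the same
 * tc_netem_qopt + TCA_NETEM_CORR layout that netem_change() accepts,
 * so the configuration round-trips through "tc qdisc show"; RTA_PUT
 * branches to rtattr_failure when the skb runs out of room.
 */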
static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}
static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_reset(*old);
	sch->q.qlen = 0;
	sch_tree_unlock(sch);

	return 0;
}
static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}
static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			      struct rtattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int netem_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}
static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}
static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}
static struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.change		=	netem_change_class,
	.delete		=	netem_delete,
	.walk		=	netem_walk,
	.tcf_chain	=	netem_find_tcf,
	.dump		=	netem_dump_class,
};
static struct Qdisc_ops netem_qdisc_ops = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.requeue	=	netem_requeue,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};
static int __init netem_module_init(void)
{
	return register_qdisc(&netem_qdisc_ops);
}

static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}

module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");