[PATCH] libata: Prevent the interrupt handler from completing a command twice
[linux-2.6/verdex.git] / net / sched / sch_netem.c
blobe0c9fbe73b15cee32b44f4469e1ac5eeb9849267
1 /*
2 * net/sched/sch_netem.c Network emulator
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Many of the algorithms and ideas for this came from
10 * NIST Net which is not copyrighted.
12 * Authors: Stephen Hemminger <shemminger@osdl.org>
13 * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/bitops.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/netdevice.h>
23 #include <linux/skbuff.h>
24 #include <linux/rtnetlink.h>
26 #include <net/pkt_sched.h>
28 /* Network Emulation Queuing algorithm.
29 ====================================
31 Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
32 Network Emulation Tool
33 [2] Luigi Rizzo, DummyNet for FreeBSD
35 ----------------------------------------------------------------
37 This started out as a simple way to delay outgoing packets to
38 test TCP but has grown to include most of the functionality
39 of a full blown network emulator like NISTnet. It can delay
40 packets and add random jitter (and correlation). The random
41 distribution can be loaded from a table as well to provide
42 normal, Pareto, or experimental curves. Packet loss,
43 duplication, and reordering can also be emulated.
45 This qdisc does not do classification that can be handled in
46 layering other disciplines. It does not need to do bandwidth
47 control either since that can be handled by using token
48 bucket or other rate control.
50 The simulator is limited by the Linux timer resolution
51 and will create packet bursts on the HZ boundary (1ms).
54 struct netem_sched_data {
55 struct Qdisc *qdisc;
56 struct sk_buff_head delayed;
57 struct timer_list timer;
59 u32 latency;
60 u32 loss;
61 u32 limit;
62 u32 counter;
63 u32 gap;
64 u32 jitter;
65 u32 duplicate;
67 struct crndstate {
68 unsigned long last;
69 unsigned long rho;
70 } delay_cor, loss_cor, dup_cor;
72 struct disttable {
73 u32 size;
74 s16 table[0];
75 } *delay_dist;
78 /* Time stamp put into socket buffer control block */
79 struct netem_skb_cb {
80 psched_time_t time_to_send;
83 /* init_crandom - initialize correlated random number generator
84 * Use entropy source for initial seed.
86 static void init_crandom(struct crndstate *state, unsigned long rho)
88 state->rho = rho;
89 state->last = net_random();
92 /* get_crandom - correlated random number generator
93 * Next number depends on last value.
94 * rho is scaled to avoid floating point.
96 static unsigned long get_crandom(struct crndstate *state)
98 u64 value, rho;
99 unsigned long answer;
101 if (state->rho == 0) /* no correllation */
102 return net_random();
104 value = net_random();
105 rho = (u64)state->rho + 1;
106 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
107 state->last = answer;
108 return answer;
111 /* tabledist - return a pseudo-randomly distributed value with mean mu and
112 * std deviation sigma. Uses table lookup to approximate the desired
113 * distribution, and a uniformly-distributed pseudo-random source.
115 static long tabledist(unsigned long mu, long sigma,
116 struct crndstate *state, const struct disttable *dist)
118 long t, x;
119 unsigned long rnd;
121 if (sigma == 0)
122 return mu;
124 rnd = get_crandom(state);
126 /* default uniform distribution */
127 if (dist == NULL)
128 return (rnd % (2*sigma)) - sigma + mu;
130 t = dist->table[rnd % dist->size];
131 x = (sigma % NETEM_DIST_SCALE) * t;
132 if (x >= 0)
133 x += NETEM_DIST_SCALE/2;
134 else
135 x -= NETEM_DIST_SCALE/2;
137 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
140 /* Put skb in the private delayed queue. */
141 static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
143 struct netem_sched_data *q = qdisc_priv(sch);
144 psched_tdiff_t td;
145 psched_time_t now;
147 PSCHED_GET_TIME(now);
148 td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
150 /* Always queue at tail to keep packets in order */
151 if (likely(q->delayed.qlen < q->limit)) {
152 struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
154 PSCHED_TADD2(now, td, cb->time_to_send);
156 pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb,
157 now, cb->time_to_send);
159 __skb_queue_tail(&q->delayed, skb);
160 return NET_XMIT_SUCCESS;
163 pr_debug("netem_delay: queue over limit %d\n", q->limit);
164 sch->qstats.overlimits++;
165 kfree_skb(skb);
166 return NET_XMIT_DROP;
170 * Move a packet that is ready to send from the delay holding
171 * list to the underlying qdisc.
173 static int netem_run(struct Qdisc *sch)
175 struct netem_sched_data *q = qdisc_priv(sch);
176 struct sk_buff *skb;
177 psched_time_t now;
179 PSCHED_GET_TIME(now);
181 skb = skb_peek(&q->delayed);
182 if (skb) {
183 const struct netem_skb_cb *cb
184 = (const struct netem_skb_cb *)skb->cb;
185 long delay
186 = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
187 pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
189 /* if more time remaining? */
190 if (delay > 0) {
191 mod_timer(&q->timer, jiffies + delay);
192 return 1;
195 __skb_unlink(skb, &q->delayed);
197 if (q->qdisc->enqueue(skb, q->qdisc)) {
198 sch->q.qlen--;
199 sch->qstats.drops++;
203 return 0;
206 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
208 struct netem_sched_data *q = qdisc_priv(sch);
209 int ret;
211 pr_debug("netem_enqueue skb=%p\n", skb);
213 /* Random packet drop 0 => none, ~0 => all */
214 if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
215 pr_debug("netem_enqueue: random loss\n");
216 sch->qstats.drops++;
217 kfree_skb(skb);
218 return 0; /* lie about loss so TCP doesn't know */
221 /* Random duplication */
222 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) {
223 struct sk_buff *skb2;
225 skb2 = skb_clone(skb, GFP_ATOMIC);
226 if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) {
227 struct Qdisc *qp;
229 /* Since one packet can generate two packets in the
230 * queue, the parent's qlen accounting gets confused,
231 * so fix it.
233 qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent));
234 if (qp)
235 qp->q.qlen++;
237 sch->q.qlen++;
238 sch->bstats.bytes += skb2->len;
239 sch->bstats.packets++;
240 } else
241 sch->qstats.drops++;
244 /* If doing simple delay then gap == 0 so all packets
245 * go into the delayed holding queue
246 * otherwise if doing out of order only "1 out of gap"
247 * packets will be delayed.
249 if (q->counter < q->gap) {
250 ++q->counter;
251 ret = q->qdisc->enqueue(skb, q->qdisc);
252 } else {
253 q->counter = 0;
254 ret = netem_delay(sch, skb);
255 netem_run(sch);
258 if (likely(ret == NET_XMIT_SUCCESS)) {
259 sch->q.qlen++;
260 sch->bstats.bytes += skb->len;
261 sch->bstats.packets++;
262 } else
263 sch->qstats.drops++;
265 pr_debug("netem: enqueue ret %d\n", ret);
266 return ret;
269 /* Requeue packets but don't change time stamp */
270 static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
272 struct netem_sched_data *q = qdisc_priv(sch);
273 int ret;
275 if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
276 sch->q.qlen++;
277 sch->qstats.requeues++;
280 return ret;
283 static unsigned int netem_drop(struct Qdisc* sch)
285 struct netem_sched_data *q = qdisc_priv(sch);
286 unsigned int len;
288 if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
289 sch->q.qlen--;
290 sch->qstats.drops++;
292 return len;
295 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
297 struct netem_sched_data *q = qdisc_priv(sch);
298 struct sk_buff *skb;
299 int pending;
301 pending = netem_run(sch);
303 skb = q->qdisc->dequeue(q->qdisc);
304 if (skb) {
305 pr_debug("netem_dequeue: return skb=%p\n", skb);
306 sch->q.qlen--;
307 sch->flags &= ~TCQ_F_THROTTLED;
309 else if (pending) {
310 pr_debug("netem_dequeue: throttling\n");
311 sch->flags |= TCQ_F_THROTTLED;
314 return skb;
317 static void netem_watchdog(unsigned long arg)
319 struct Qdisc *sch = (struct Qdisc *)arg;
321 pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
322 sch->flags &= ~TCQ_F_THROTTLED;
323 netif_schedule(sch->dev);
326 static void netem_reset(struct Qdisc *sch)
328 struct netem_sched_data *q = qdisc_priv(sch);
330 qdisc_reset(q->qdisc);
331 skb_queue_purge(&q->delayed);
333 sch->q.qlen = 0;
334 sch->flags &= ~TCQ_F_THROTTLED;
335 del_timer_sync(&q->timer);
338 static int set_fifo_limit(struct Qdisc *q, int limit)
340 struct rtattr *rta;
341 int ret = -ENOMEM;
343 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
344 if (rta) {
345 rta->rta_type = RTM_NEWQDISC;
346 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
347 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
349 ret = q->ops->change(q, rta);
350 kfree(rta);
352 return ret;
356 * Distribution data is a variable size payload containing
357 * signed 16 bit values.
359 static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
361 struct netem_sched_data *q = qdisc_priv(sch);
362 unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
363 const __s16 *data = RTA_DATA(attr);
364 struct disttable *d;
365 int i;
367 if (n > 65536)
368 return -EINVAL;
370 d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
371 if (!d)
372 return -ENOMEM;
374 d->size = n;
375 for (i = 0; i < n; i++)
376 d->table[i] = data[i];
378 spin_lock_bh(&sch->dev->queue_lock);
379 d = xchg(&q->delay_dist, d);
380 spin_unlock_bh(&sch->dev->queue_lock);
382 kfree(d);
383 return 0;
386 static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
388 struct netem_sched_data *q = qdisc_priv(sch);
389 const struct tc_netem_corr *c = RTA_DATA(attr);
391 if (RTA_PAYLOAD(attr) != sizeof(*c))
392 return -EINVAL;
394 init_crandom(&q->delay_cor, c->delay_corr);
395 init_crandom(&q->loss_cor, c->loss_corr);
396 init_crandom(&q->dup_cor, c->dup_corr);
397 return 0;
400 static int netem_change(struct Qdisc *sch, struct rtattr *opt)
402 struct netem_sched_data *q = qdisc_priv(sch);
403 struct tc_netem_qopt *qopt;
404 int ret;
406 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
407 return -EINVAL;
409 qopt = RTA_DATA(opt);
410 ret = set_fifo_limit(q->qdisc, qopt->limit);
411 if (ret) {
412 pr_debug("netem: can't set fifo limit\n");
413 return ret;
416 q->latency = qopt->latency;
417 q->jitter = qopt->jitter;
418 q->limit = qopt->limit;
419 q->gap = qopt->gap;
420 q->loss = qopt->loss;
421 q->duplicate = qopt->duplicate;
423 /* Handle nested options after initial queue options.
424 * Should have put all options in nested format but too late now.
426 if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
427 struct rtattr *tb[TCA_NETEM_MAX];
428 if (rtattr_parse(tb, TCA_NETEM_MAX,
429 RTA_DATA(opt) + sizeof(*qopt),
430 RTA_PAYLOAD(opt) - sizeof(*qopt)))
431 return -EINVAL;
433 if (tb[TCA_NETEM_CORR-1]) {
434 ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
435 if (ret)
436 return ret;
439 if (tb[TCA_NETEM_DELAY_DIST-1]) {
440 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
441 if (ret)
442 return ret;
447 return 0;
450 static int netem_init(struct Qdisc *sch, struct rtattr *opt)
452 struct netem_sched_data *q = qdisc_priv(sch);
453 int ret;
455 if (!opt)
456 return -EINVAL;
458 skb_queue_head_init(&q->delayed);
459 init_timer(&q->timer);
460 q->timer.function = netem_watchdog;
461 q->timer.data = (unsigned long) sch;
462 q->counter = 0;
464 q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
465 if (!q->qdisc) {
466 pr_debug("netem: qdisc create failed\n");
467 return -ENOMEM;
470 ret = netem_change(sch, opt);
471 if (ret) {
472 pr_debug("netem: change failed\n");
473 qdisc_destroy(q->qdisc);
475 return ret;
478 static void netem_destroy(struct Qdisc *sch)
480 struct netem_sched_data *q = qdisc_priv(sch);
482 del_timer_sync(&q->timer);
483 qdisc_destroy(q->qdisc);
484 kfree(q->delay_dist);
487 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
489 const struct netem_sched_data *q = qdisc_priv(sch);
490 unsigned char *b = skb->tail;
491 struct rtattr *rta = (struct rtattr *) b;
492 struct tc_netem_qopt qopt;
493 struct tc_netem_corr cor;
495 qopt.latency = q->latency;
496 qopt.jitter = q->jitter;
497 qopt.limit = q->limit;
498 qopt.loss = q->loss;
499 qopt.gap = q->gap;
500 qopt.duplicate = q->duplicate;
501 RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
503 cor.delay_corr = q->delay_cor.rho;
504 cor.loss_corr = q->loss_cor.rho;
505 cor.dup_corr = q->dup_cor.rho;
506 RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
507 rta->rta_len = skb->tail - b;
509 return skb->len;
511 rtattr_failure:
512 skb_trim(skb, b - skb->data);
513 return -1;
516 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
517 struct sk_buff *skb, struct tcmsg *tcm)
519 struct netem_sched_data *q = qdisc_priv(sch);
521 if (cl != 1) /* only one class */
522 return -ENOENT;
524 tcm->tcm_handle |= TC_H_MIN(1);
525 tcm->tcm_info = q->qdisc->handle;
527 return 0;
530 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
531 struct Qdisc **old)
533 struct netem_sched_data *q = qdisc_priv(sch);
535 if (new == NULL)
536 new = &noop_qdisc;
538 sch_tree_lock(sch);
539 *old = xchg(&q->qdisc, new);
540 qdisc_reset(*old);
541 sch->q.qlen = 0;
542 sch_tree_unlock(sch);
544 return 0;
547 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
549 struct netem_sched_data *q = qdisc_priv(sch);
550 return q->qdisc;
553 static unsigned long netem_get(struct Qdisc *sch, u32 classid)
555 return 1;
558 static void netem_put(struct Qdisc *sch, unsigned long arg)
562 static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
563 struct rtattr **tca, unsigned long *arg)
565 return -ENOSYS;
568 static int netem_delete(struct Qdisc *sch, unsigned long arg)
570 return -ENOSYS;
573 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
575 if (!walker->stop) {
576 if (walker->count >= walker->skip)
577 if (walker->fn(sch, 1, walker) < 0) {
578 walker->stop = 1;
579 return;
581 walker->count++;
585 static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
587 return NULL;
590 static struct Qdisc_class_ops netem_class_ops = {
591 .graft = netem_graft,
592 .leaf = netem_leaf,
593 .get = netem_get,
594 .put = netem_put,
595 .change = netem_change_class,
596 .delete = netem_delete,
597 .walk = netem_walk,
598 .tcf_chain = netem_find_tcf,
599 .dump = netem_dump_class,
602 static struct Qdisc_ops netem_qdisc_ops = {
603 .id = "netem",
604 .cl_ops = &netem_class_ops,
605 .priv_size = sizeof(struct netem_sched_data),
606 .enqueue = netem_enqueue,
607 .dequeue = netem_dequeue,
608 .requeue = netem_requeue,
609 .drop = netem_drop,
610 .init = netem_init,
611 .reset = netem_reset,
612 .destroy = netem_destroy,
613 .change = netem_change,
614 .dump = netem_dump,
615 .owner = THIS_MODULE,
619 static int __init netem_module_init(void)
621 return register_qdisc(&netem_qdisc_ops);
623 static void __exit netem_module_exit(void)
625 unregister_qdisc(&netem_qdisc_ops);
627 module_init(netem_module_init)
628 module_exit(netem_module_exit)
629 MODULE_LICENSE("GPL");