2 * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
4 * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Meant to be mostly used for locally generated traffic :
12 * Fast classification depends on skb->sk being set before reaching us.
13 * If not (router workload), we use rxhash as a fallback, with a 32-bit wide hash.
14 * All packets belonging to a socket are considered as a 'flow'.
16 * Flows are dynamically allocated and stored in a hash table of RB trees
17 * They are also part of one of the Round Robin 'queues' (new or old flows)
19 * Burst avoidance (aka pacing) capability :
21 * Transport (eg TCP) can set in sk->sk_pacing_rate a rate, enqueue a
22 * bunch of packets, and this packet scheduler adds delay between
23 * packets to respect rate limitation.
26 * - lookup one RB tree (out of 1024 or more) to find the flow.
27 * If the flow does not exist, create it and add it to the tree.
28 * Add skb to the per flow list of skb (fifo).
29 * - Use a special fifo for high prio packets
31 * dequeue() : serves flows in Round Robin
32 * Note : When a flow becomes empty, we do not immediately remove it from
33 * rb trees, for performance reasons (it is expected to send additional packets,
34 * or the SLAB cache will reuse the socket for another flow)
37 #include <linux/module.h>
38 #include <linux/types.h>
39 #include <linux/kernel.h>
40 #include <linux/jiffies.h>
41 #include <linux/string.h>
43 #include <linux/errno.h>
44 #include <linux/init.h>
45 #include <linux/skbuff.h>
46 #include <linux/slab.h>
47 #include <linux/rbtree.h>
48 #include <linux/hash.h>
49 #include <linux/prefetch.h>
50 #include <linux/vmalloc.h>
51 #include <net/netlink.h>
52 #include <net/pkt_sched.h>
54 #include <net/tcp_states.h>
58 * Per flow structure, dynamically allocated
61 struct sk_buff
*head
; /* list of skbs for this flow : first skb */
63 struct sk_buff
*tail
; /* last skb in the list */
64 unsigned long age
; /* jiffies when flow was emptied, for gc */
66 struct rb_node fq_node
; /* anchor in fq_root[] trees */
68 int qlen
; /* number of packets in flow queue */
70 u32 socket_hash
; /* sk_hash */
71 struct fq_flow
*next
; /* next pointer in RR lists, or &detached */
73 struct rb_node rate_node
; /* anchor in q->delayed tree */
78 struct fq_flow
*first
;
82 struct fq_sched_data
{
83 struct fq_flow_head new_flows
;
85 struct fq_flow_head old_flows
;
87 struct rb_root delayed
; /* for rate limited flows */
88 u64 time_next_delayed_flow
;
89 unsigned long unthrottle_latency_ns
;
91 struct fq_flow internal
; /* for non classified or high prio packets */
94 u32 flow_refill_delay
;
95 u32 flow_plimit
; /* max packets per flow */
96 unsigned long flow_max_rate
; /* optional max rate per flow */
97 u32 orphan_mask
; /* mask for orphaned skb */
98 u32 low_rate_threshold
;
99 struct rb_root
*fq_root
;
108 u64 stat_internal_packets
;
110 u64 stat_flows_plimit
;
111 u64 stat_pkts_too_long
;
112 u64 stat_allocation_errors
;
113 struct qdisc_watchdog watchdog
;
116 /* special value to mark a detached flow (not on old/new list) */
117 static struct fq_flow detached
, throttled
;
119 static void fq_flow_set_detached(struct fq_flow
*f
)
125 static bool fq_flow_is_detached(const struct fq_flow
*f
)
127 return f
->next
== &detached
;
130 static bool fq_flow_is_throttled(const struct fq_flow
*f
)
132 return f
->next
== &throttled
;
135 static void fq_flow_add_tail(struct fq_flow_head
*head
, struct fq_flow
*flow
)
138 head
->last
->next
= flow
;
145 static void fq_flow_unset_throttled(struct fq_sched_data
*q
, struct fq_flow
*f
)
147 rb_erase(&f
->rate_node
, &q
->delayed
);
148 q
->throttled_flows
--;
149 fq_flow_add_tail(&q
->old_flows
, f
);
152 static void fq_flow_set_throttled(struct fq_sched_data
*q
, struct fq_flow
*f
)
154 struct rb_node
**p
= &q
->delayed
.rb_node
, *parent
= NULL
;
160 aux
= rb_entry(parent
, struct fq_flow
, rate_node
);
161 if (f
->time_next_packet
>= aux
->time_next_packet
)
162 p
= &parent
->rb_right
;
164 p
= &parent
->rb_left
;
166 rb_link_node(&f
->rate_node
, parent
, p
);
167 rb_insert_color(&f
->rate_node
, &q
->delayed
);
168 q
->throttled_flows
++;
171 f
->next
= &throttled
;
172 if (q
->time_next_delayed_flow
> f
->time_next_packet
)
173 q
->time_next_delayed_flow
= f
->time_next_packet
;
177 static struct kmem_cache
*fq_flow_cachep __read_mostly
;
180 /* limit number of collected flows per round */
182 #define FQ_GC_AGE (3*HZ)
184 static bool fq_gc_candidate(const struct fq_flow
*f
)
186 return fq_flow_is_detached(f
) &&
187 time_after(jiffies
, f
->age
+ FQ_GC_AGE
);
190 static void fq_gc(struct fq_sched_data
*q
,
191 struct rb_root
*root
,
194 struct fq_flow
*f
, *tofree
[FQ_GC_MAX
];
195 struct rb_node
**p
, *parent
;
203 f
= rb_entry(parent
, struct fq_flow
, fq_node
);
207 if (fq_gc_candidate(f
)) {
209 if (fcnt
== FQ_GC_MAX
)
214 p
= &parent
->rb_right
;
216 p
= &parent
->rb_left
;
220 q
->inactive_flows
-= fcnt
;
221 q
->stat_gc_flows
+= fcnt
;
223 struct fq_flow
*f
= tofree
[--fcnt
];
225 rb_erase(&f
->fq_node
, root
);
226 kmem_cache_free(fq_flow_cachep
, f
);
230 static struct fq_flow
*fq_classify(struct sk_buff
*skb
, struct fq_sched_data
*q
)
232 struct rb_node
**p
, *parent
;
233 struct sock
*sk
= skb
->sk
;
234 struct rb_root
*root
;
237 /* warning: no starvation prevention... */
238 if (unlikely((skb
->priority
& TC_PRIO_MAX
) == TC_PRIO_CONTROL
))
241 /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
242 * or a listener (SYNCOOKIE mode)
243 * 1) request sockets are not full blown,
244 * they do not contain sk_pacing_rate
245 * 2) They are not part of a 'flow' yet
246 * 3) We do not want to rate limit them (eg SYNFLOOD attack),
247 * especially if the listener set SO_MAX_PACING_RATE
248 * 4) We pretend they are orphaned
250 if (!sk
|| sk_listener(sk
)) {
251 unsigned long hash
= skb_get_hash(skb
) & q
->orphan_mask
;
253 /* By forcing low order bit to 1, we make sure to not
254 * collide with a local flow (socket pointers are word aligned)
256 sk
= (struct sock
*)((hash
<< 1) | 1UL);
260 root
= &q
->fq_root
[hash_ptr(sk
, q
->fq_trees_log
)];
262 if (q
->flows
>= (2U << q
->fq_trees_log
) &&
263 q
->inactive_flows
> q
->flows
/2)
271 f
= rb_entry(parent
, struct fq_flow
, fq_node
);
273 /* socket might have been reallocated, so check
274 * if its sk_hash is the same.
275 * If not, we need to refill credit with
278 if (unlikely(skb
->sk
&&
279 f
->socket_hash
!= sk
->sk_hash
)) {
280 f
->credit
= q
->initial_quantum
;
281 f
->socket_hash
= sk
->sk_hash
;
282 if (fq_flow_is_throttled(f
))
283 fq_flow_unset_throttled(q
, f
);
284 f
->time_next_packet
= 0ULL;
289 p
= &parent
->rb_right
;
291 p
= &parent
->rb_left
;
294 f
= kmem_cache_zalloc(fq_flow_cachep
, GFP_ATOMIC
| __GFP_NOWARN
);
296 q
->stat_allocation_errors
++;
299 fq_flow_set_detached(f
);
302 f
->socket_hash
= sk
->sk_hash
;
303 f
->credit
= q
->initial_quantum
;
305 rb_link_node(&f
->fq_node
, parent
, p
);
306 rb_insert_color(&f
->fq_node
, root
);
314 /* remove one skb from head of flow queue */
315 static struct sk_buff
*fq_dequeue_head(struct Qdisc
*sch
, struct fq_flow
*flow
)
317 struct sk_buff
*skb
= flow
->head
;
320 flow
->head
= skb
->next
;
321 skb_mark_not_on_list(skb
);
323 qdisc_qstats_backlog_dec(sch
, skb
);
329 static void flow_queue_add(struct fq_flow
*flow
, struct sk_buff
*skb
)
331 struct sk_buff
*head
= flow
->head
;
337 flow
->tail
->next
= skb
;
342 static int fq_enqueue(struct sk_buff
*skb
, struct Qdisc
*sch
,
343 struct sk_buff
**to_free
)
345 struct fq_sched_data
*q
= qdisc_priv(sch
);
348 if (unlikely(sch
->q
.qlen
>= sch
->limit
))
349 return qdisc_drop(skb
, sch
, to_free
);
351 f
= fq_classify(skb
, q
);
352 if (unlikely(f
->qlen
>= q
->flow_plimit
&& f
!= &q
->internal
)) {
353 q
->stat_flows_plimit
++;
354 return qdisc_drop(skb
, sch
, to_free
);
358 qdisc_qstats_backlog_inc(sch
, skb
);
359 if (fq_flow_is_detached(f
)) {
360 struct sock
*sk
= skb
->sk
;
362 fq_flow_add_tail(&q
->new_flows
, f
);
363 if (time_after(jiffies
, f
->age
+ q
->flow_refill_delay
))
364 f
->credit
= max_t(u32
, f
->credit
, q
->quantum
);
365 if (sk
&& q
->rate_enable
) {
366 if (unlikely(smp_load_acquire(&sk
->sk_pacing_status
) !=
368 smp_store_release(&sk
->sk_pacing_status
,
374 /* Note: this overwrites f->age */
375 flow_queue_add(f
, skb
);
377 if (unlikely(f
== &q
->internal
)) {
378 q
->stat_internal_packets
++;
382 return NET_XMIT_SUCCESS
;
385 static void fq_check_throttled(struct fq_sched_data
*q
, u64 now
)
387 unsigned long sample
;
390 if (q
->time_next_delayed_flow
> now
)
393 /* Update unthrottle latency EWMA.
394 * This is cheap and can help diagnosing timer/latency problems.
396 sample
= (unsigned long)(now
- q
->time_next_delayed_flow
);
397 q
->unthrottle_latency_ns
-= q
->unthrottle_latency_ns
>> 3;
398 q
->unthrottle_latency_ns
+= sample
>> 3;
400 q
->time_next_delayed_flow
= ~0ULL;
401 while ((p
= rb_first(&q
->delayed
)) != NULL
) {
402 struct fq_flow
*f
= rb_entry(p
, struct fq_flow
, rate_node
);
404 if (f
->time_next_packet
> now
) {
405 q
->time_next_delayed_flow
= f
->time_next_packet
;
408 fq_flow_unset_throttled(q
, f
);
412 static struct sk_buff
*fq_dequeue(struct Qdisc
*sch
)
414 struct fq_sched_data
*q
= qdisc_priv(sch
);
415 u64 now
= ktime_get_ns();
416 struct fq_flow_head
*head
;
422 skb
= fq_dequeue_head(sch
, &q
->internal
);
425 fq_check_throttled(q
, now
);
427 head
= &q
->new_flows
;
429 head
= &q
->old_flows
;
431 if (q
->time_next_delayed_flow
!= ~0ULL)
432 qdisc_watchdog_schedule_ns(&q
->watchdog
,
433 q
->time_next_delayed_flow
);
439 if (f
->credit
<= 0) {
440 f
->credit
+= q
->quantum
;
441 head
->first
= f
->next
;
442 fq_flow_add_tail(&q
->old_flows
, f
);
448 u64 time_next_packet
= max_t(u64
, ktime_to_ns(skb
->tstamp
),
449 f
->time_next_packet
);
451 if (now
< time_next_packet
) {
452 head
->first
= f
->next
;
453 f
->time_next_packet
= time_next_packet
;
454 fq_flow_set_throttled(q
, f
);
459 skb
= fq_dequeue_head(sch
, f
);
461 head
->first
= f
->next
;
462 /* force a pass through old_flows to prevent starvation */
463 if ((head
== &q
->new_flows
) && q
->old_flows
.first
) {
464 fq_flow_add_tail(&q
->old_flows
, f
);
466 fq_flow_set_detached(f
);
472 plen
= qdisc_pkt_len(skb
);
478 rate
= q
->flow_max_rate
;
480 /* If EDT time was provided for this skb, we need to
481 * update f->time_next_packet only if this qdisc enforces
486 rate
= min(skb
->sk
->sk_pacing_rate
, rate
);
488 if (rate
<= q
->low_rate_threshold
) {
491 plen
= max(plen
, q
->quantum
);
497 u64 len
= (u64
)plen
* NSEC_PER_SEC
;
500 len
= div64_ul(len
, rate
);
501 /* Since socket rate can change later,
502 * clamp the delay to 1 second.
503 * Really, providers of too big packets should be fixed !
505 if (unlikely(len
> NSEC_PER_SEC
)) {
507 q
->stat_pkts_too_long
++;
509 /* Account for schedule/timers drifts.
510 * f->time_next_packet was set when prior packet was sent,
511 * and current time (@now) can be too late by tens of us.
513 if (f
->time_next_packet
)
514 len
-= min(len
/2, now
- f
->time_next_packet
);
515 f
->time_next_packet
= now
+ len
;
518 qdisc_bstats_update(sch
, skb
);
522 static void fq_flow_purge(struct fq_flow
*flow
)
524 rtnl_kfree_skbs(flow
->head
, flow
->tail
);
529 static void fq_reset(struct Qdisc
*sch
)
531 struct fq_sched_data
*q
= qdisc_priv(sch
);
532 struct rb_root
*root
;
538 sch
->qstats
.backlog
= 0;
540 fq_flow_purge(&q
->internal
);
545 for (idx
= 0; idx
< (1U << q
->fq_trees_log
); idx
++) {
546 root
= &q
->fq_root
[idx
];
547 while ((p
= rb_first(root
)) != NULL
) {
548 f
= rb_entry(p
, struct fq_flow
, fq_node
);
553 kmem_cache_free(fq_flow_cachep
, f
);
556 q
->new_flows
.first
= NULL
;
557 q
->old_flows
.first
= NULL
;
558 q
->delayed
= RB_ROOT
;
560 q
->inactive_flows
= 0;
561 q
->throttled_flows
= 0;
564 static void fq_rehash(struct fq_sched_data
*q
,
565 struct rb_root
*old_array
, u32 old_log
,
566 struct rb_root
*new_array
, u32 new_log
)
568 struct rb_node
*op
, **np
, *parent
;
569 struct rb_root
*oroot
, *nroot
;
570 struct fq_flow
*of
, *nf
;
574 for (idx
= 0; idx
< (1U << old_log
); idx
++) {
575 oroot
= &old_array
[idx
];
576 while ((op
= rb_first(oroot
)) != NULL
) {
578 of
= rb_entry(op
, struct fq_flow
, fq_node
);
579 if (fq_gc_candidate(of
)) {
581 kmem_cache_free(fq_flow_cachep
, of
);
584 nroot
= &new_array
[hash_ptr(of
->sk
, new_log
)];
586 np
= &nroot
->rb_node
;
591 nf
= rb_entry(parent
, struct fq_flow
, fq_node
);
592 BUG_ON(nf
->sk
== of
->sk
);
595 np
= &parent
->rb_right
;
597 np
= &parent
->rb_left
;
600 rb_link_node(&of
->fq_node
, parent
, np
);
601 rb_insert_color(&of
->fq_node
, nroot
);
605 q
->inactive_flows
-= fcnt
;
606 q
->stat_gc_flows
+= fcnt
;
609 static void fq_free(void *addr
)
614 static int fq_resize(struct Qdisc
*sch
, u32 log
)
616 struct fq_sched_data
*q
= qdisc_priv(sch
);
617 struct rb_root
*array
;
621 if (q
->fq_root
&& log
== q
->fq_trees_log
)
624 /* If XPS was setup, we can allocate memory on right NUMA node */
625 array
= kvmalloc_node(sizeof(struct rb_root
) << log
, GFP_KERNEL
| __GFP_RETRY_MAYFAIL
,
626 netdev_queue_numa_node_read(sch
->dev_queue
));
630 for (idx
= 0; idx
< (1U << log
); idx
++)
631 array
[idx
] = RB_ROOT
;
635 old_fq_root
= q
->fq_root
;
637 fq_rehash(q
, old_fq_root
, q
->fq_trees_log
, array
, log
);
640 q
->fq_trees_log
= log
;
642 sch_tree_unlock(sch
);
644 fq_free(old_fq_root
);
649 static const struct nla_policy fq_policy
[TCA_FQ_MAX
+ 1] = {
650 [TCA_FQ_PLIMIT
] = { .type
= NLA_U32
},
651 [TCA_FQ_FLOW_PLIMIT
] = { .type
= NLA_U32
},
652 [TCA_FQ_QUANTUM
] = { .type
= NLA_U32
},
653 [TCA_FQ_INITIAL_QUANTUM
] = { .type
= NLA_U32
},
654 [TCA_FQ_RATE_ENABLE
] = { .type
= NLA_U32
},
655 [TCA_FQ_FLOW_DEFAULT_RATE
] = { .type
= NLA_U32
},
656 [TCA_FQ_FLOW_MAX_RATE
] = { .type
= NLA_U32
},
657 [TCA_FQ_BUCKETS_LOG
] = { .type
= NLA_U32
},
658 [TCA_FQ_FLOW_REFILL_DELAY
] = { .type
= NLA_U32
},
659 [TCA_FQ_LOW_RATE_THRESHOLD
] = { .type
= NLA_U32
},
662 static int fq_change(struct Qdisc
*sch
, struct nlattr
*opt
,
663 struct netlink_ext_ack
*extack
)
665 struct fq_sched_data
*q
= qdisc_priv(sch
);
666 struct nlattr
*tb
[TCA_FQ_MAX
+ 1];
667 int err
, drop_count
= 0;
668 unsigned drop_len
= 0;
674 err
= nla_parse_nested(tb
, TCA_FQ_MAX
, opt
, fq_policy
, NULL
);
680 fq_log
= q
->fq_trees_log
;
682 if (tb
[TCA_FQ_BUCKETS_LOG
]) {
683 u32 nval
= nla_get_u32(tb
[TCA_FQ_BUCKETS_LOG
]);
685 if (nval
>= 1 && nval
<= ilog2(256*1024))
690 if (tb
[TCA_FQ_PLIMIT
])
691 sch
->limit
= nla_get_u32(tb
[TCA_FQ_PLIMIT
]);
693 if (tb
[TCA_FQ_FLOW_PLIMIT
])
694 q
->flow_plimit
= nla_get_u32(tb
[TCA_FQ_FLOW_PLIMIT
]);
696 if (tb
[TCA_FQ_QUANTUM
]) {
697 u32 quantum
= nla_get_u32(tb
[TCA_FQ_QUANTUM
]);
700 q
->quantum
= quantum
;
705 if (tb
[TCA_FQ_INITIAL_QUANTUM
])
706 q
->initial_quantum
= nla_get_u32(tb
[TCA_FQ_INITIAL_QUANTUM
]);
708 if (tb
[TCA_FQ_FLOW_DEFAULT_RATE
])
709 pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
710 nla_get_u32(tb
[TCA_FQ_FLOW_DEFAULT_RATE
]));
712 if (tb
[TCA_FQ_FLOW_MAX_RATE
]) {
713 u32 rate
= nla_get_u32(tb
[TCA_FQ_FLOW_MAX_RATE
]);
715 q
->flow_max_rate
= (rate
== ~0U) ? ~0UL : rate
;
717 if (tb
[TCA_FQ_LOW_RATE_THRESHOLD
])
718 q
->low_rate_threshold
=
719 nla_get_u32(tb
[TCA_FQ_LOW_RATE_THRESHOLD
]);
721 if (tb
[TCA_FQ_RATE_ENABLE
]) {
722 u32 enable
= nla_get_u32(tb
[TCA_FQ_RATE_ENABLE
]);
725 q
->rate_enable
= enable
;
730 if (tb
[TCA_FQ_FLOW_REFILL_DELAY
]) {
731 u32 usecs_delay
= nla_get_u32(tb
[TCA_FQ_FLOW_REFILL_DELAY
]) ;
733 q
->flow_refill_delay
= usecs_to_jiffies(usecs_delay
);
736 if (tb
[TCA_FQ_ORPHAN_MASK
])
737 q
->orphan_mask
= nla_get_u32(tb
[TCA_FQ_ORPHAN_MASK
]);
740 sch_tree_unlock(sch
);
741 err
= fq_resize(sch
, fq_log
);
744 while (sch
->q
.qlen
> sch
->limit
) {
745 struct sk_buff
*skb
= fq_dequeue(sch
);
749 drop_len
+= qdisc_pkt_len(skb
);
750 rtnl_kfree_skbs(skb
, skb
);
753 qdisc_tree_reduce_backlog(sch
, drop_count
, drop_len
);
755 sch_tree_unlock(sch
);
759 static void fq_destroy(struct Qdisc
*sch
)
761 struct fq_sched_data
*q
= qdisc_priv(sch
);
765 qdisc_watchdog_cancel(&q
->watchdog
);
768 static int fq_init(struct Qdisc
*sch
, struct nlattr
*opt
,
769 struct netlink_ext_ack
*extack
)
771 struct fq_sched_data
*q
= qdisc_priv(sch
);
775 q
->flow_plimit
= 100;
776 q
->quantum
= 2 * psched_mtu(qdisc_dev(sch
));
777 q
->initial_quantum
= 10 * psched_mtu(qdisc_dev(sch
));
778 q
->flow_refill_delay
= msecs_to_jiffies(40);
779 q
->flow_max_rate
= ~0UL;
780 q
->time_next_delayed_flow
= ~0ULL;
782 q
->new_flows
.first
= NULL
;
783 q
->old_flows
.first
= NULL
;
784 q
->delayed
= RB_ROOT
;
786 q
->fq_trees_log
= ilog2(1024);
787 q
->orphan_mask
= 1024 - 1;
788 q
->low_rate_threshold
= 550000 / 8;
789 qdisc_watchdog_init_clockid(&q
->watchdog
, sch
, CLOCK_MONOTONIC
);
792 err
= fq_change(sch
, opt
, extack
);
794 err
= fq_resize(sch
, q
->fq_trees_log
);
799 static int fq_dump(struct Qdisc
*sch
, struct sk_buff
*skb
)
801 struct fq_sched_data
*q
= qdisc_priv(sch
);
804 opts
= nla_nest_start(skb
, TCA_OPTIONS
);
806 goto nla_put_failure
;
808 /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
810 if (nla_put_u32(skb
, TCA_FQ_PLIMIT
, sch
->limit
) ||
811 nla_put_u32(skb
, TCA_FQ_FLOW_PLIMIT
, q
->flow_plimit
) ||
812 nla_put_u32(skb
, TCA_FQ_QUANTUM
, q
->quantum
) ||
813 nla_put_u32(skb
, TCA_FQ_INITIAL_QUANTUM
, q
->initial_quantum
) ||
814 nla_put_u32(skb
, TCA_FQ_RATE_ENABLE
, q
->rate_enable
) ||
815 nla_put_u32(skb
, TCA_FQ_FLOW_MAX_RATE
,
816 min_t(unsigned long, q
->flow_max_rate
, ~0U)) ||
817 nla_put_u32(skb
, TCA_FQ_FLOW_REFILL_DELAY
,
818 jiffies_to_usecs(q
->flow_refill_delay
)) ||
819 nla_put_u32(skb
, TCA_FQ_ORPHAN_MASK
, q
->orphan_mask
) ||
820 nla_put_u32(skb
, TCA_FQ_LOW_RATE_THRESHOLD
,
821 q
->low_rate_threshold
) ||
822 nla_put_u32(skb
, TCA_FQ_BUCKETS_LOG
, q
->fq_trees_log
))
823 goto nla_put_failure
;
825 return nla_nest_end(skb
, opts
);
831 static int fq_dump_stats(struct Qdisc
*sch
, struct gnet_dump
*d
)
833 struct fq_sched_data
*q
= qdisc_priv(sch
);
834 struct tc_fq_qd_stats st
;
838 st
.gc_flows
= q
->stat_gc_flows
;
839 st
.highprio_packets
= q
->stat_internal_packets
;
841 st
.throttled
= q
->stat_throttled
;
842 st
.flows_plimit
= q
->stat_flows_plimit
;
843 st
.pkts_too_long
= q
->stat_pkts_too_long
;
844 st
.allocation_errors
= q
->stat_allocation_errors
;
845 st
.time_next_delayed_flow
= q
->time_next_delayed_flow
- ktime_get_ns();
847 st
.inactive_flows
= q
->inactive_flows
;
848 st
.throttled_flows
= q
->throttled_flows
;
849 st
.unthrottle_latency_ns
= min_t(unsigned long,
850 q
->unthrottle_latency_ns
, ~0U);
851 sch_tree_unlock(sch
);
853 return gnet_stats_copy_app(d
, &st
, sizeof(st
));
856 static struct Qdisc_ops fq_qdisc_ops __read_mostly
= {
858 .priv_size
= sizeof(struct fq_sched_data
),
860 .enqueue
= fq_enqueue
,
861 .dequeue
= fq_dequeue
,
862 .peek
= qdisc_peek_dequeued
,
865 .destroy
= fq_destroy
,
868 .dump_stats
= fq_dump_stats
,
869 .owner
= THIS_MODULE
,
872 static int __init
fq_module_init(void)
876 fq_flow_cachep
= kmem_cache_create("fq_flow_cache",
877 sizeof(struct fq_flow
),
882 ret
= register_qdisc(&fq_qdisc_ops
);
884 kmem_cache_destroy(fq_flow_cachep
);
888 static void __exit
fq_module_exit(void)
890 unregister_qdisc(&fq_qdisc_ops
);
891 kmem_cache_destroy(fq_flow_cachep
);
894 module_init(fq_module_init
)
895 module_exit(fq_module_exit
)
896 MODULE_AUTHOR("Eric Dumazet");
897 MODULE_LICENSE("GPL");