// SPDX-License-Identifier: GPL-2.0-only
/* net/sched/sch_hhf.c		Heavy-Hitter Filter (HHF)
 *
 * Copyright (C) 2013 Terry Lam <vtlam@google.com>
 * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com>
 */

#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/siphash.h>
#include <net/pkt_sched.h>

/*	Heavy-Hitter Filter (HHF)
 *
 * Principles:
 * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter
 * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified
 * as heavy-hitter, it is immediately switched to the heavy-hitter bucket.
 * The buckets are dequeued by a Weighted Deficit Round Robin (WDRR) scheduler,
 * in which the heavy-hitter bucket is served with less weight.
 * In other words, non-heavy-hitters (e.g., short bursts of critical traffic)
 * are isolated from heavy-hitters (e.g., persistent bulk traffic) and also have
 * higher share of bandwidth.
 *
 * To capture heavy-hitters, we use the "multi-stage filter" algorithm in the
 * following paper:
 * [EV02] C. Estan and G. Varghese, "New Directions in Traffic Measurement and
 * Accounting", in ACM SIGCOMM, 2002.
 *
 * Conceptually, a multi-stage filter comprises k independent hash functions
 * and k counter arrays. Packets are indexed into k counter arrays by k hash
 * functions, respectively. The counters are then increased by the packet sizes.
 * The errors of the filter are as follows:
 * - For a heavy-hitter flow: *all* of its k array counters must be large.
 * - For a non-heavy-hitter flow: some of its k array counters can be large
 *   due to hash collision with other small flows; however, with high
 *   probability, not *all* k counters are large.
 *
 * By the design of the multi-stage filter algorithm, the false negative rate
 * (heavy-hitters getting away uncaptured) is zero. However, the algorithm is
 * susceptible to false positives (non-heavy-hitters mistakenly classified as
 * heavy-hitters).
 * Therefore, we also implement the following optimizations to reduce false
 * positives by avoiding unnecessary increment of the counter values:
 * - Optimization O1: once a heavy-hitter is identified, its bytes are not
 *   accounted in the array counters. This technique is called "shielding"
 *   in Section 3.3.1 of [EV02].
 * - Optimization O2: conservative update of counters
 *                    (Section 3.3.2 of [EV02]),
 *   New counter value = max {old counter value,
 *                            smallest counter value + packet bytes}
 *
 * Finally, we refresh the counters periodically since otherwise the counter
 * values will keep accumulating.
 *
 * Once a flow is classified as heavy-hitter, we also save its per-flow state
 * in an exact-matching flow table so that its subsequent packets can be
 * dispatched to the heavy-hitter bucket accordingly.
 *
 *
 * At a high level, this qdisc works as follows:
 * Given a packet p:
 *   - If the flow-id of p (e.g., TCP 5-tuple) is already in the exact-matching
 *     heavy-hitter flow table, denoted table T, then send p to the heavy-hitter
 *     bucket.
 *   - Otherwise, forward p to the multi-stage filter, denoted filter F
 *     + If F decides that p belongs to a non-heavy-hitter flow, then send p
 *       to the non-heavy-hitter bucket.
 *     + Otherwise, if F decides that p belongs to a new heavy-hitter flow,
 *       then set up a new flow entry for the flow-id of p in the table T and
 *       send p to the heavy-hitter bucket.
 *
 * In this implementation:
 *   - T is a fixed-size hash-table with 1024 entries. Hash collision is
 *     resolved by linked-list chaining.
 *   - F has four counter arrays, each array containing 1024 32-bit counters.
 *     That means 4 * 1024 * 32 bits = 16KB of memory.
 *   - Since each array in F contains 1024 counters, 10 bits are sufficient to
 *     index into each array.
 *     Hence, instead of having four hash functions, we chop the 32-bit
 *     skb-hash into three 10-bit chunks, and the remaining 10-bit chunk is
 *     computed as XOR sum of those three chunks (see the sketch following
 *     this comment).
 *   - We need to clear the counter arrays periodically; however, directly
 *     memsetting 16KB of memory can lead to cache eviction and unwanted delay.
 *     So by representing each counter by a valid bit, we only need to reset
 *     4K of 1 bit (i.e. 512 bytes) instead of 16KB of memory.
 *   - The Deficit Round Robin engine is taken from fq_codel implementation
 *     (net/sched/sch_fq_codel.c). Note that wdrr_bucket corresponds to
 *     fq_codel_flow in fq_codel implementation.
 */

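/* Illustrative sketch (not compiled): the per-packet multi-stage filter update
 * that hhf_classify() below implements, written out in plain C.  The local
 * names (pos, tmp, counter, admit_bytes) exist only in this example; the
 * constants and the two optimizations come from the description above.
 *
 *	u32 pos[HHF_ARRAYS_CNT], tmp = skb_hash, xorsum = 0, min_val = ~0U;
 *	int i;
 *
 *	for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) {
 *		pos[i] = tmp & HHF_BIT_MASK;		// one 10-bit chunk
 *		xorsum ^= pos[i];
 *		tmp >>= HHF_BIT_MASK_LEN;
 *	}
 *	pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp;		// derived fourth index
 *
 *	for (i = 0; i < HHF_ARRAYS_CNT; i++)		// smallest counter + pkt
 *		min_val = min(min_val, counter[i][pos[i]] + pkt_len);
 *
 *	if (min_val > admit_bytes) {
 *		// heavy-hitter: admit to table T and, per O1 ("shielding"),
 *		// leave the counters untouched
 *	} else {
 *		for (i = 0; i < HHF_ARRAYS_CNT; i++)	// conservative update, O2
 *			counter[i][pos[i]] = max(counter[i][pos[i]], min_val);
 *	}
 */
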
/* Non-configurable parameters */
#define HH_FLOWS_CNT	 1024  /* number of entries in exact-matching table T */
#define HHF_ARRAYS_CNT	 4     /* number of arrays in multi-stage filter F */
#define HHF_ARRAYS_LEN	 1024  /* number of counters in each array of F */
#define HHF_BIT_MASK_LEN 10    /* masking 10 bits */
#define HHF_BIT_MASK	 0x3FF /* bitmask of 10 bits */

#define WDRR_BUCKET_CNT  2     /* two buckets for Weighted DRR */
enum wdrr_bucket_idx {
	WDRR_BUCKET_FOR_HH	= 0, /* bucket id for heavy-hitters */
	WDRR_BUCKET_FOR_NON_HH	= 1  /* bucket id for non-heavy-hitters */
};

#define hhf_time_before(a, b)	\
	(typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0))

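/* Example: the comparison is wrap-safe.  With a = 5 and b = 0xFFFFFFF0 (i.e.
 * "a" sampled shortly after a u32 jiffies wrap-around), (s32)(a - b) == 21,
 * which is not negative, so hhf_time_before(a, b) is false and "a" is
 * correctly treated as the later timestamp despite being numerically smaller.
 */
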
/* Heavy-hitter per-flow state */
struct hh_flow_state {
	u32		 hash_id;	/* hash of flow-id (e.g. TCP 5-tuple) */
	u32		 hit_timestamp;	/* last time heavy-hitter was seen */
	struct list_head flowchain;	/* chaining under hash collision */
};

/* Weighted Deficit Round Robin (WDRR) scheduler */
struct wdrr_bucket {
	struct sk_buff	 *head;
	struct sk_buff	 *tail;
	struct list_head bucketchain;
	int		 deficit;
};

struct hhf_sched_data {
	struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
	siphash_key_t	   perturbation;   /* hash perturbation */
	u32		   quantum;        /* psched_mtu(qdisc_dev(sch)); */
	u32		   drop_overlimit; /* number of times max qdisc packet
					    * limit was hit
					    */
	struct list_head   *hh_flows;       /* table T (currently active HHs) */
	u32		   hh_flows_limit;            /* max active HH allocs */
	u32		   hh_flows_overlimit; /* num of disallowed HH allocs */
	u32		   hh_flows_total_cnt;          /* total admitted HHs */
	u32		   hh_flows_current_cnt;        /* total current HHs */
	u32		   *hhf_arrays[HHF_ARRAYS_CNT]; /* HH filter F */
	u32		   hhf_arrays_reset_timestamp;  /* last time hhf_arrays
							 * was reset
							 */
	unsigned long	   *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits
							     * of hhf_arrays
							     */
	/* Similar to the "new_flows" vs. "old_flows" concept in fq_codel DRR */
	struct list_head   new_buckets; /* list of new buckets */
	struct list_head   old_buckets; /* list of old buckets */

	/* Configurable HHF parameters */
	u32		   hhf_reset_timeout; /* interval to reset counter
					       * arrays in filter F
					       * (default 40ms)
					       */
	u32		   hhf_admit_bytes;   /* counter thresh to classify as
					       * HH (default 128KB).
					       * With these default values,
					       * 128KB / 40ms = 25 Mbps
					       * i.e., we expect to capture HHs
					       * sending > 25 Mbps.
					       */
	u32		   hhf_evict_timeout; /* aging threshold to evict idle
					       * HHs out of table T. This should
					       * be large enough to avoid
					       * reordering during HH eviction.
					       * (default 1s)
					       */
	u32		   hhf_non_hh_weight; /* WDRR weight for non-HHs
					       * (default 2,
					       *  i.e., non-HH : HH = 2 : 1)
					       */
};

static u32 hhf_time_stamp(void)
{
	return jiffies;
}

/* Looks up a heavy-hitter flow in a chaining list of table T. */
static struct hh_flow_state *seek_list(const u32 hash,
				       struct list_head *head,
				       struct hhf_sched_data *q)
{
	struct hh_flow_state *flow, *next;
	u32 now = hhf_time_stamp();

	if (list_empty(head))
		return NULL;

	list_for_each_entry_safe(flow, next, head, flowchain) {
		u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;

		if (hhf_time_before(prev, now)) {
			/* Delete expired heavy-hitters, but preserve one entry
			 * to avoid kzalloc() when next time this slot is hit.
			 */
			if (list_is_last(&flow->flowchain, head))
				return NULL;
			list_del(&flow->flowchain);
			kfree(flow);
			q->hh_flows_current_cnt--;
		} else if (flow->hash_id == hash) {
			return flow;
		}
	}
	return NULL;
}

/* Returns a flow state entry for a new heavy-hitter.  Either reuses an expired
 * entry or dynamically alloc a new entry.
 */
static struct hh_flow_state *alloc_new_hh(struct list_head *head,
					  struct hhf_sched_data *q)
{
	struct hh_flow_state *flow;
	u32 now = hhf_time_stamp();

	if (!list_empty(head)) {
		/* Find an expired heavy-hitter flow entry. */
		list_for_each_entry(flow, head, flowchain) {
			u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;

			if (hhf_time_before(prev, now))
				return flow;
		}
	}

	if (q->hh_flows_current_cnt >= q->hh_flows_limit) {
		q->hh_flows_overlimit++;
		return NULL;
	}
	/* Create new entry. */
	flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC);
	if (!flow)
		return NULL;

	q->hh_flows_current_cnt++;
	INIT_LIST_HEAD(&flow->flowchain);
	list_add_tail(&flow->flowchain, head);

	return flow;
}

/* Assigns packets to WDRR buckets.  Implements a multi-stage filter to
 * classify heavy-hitters.
 */
static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	u32 tmp_hash, hash;
	u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos;
	struct hh_flow_state *flow;
	u32 pkt_len, min_hhf_val;
	int i;
	u32 prev;
	u32 now = hhf_time_stamp();

	/* Reset the HHF counter arrays if this is the right time. */
	prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout;
	if (hhf_time_before(prev, now)) {
		for (i = 0; i < HHF_ARRAYS_CNT; i++)
			bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN);
		q->hhf_arrays_reset_timestamp = now;
	}

	/* Get hashed flow-id of the skb. */
	hash = skb_get_hash_perturb(skb, &q->perturbation);

	/* Check if this packet belongs to an already established HH flow. */
	flow_pos = hash & HHF_BIT_MASK;
	flow = seek_list(hash, &q->hh_flows[flow_pos], q);
	if (flow) { /* found its HH flow */
		flow->hit_timestamp = now;
		return WDRR_BUCKET_FOR_HH;
	}

	/* Now pass the packet through the multi-stage filter. */
	tmp_hash = hash;
	xorsum = 0;
	for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) {
		/* Split the skb_hash into three 10-bit chunks. */
		filter_pos[i] = tmp_hash & HHF_BIT_MASK;
		xorsum ^= filter_pos[i];
		tmp_hash >>= HHF_BIT_MASK_LEN;
	}
	/* The last chunk is computed as XOR sum of other chunks. */
	filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash;

	pkt_len = qdisc_pkt_len(skb);
	min_hhf_val = ~0U;
	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		u32 val;

		if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) {
			q->hhf_arrays[i][filter_pos[i]] = 0;
			__set_bit(filter_pos[i], q->hhf_valid_bits[i]);
		}

		val = q->hhf_arrays[i][filter_pos[i]] + pkt_len;
		if (min_hhf_val > val)
			min_hhf_val = val;
	}

	/* Found a new HH iff all counter values > HH admit threshold. */
	if (min_hhf_val > q->hhf_admit_bytes) {
		/* Just captured a new heavy-hitter. */
		flow = alloc_new_hh(&q->hh_flows[flow_pos], q);
		if (!flow) /* memory alloc problem */
			return WDRR_BUCKET_FOR_NON_HH;
		flow->hash_id = hash;
		flow->hit_timestamp = now;
		q->hh_flows_total_cnt++;

		/* By returning without updating counters in q->hhf_arrays,
		 * we implicitly implement "shielding" (see Optimization O1).
		 */
		return WDRR_BUCKET_FOR_HH;
	}

	/* Conservative update of HHF arrays (see Optimization O2). */
	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val)
			q->hhf_arrays[i][filter_pos[i]] = min_hhf_val;
	}
	return WDRR_BUCKET_FOR_NON_HH;
}

/* Removes one skb from head of bucket. */
static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket)
{
	struct sk_buff *skb = bucket->head;

	bucket->head = skb->next;
	skb_mark_not_on_list(skb);
	return skb;
}

/* Tail-adds skb to bucket. */
static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb)
{
	if (bucket->head == NULL)
		bucket->head = skb;
	else
		bucket->tail->next = skb;
	bucket->tail = skb;
	skb->next = NULL;
}

static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct wdrr_bucket *bucket;

	/* Always try to drop from heavy-hitters first. */
	bucket = &q->buckets[WDRR_BUCKET_FOR_HH];
	if (!bucket->head)
		bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH];

	if (bucket->head) {
		struct sk_buff *skb = dequeue_head(bucket);

		sch->q.qlen--;
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch, to_free);
	}

	/* Return id of the bucket from which the packet was dropped. */
	return bucket - q->buckets;
}

static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	enum wdrr_bucket_idx idx;
	struct wdrr_bucket *bucket;
	unsigned int prev_backlog;

	idx = hhf_classify(skb, sch);

	bucket = &q->buckets[idx];
	bucket_add(bucket, skb);
	qdisc_qstats_backlog_inc(sch, skb);

	if (list_empty(&bucket->bucketchain)) {
		unsigned int weight;

		/* The logic of new_buckets vs. old_buckets is the same as
		 * new_flows vs. old_flows in the implementation of fq_codel,
		 * i.e., short bursts of non-HHs should have strict priority.
		 */
		if (idx == WDRR_BUCKET_FOR_HH) {
			/* Always move heavy-hitters to old bucket. */
			weight = 1;
			list_add_tail(&bucket->bucketchain, &q->old_buckets);
		} else {
			weight = q->hhf_non_hh_weight;
			list_add_tail(&bucket->bucketchain, &q->new_buckets);
		}
		bucket->deficit = weight * q->quantum;
	}
	if (++sch->q.qlen <= sch->limit)
		return NET_XMIT_SUCCESS;

	prev_backlog = sch->qstats.backlog;
	q->drop_overlimit++;
	/* Return Congestion Notification only if we dropped a packet from this
	 * bucket.
	 */
	if (hhf_drop(sch, to_free) == idx)
		return NET_XMIT_CN;

	/* As we dropped a packet, better let upper stack know this. */
	qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *hhf_dequeue(struct Qdisc *sch)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = NULL;
	struct wdrr_bucket *bucket;
	struct list_head *head;

begin:
	head = &q->new_buckets;
	if (list_empty(head)) {
		head = &q->old_buckets;
		if (list_empty(head))
			return NULL;
	}
	bucket = list_first_entry(head, struct wdrr_bucket, bucketchain);

	if (bucket->deficit <= 0) {
		int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ?
			      1 : q->hhf_non_hh_weight;

		bucket->deficit += weight * q->quantum;
		list_move_tail(&bucket->bucketchain, &q->old_buckets);
		goto begin;
	}

	if (bucket->head) {
		skb = dequeue_head(bucket);
		sch->q.qlen--;
		qdisc_qstats_backlog_dec(sch, skb);
	}

	if (!skb) {
		/* Force a pass through old_buckets to prevent starvation. */
		if ((head == &q->new_buckets) && !list_empty(&q->old_buckets))
			list_move_tail(&bucket->bucketchain, &q->old_buckets);
		else
			list_del_init(&bucket->bucketchain);
		goto begin;
	}
	qdisc_bstats_update(sch, skb);
	bucket->deficit -= qdisc_pkt_len(skb);

	return skb;
}

static void hhf_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = hhf_dequeue(sch)) != NULL)
		rtnl_kfree_skbs(skb, skb);
}

static void hhf_destroy(struct Qdisc *sch)
{
	int i;
	struct hhf_sched_data *q = qdisc_priv(sch);

	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		kvfree(q->hhf_arrays[i]);
		kvfree(q->hhf_valid_bits[i]);
	}

	if (!q->hh_flows)
		return;

	for (i = 0; i < HH_FLOWS_CNT; i++) {
		struct hh_flow_state *flow, *next;
		struct list_head *head = &q->hh_flows[i];

		if (list_empty(head))
			continue;
		list_for_each_entry_safe(flow, next, head, flowchain) {
			list_del(&flow->flowchain);
			kfree(flow);
		}
	}
	kvfree(q->hh_flows);
}

static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
	[TCA_HHF_BACKLOG_LIMIT]	 = { .type = NLA_U32 },
	[TCA_HHF_QUANTUM]	 = { .type = NLA_U32 },
	[TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 },
	[TCA_HHF_RESET_TIMEOUT]	 = { .type = NLA_U32 },
	[TCA_HHF_ADMIT_BYTES]	 = { .type = NLA_U32 },
	[TCA_HHF_EVICT_TIMEOUT]	 = { .type = NLA_U32 },
	[TCA_HHF_NON_HH_WEIGHT]	 = { .type = NLA_U32 },
};

static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HHF_MAX + 1];
	unsigned int qlen, prev_backlog;
	int err;
	u64 non_hh_quantum;
	u32 new_quantum = q->quantum;
	u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight;

	err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_HHF_QUANTUM])
		new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]);

	if (tb[TCA_HHF_NON_HH_WEIGHT])
		new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]);

	non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
	if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX)
		return -EINVAL;

	sch_tree_lock(sch);

	if (tb[TCA_HHF_BACKLOG_LIMIT])
		WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]));

	WRITE_ONCE(q->quantum, new_quantum);
	WRITE_ONCE(q->hhf_non_hh_weight, new_hhf_non_hh_weight);

	if (tb[TCA_HHF_HH_FLOWS_LIMIT])
		WRITE_ONCE(q->hh_flows_limit,
			   nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]));

	if (tb[TCA_HHF_RESET_TIMEOUT]) {
		u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);

		WRITE_ONCE(q->hhf_reset_timeout,
			   usecs_to_jiffies(us));
	}

	if (tb[TCA_HHF_ADMIT_BYTES])
		WRITE_ONCE(q->hhf_admit_bytes,
			   nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]));

	if (tb[TCA_HHF_EVICT_TIMEOUT]) {
		u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);

		WRITE_ONCE(q->hhf_evict_timeout,
			   usecs_to_jiffies(us));
	}

	qlen = sch->q.qlen;
	prev_backlog = sch->qstats.backlog;
	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = hhf_dequeue(sch);

		rtnl_kfree_skbs(skb, skb);
	}
	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
				  prev_backlog - sch->qstats.backlog);

	sch_tree_unlock(sch);
	return 0;
}

static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	int i;

	sch->limit = 1000;
	q->quantum = psched_mtu(qdisc_dev(sch));
	get_random_bytes(&q->perturbation, sizeof(q->perturbation));
	INIT_LIST_HEAD(&q->new_buckets);
	INIT_LIST_HEAD(&q->old_buckets);

	/* Configurable HHF parameters */
	q->hhf_reset_timeout = HZ / 25; /* 40  ms */
	q->hhf_admit_bytes = 131072;    /* 128 KB */
	q->hhf_evict_timeout = HZ;      /* 1  sec */
	q->hhf_non_hh_weight = 2;

	if (opt) {
		int err = hhf_change(sch, opt, extack);

		if (err)
			return err;
	}

	if (!q->hh_flows) {
		/* Initialize heavy-hitter flow table. */
		q->hh_flows = kvcalloc(HH_FLOWS_CNT, sizeof(struct list_head),
				       GFP_KERNEL);
		if (!q->hh_flows)
			return -ENOMEM;
		for (i = 0; i < HH_FLOWS_CNT; i++)
			INIT_LIST_HEAD(&q->hh_flows[i]);

		/* Cap max active HHs at twice len of hh_flows table. */
		q->hh_flows_limit = 2 * HH_FLOWS_CNT;
		q->hh_flows_overlimit = 0;
		q->hh_flows_total_cnt = 0;
		q->hh_flows_current_cnt = 0;

		/* Initialize heavy-hitter filter arrays. */
		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
			q->hhf_arrays[i] = kvcalloc(HHF_ARRAYS_LEN,
						    sizeof(u32),
						    GFP_KERNEL);
			if (!q->hhf_arrays[i]) {
				/* Note: hhf_destroy() will be called
				 * by our caller.
				 */
				return -ENOMEM;
			}
		}
		q->hhf_arrays_reset_timestamp = hhf_time_stamp();

		/* Initialize valid bits of heavy-hitter filter arrays. */
		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
			q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN /
							BITS_PER_BYTE,
							GFP_KERNEL);
			if (!q->hhf_valid_bits[i]) {
				/* Note: hhf_destroy() will be called
				 * by our caller.
				 */
				return -ENOMEM;
			}
		}

		/* Initialize Weighted DRR buckets. */
		for (i = 0; i < WDRR_BUCKET_CNT; i++) {
			struct wdrr_bucket *bucket = q->buckets + i;

			INIT_LIST_HEAD(&bucket->bucketchain);
		}
	}

	return 0;
}

static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, READ_ONCE(sch->limit)) ||
	    nla_put_u32(skb, TCA_HHF_QUANTUM, READ_ONCE(q->quantum)) ||
	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT,
			READ_ONCE(q->hh_flows_limit)) ||
	    nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
			jiffies_to_usecs(READ_ONCE(q->hhf_reset_timeout))) ||
	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES,
			READ_ONCE(q->hhf_admit_bytes)) ||
	    nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
			jiffies_to_usecs(READ_ONCE(q->hhf_evict_timeout))) ||
	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT,
			READ_ONCE(q->hhf_non_hh_weight)))
		goto nla_put_failure;

	return nla_nest_end(skb, opts);

nla_put_failure:
	return -1;
}

static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct tc_hhf_xstats st = {
		.drop_overlimit = q->drop_overlimit,
		.hh_overlimit	= q->hh_flows_overlimit,
		.hh_tot_count	= q->hh_flows_total_cnt,
		.hh_cur_count	= q->hh_flows_current_cnt,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
	.id		=	"hhf",
	.priv_size	=	sizeof(struct hhf_sched_data),

	.enqueue	=	hhf_enqueue,
	.dequeue	=	hhf_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	hhf_init,
	.reset		=	hhf_reset,
	.destroy	=	hhf_destroy,
	.change		=	hhf_change,
	.dump		=	hhf_dump,
	.dump_stats	=	hhf_dump_stats,
	.owner		=	THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("hhf");

static int __init hhf_module_init(void)
{
	return register_qdisc(&hhf_qdisc_ops);
}

static void __exit hhf_module_exit(void)
{
	unregister_qdisc(&hhf_qdisc_ops);
}

module_init(hhf_module_init)
module_exit(hhf_module_exit)
MODULE_AUTHOR("Terry Lam");
MODULE_AUTHOR("Nandita Dukkipati");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Heavy-Hitter Filter (HHF)");