/*
 * net/sched/police.c	Input police filter.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		J Hadi Salim (action changes)
 */
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/module.h>	/* NOTE(review): duplicate of the include above; harmless but removable */
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/act_api.h>
/* Token-bucket conversion: ticks required to transmit L bytes at the
 * configured rate (R_tab) or peak rate (P_tab). */
#define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])

/* Recover the policer instance stashed in a generic action. */
#define PRIV(a)    ((struct tcf_police *) (a)->priv)

/* use generic hash table */
#define MY_TAB_SIZE	16
#define MY_TAB_MASK	15

/* Global index -> policer hash table. */
static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
/* Policer hash table lock */
static DEFINE_RWLOCK(police_lock);
48 /* Each policer is serialized by its individual spinlock */
50 static __inline__
unsigned tcf_police_hash(u32 index
)
55 static __inline__
struct tcf_police
* tcf_police_lookup(u32 index
)
59 read_lock(&police_lock
);
60 for (p
= tcf_police_ht
[tcf_police_hash(index
)]; p
; p
= p
->next
) {
61 if (p
->index
== index
)
64 read_unlock(&police_lock
);
68 #ifdef CONFIG_NET_CLS_ACT
69 static int tcf_generic_walker(struct sk_buff
*skb
, struct netlink_callback
*cb
,
70 int type
, struct tc_action
*a
)
73 int err
= 0, index
= -1, i
= 0, s_i
= 0, n_i
= 0;
76 read_lock(&police_lock
);
80 for (i
= 0; i
< MY_TAB_SIZE
; i
++) {
81 p
= tcf_police_ht
[tcf_police_hash(i
)];
83 for (; p
; p
= p
->next
) {
89 r
= (struct rtattr
*) skb
->tail
;
90 RTA_PUT(skb
, a
->order
, 0, NULL
);
91 if (type
== RTM_DELACTION
)
92 err
= tcf_action_dump_1(skb
, a
, 0, 1);
94 err
= tcf_action_dump_1(skb
, a
, 0, 0);
97 skb_trim(skb
, (u8
*)r
- skb
->data
);
100 r
->rta_len
= skb
->tail
- (u8
*)r
;
105 read_unlock(&police_lock
);
111 skb_trim(skb
, (u8
*)r
- skb
->data
);
116 tcf_hash_search(struct tc_action
*a
, u32 index
)
118 struct tcf_police
*p
= tcf_police_lookup(index
);
129 static inline u32
tcf_police_new_index(void)
134 } while (tcf_police_lookup(idx_gen
));
139 void tcf_police_destroy(struct tcf_police
*p
)
141 unsigned h
= tcf_police_hash(p
->index
);
142 struct tcf_police
**p1p
;
144 for (p1p
= &tcf_police_ht
[h
]; *p1p
; p1p
= &(*p1p
)->next
) {
146 write_lock_bh(&police_lock
);
148 write_unlock_bh(&police_lock
);
149 #ifdef CONFIG_NET_ESTIMATOR
150 gen_kill_estimator(&p
->bstats
, &p
->rate_est
);
153 qdisc_put_rtab(p
->R_tab
);
155 qdisc_put_rtab(p
->P_tab
);
163 #ifdef CONFIG_NET_CLS_ACT
164 static int tcf_act_police_locate(struct rtattr
*rta
, struct rtattr
*est
,
165 struct tc_action
*a
, int ovr
, int bind
)
169 struct rtattr
*tb
[TCA_POLICE_MAX
];
170 struct tc_police
*parm
;
171 struct tcf_police
*p
;
172 struct qdisc_rate_table
*R_tab
= NULL
, *P_tab
= NULL
;
174 if (rta
== NULL
|| rtattr_parse_nested(tb
, TCA_POLICE_MAX
, rta
) < 0)
177 if (tb
[TCA_POLICE_TBF
-1] == NULL
||
178 RTA_PAYLOAD(tb
[TCA_POLICE_TBF
-1]) != sizeof(*parm
))
180 parm
= RTA_DATA(tb
[TCA_POLICE_TBF
-1]);
182 if (tb
[TCA_POLICE_RESULT
-1] != NULL
&&
183 RTA_PAYLOAD(tb
[TCA_POLICE_RESULT
-1]) != sizeof(u32
))
185 if (tb
[TCA_POLICE_RESULT
-1] != NULL
&&
186 RTA_PAYLOAD(tb
[TCA_POLICE_RESULT
-1]) != sizeof(u32
))
189 if (parm
->index
&& (p
= tcf_police_lookup(parm
->index
)) != NULL
) {
200 p
= kmalloc(sizeof(*p
), GFP_KERNEL
);
203 memset(p
, 0, sizeof(*p
));
207 spin_lock_init(&p
->lock
);
208 p
->stats_lock
= &p
->lock
;
212 if (parm
->rate
.rate
) {
214 R_tab
= qdisc_get_rtab(&parm
->rate
, tb
[TCA_POLICE_RATE
-1]);
217 if (parm
->peakrate
.rate
) {
218 P_tab
= qdisc_get_rtab(&parm
->peakrate
,
219 tb
[TCA_POLICE_PEAKRATE
-1]);
220 if (p
->P_tab
== NULL
) {
221 qdisc_put_rtab(R_tab
);
226 /* No failure allowed after this point */
227 spin_lock_bh(&p
->lock
);
229 qdisc_put_rtab(p
->R_tab
);
233 qdisc_put_rtab(p
->P_tab
);
237 if (tb
[TCA_POLICE_RESULT
-1])
238 p
->result
= *(u32
*)RTA_DATA(tb
[TCA_POLICE_RESULT
-1]);
239 p
->toks
= p
->burst
= parm
->burst
;
244 p
->mtu
= 255<<p
->R_tab
->rate
.cell_log
;
247 p
->ptoks
= L2T_P(p
, p
->mtu
);
248 p
->action
= parm
->action
;
250 #ifdef CONFIG_NET_ESTIMATOR
251 if (tb
[TCA_POLICE_AVRATE
-1])
252 p
->ewma_rate
= *(u32
*)RTA_DATA(tb
[TCA_POLICE_AVRATE
-1]);
254 gen_replace_estimator(&p
->bstats
, &p
->rate_est
, p
->stats_lock
, est
);
257 spin_unlock_bh(&p
->lock
);
258 if (ret
!= ACT_P_CREATED
)
261 PSCHED_GET_TIME(p
->t_c
);
262 p
->index
= parm
->index
? : tcf_police_new_index();
263 h
= tcf_police_hash(p
->index
);
264 write_lock_bh(&police_lock
);
265 p
->next
= tcf_police_ht
[h
];
266 tcf_police_ht
[h
] = p
;
267 write_unlock_bh(&police_lock
);
273 if (ret
== ACT_P_CREATED
)
278 static int tcf_act_police_cleanup(struct tc_action
*a
, int bind
)
280 struct tcf_police
*p
= PRIV(a
);
283 return tcf_police_release(p
, bind
);
287 static int tcf_act_police(struct sk_buff
**pskb
, struct tc_action
*a
)
290 struct sk_buff
*skb
= *pskb
;
291 struct tcf_police
*p
= PRIV(a
);
297 p
->bstats
.bytes
+= skb
->len
;
300 #ifdef CONFIG_NET_ESTIMATOR
301 if (p
->ewma_rate
&& p
->rate_est
.bps
>= p
->ewma_rate
) {
302 p
->qstats
.overlimits
++;
303 spin_unlock(&p
->lock
);
308 if (skb
->len
<= p
->mtu
) {
309 if (p
->R_tab
== NULL
) {
310 spin_unlock(&p
->lock
);
314 PSCHED_GET_TIME(now
);
316 toks
= PSCHED_TDIFF_SAFE(now
, p
->t_c
, p
->burst
);
319 ptoks
= toks
+ p
->ptoks
;
320 if (ptoks
> (long)L2T_P(p
, p
->mtu
))
321 ptoks
= (long)L2T_P(p
, p
->mtu
);
322 ptoks
-= L2T_P(p
, skb
->len
);
325 if (toks
> (long)p
->burst
)
327 toks
-= L2T(p
, skb
->len
);
329 if ((toks
|ptoks
) >= 0) {
333 spin_unlock(&p
->lock
);
338 p
->qstats
.overlimits
++;
339 spin_unlock(&p
->lock
);
344 tcf_act_police_dump(struct sk_buff
*skb
, struct tc_action
*a
, int bind
, int ref
)
346 unsigned char *b
= skb
->tail
;
347 struct tc_police opt
;
348 struct tcf_police
*p
= PRIV(a
);
350 opt
.index
= p
->index
;
351 opt
.action
= p
->action
;
353 opt
.burst
= p
->burst
;
354 opt
.refcnt
= p
->refcnt
- ref
;
355 opt
.bindcnt
= p
->bindcnt
- bind
;
357 opt
.rate
= p
->R_tab
->rate
;
359 memset(&opt
.rate
, 0, sizeof(opt
.rate
));
361 opt
.peakrate
= p
->P_tab
->rate
;
363 memset(&opt
.peakrate
, 0, sizeof(opt
.peakrate
));
364 RTA_PUT(skb
, TCA_POLICE_TBF
, sizeof(opt
), &opt
);
366 RTA_PUT(skb
, TCA_POLICE_RESULT
, sizeof(int), &p
->result
);
367 #ifdef CONFIG_NET_ESTIMATOR
369 RTA_PUT(skb
, TCA_POLICE_AVRATE
, 4, &p
->ewma_rate
);
374 skb_trim(skb
, b
- skb
->data
);
378 MODULE_AUTHOR("Alexey Kuznetsov");
379 MODULE_DESCRIPTION("Policing actions");
380 MODULE_LICENSE("GPL");
382 static struct tc_action_ops act_police_ops
= {
384 .type
= TCA_ID_POLICE
,
385 .capab
= TCA_CAP_NONE
,
386 .owner
= THIS_MODULE
,
387 .act
= tcf_act_police
,
388 .dump
= tcf_act_police_dump
,
389 .cleanup
= tcf_act_police_cleanup
,
390 .lookup
= tcf_hash_search
,
391 .init
= tcf_act_police_locate
,
392 .walk
= tcf_generic_walker
396 police_init_module(void)
398 return tcf_register_action(&act_police_ops
);
402 police_cleanup_module(void)
404 tcf_unregister_action(&act_police_ops
);
407 module_init(police_init_module
);
408 module_exit(police_cleanup_module
);
412 struct tcf_police
* tcf_police_locate(struct rtattr
*rta
, struct rtattr
*est
)
415 struct tcf_police
*p
;
416 struct rtattr
*tb
[TCA_POLICE_MAX
];
417 struct tc_police
*parm
;
419 if (rtattr_parse_nested(tb
, TCA_POLICE_MAX
, rta
) < 0)
422 if (tb
[TCA_POLICE_TBF
-1] == NULL
||
423 RTA_PAYLOAD(tb
[TCA_POLICE_TBF
-1]) != sizeof(*parm
))
426 parm
= RTA_DATA(tb
[TCA_POLICE_TBF
-1]);
428 if (parm
->index
&& (p
= tcf_police_lookup(parm
->index
)) != NULL
) {
433 p
= kmalloc(sizeof(*p
), GFP_KERNEL
);
437 memset(p
, 0, sizeof(*p
));
439 spin_lock_init(&p
->lock
);
440 p
->stats_lock
= &p
->lock
;
441 if (parm
->rate
.rate
) {
442 p
->R_tab
= qdisc_get_rtab(&parm
->rate
, tb
[TCA_POLICE_RATE
-1]);
443 if (p
->R_tab
== NULL
)
445 if (parm
->peakrate
.rate
) {
446 p
->P_tab
= qdisc_get_rtab(&parm
->peakrate
,
447 tb
[TCA_POLICE_PEAKRATE
-1]);
448 if (p
->P_tab
== NULL
)
452 if (tb
[TCA_POLICE_RESULT
-1]) {
453 if (RTA_PAYLOAD(tb
[TCA_POLICE_RESULT
-1]) != sizeof(u32
))
455 p
->result
= *(u32
*)RTA_DATA(tb
[TCA_POLICE_RESULT
-1]);
457 #ifdef CONFIG_NET_ESTIMATOR
458 if (tb
[TCA_POLICE_AVRATE
-1]) {
459 if (RTA_PAYLOAD(tb
[TCA_POLICE_AVRATE
-1]) != sizeof(u32
))
461 p
->ewma_rate
= *(u32
*)RTA_DATA(tb
[TCA_POLICE_AVRATE
-1]);
464 p
->toks
= p
->burst
= parm
->burst
;
469 p
->mtu
= 255<<p
->R_tab
->rate
.cell_log
;
472 p
->ptoks
= L2T_P(p
, p
->mtu
);
473 PSCHED_GET_TIME(p
->t_c
);
474 p
->index
= parm
->index
? : tcf_police_new_index();
475 p
->action
= parm
->action
;
476 #ifdef CONFIG_NET_ESTIMATOR
478 gen_new_estimator(&p
->bstats
, &p
->rate_est
, p
->stats_lock
, est
);
480 h
= tcf_police_hash(p
->index
);
481 write_lock_bh(&police_lock
);
482 p
->next
= tcf_police_ht
[h
];
483 tcf_police_ht
[h
] = p
;
484 write_unlock_bh(&police_lock
);
489 qdisc_put_rtab(p
->R_tab
);
494 int tcf_police(struct sk_buff
*skb
, struct tcf_police
*p
)
502 p
->bstats
.bytes
+= skb
->len
;
505 #ifdef CONFIG_NET_ESTIMATOR
506 if (p
->ewma_rate
&& p
->rate_est
.bps
>= p
->ewma_rate
) {
507 p
->qstats
.overlimits
++;
508 spin_unlock(&p
->lock
);
513 if (skb
->len
<= p
->mtu
) {
514 if (p
->R_tab
== NULL
) {
515 spin_unlock(&p
->lock
);
519 PSCHED_GET_TIME(now
);
521 toks
= PSCHED_TDIFF_SAFE(now
, p
->t_c
, p
->burst
);
524 ptoks
= toks
+ p
->ptoks
;
525 if (ptoks
> (long)L2T_P(p
, p
->mtu
))
526 ptoks
= (long)L2T_P(p
, p
->mtu
);
527 ptoks
-= L2T_P(p
, skb
->len
);
530 if (toks
> (long)p
->burst
)
532 toks
-= L2T(p
, skb
->len
);
534 if ((toks
|ptoks
) >= 0) {
538 spin_unlock(&p
->lock
);
543 p
->qstats
.overlimits
++;
544 spin_unlock(&p
->lock
);
548 int tcf_police_dump(struct sk_buff
*skb
, struct tcf_police
*p
)
550 unsigned char *b
= skb
->tail
;
551 struct tc_police opt
;
553 opt
.index
= p
->index
;
554 opt
.action
= p
->action
;
556 opt
.burst
= p
->burst
;
558 opt
.rate
= p
->R_tab
->rate
;
560 memset(&opt
.rate
, 0, sizeof(opt
.rate
));
562 opt
.peakrate
= p
->P_tab
->rate
;
564 memset(&opt
.peakrate
, 0, sizeof(opt
.peakrate
));
565 RTA_PUT(skb
, TCA_POLICE_TBF
, sizeof(opt
), &opt
);
567 RTA_PUT(skb
, TCA_POLICE_RESULT
, sizeof(int), &p
->result
);
568 #ifdef CONFIG_NET_ESTIMATOR
570 RTA_PUT(skb
, TCA_POLICE_AVRATE
, 4, &p
->ewma_rate
);
575 skb_trim(skb
, b
- skb
->data
);
579 int tcf_police_dump_stats(struct sk_buff
*skb
, struct tcf_police
*p
)
583 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
,
584 TCA_XSTATS
, p
->stats_lock
, &d
) < 0)
587 if (gnet_stats_copy_basic(&d
, &p
->bstats
) < 0 ||
588 #ifdef CONFIG_NET_ESTIMATOR
589 gnet_stats_copy_rate_est(&d
, &p
->rate_est
) < 0 ||
591 gnet_stats_copy_queue(&d
, &p
->qstats
) < 0)
594 if (gnet_stats_finish_copy(&d
) < 0)
/* Public entry points for the classifier (non-CLS_ACT) interface.
 * BUGFIX: the exports of tcf_police_hash, tcf_police_lookup,
 * tcf_police_new_index (all declared static inline above) and
 * tcf_police_ht (static) were removed — EXPORT_SYMBOL on a static
 * symbol produces no usable export and breaks the module build. */
EXPORT_SYMBOL(tcf_police);
EXPORT_SYMBOL(tcf_police_destroy);
EXPORT_SYMBOL(tcf_police_dump);
EXPORT_SYMBOL(tcf_police_dump_stats);
EXPORT_SYMBOL(tcf_police_locate);