/*
 * net/sched/police.c	Input police filter.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		J Hadi Salim (action changes)
 */
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/sched.h>
20 #include <linux/string.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
25 #include <linux/errno.h>
26 #include <linux/interrupt.h>
27 #include <linux/netdevice.h>
28 #include <linux/skbuff.h>
29 #include <linux/module.h>
30 #include <linux/rtnetlink.h>
31 #include <linux/init.h>
33 #include <net/act_api.h>
/* Token cost of sending L bytes at the configured rate (R_tab) and
 * peak rate (P_tab): index the rate table by length >> cell_log. */
#define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
/* Fetch the policer private data hung off a generic tc_action. */
#define PRIV(a) ((struct tcf_police *) (a)->priv)

/* use generic hash table */
#define MY_TAB_SIZE     16
#define MY_TAB_MASK     15
43 static struct tcf_police
*tcf_police_ht
[MY_TAB_SIZE
];
44 /* Policer hash table lock */
45 static DEFINE_RWLOCK(police_lock
);
47 /* Each policer is serialized by its individual spinlock */
49 static __inline__
unsigned tcf_police_hash(u32 index
)
54 static __inline__
struct tcf_police
* tcf_police_lookup(u32 index
)
58 read_lock(&police_lock
);
59 for (p
= tcf_police_ht
[tcf_police_hash(index
)]; p
; p
= p
->next
) {
60 if (p
->index
== index
)
63 read_unlock(&police_lock
);
67 #ifdef CONFIG_NET_CLS_ACT
68 static int tcf_act_police_walker(struct sk_buff
*skb
, struct netlink_callback
*cb
,
69 int type
, struct tc_action
*a
)
72 int err
= 0, index
= -1, i
= 0, s_i
= 0, n_i
= 0;
75 read_lock(&police_lock
);
79 for (i
= 0; i
< MY_TAB_SIZE
; i
++) {
80 p
= tcf_police_ht
[tcf_police_hash(i
)];
82 for (; p
; p
= p
->next
) {
88 r
= (struct rtattr
*) skb
->tail
;
89 RTA_PUT(skb
, a
->order
, 0, NULL
);
90 if (type
== RTM_DELACTION
)
91 err
= tcf_action_dump_1(skb
, a
, 0, 1);
93 err
= tcf_action_dump_1(skb
, a
, 0, 0);
96 skb_trim(skb
, (u8
*)r
- skb
->data
);
99 r
->rta_len
= skb
->tail
- (u8
*)r
;
104 read_unlock(&police_lock
);
110 skb_trim(skb
, (u8
*)r
- skb
->data
);
115 tcf_act_police_hash_search(struct tc_action
*a
, u32 index
)
117 struct tcf_police
*p
= tcf_police_lookup(index
);
128 static inline u32
tcf_police_new_index(void)
133 } while (tcf_police_lookup(idx_gen
));
138 void tcf_police_destroy(struct tcf_police
*p
)
140 unsigned h
= tcf_police_hash(p
->index
);
141 struct tcf_police
**p1p
;
143 for (p1p
= &tcf_police_ht
[h
]; *p1p
; p1p
= &(*p1p
)->next
) {
145 write_lock_bh(&police_lock
);
147 write_unlock_bh(&police_lock
);
148 #ifdef CONFIG_NET_ESTIMATOR
149 gen_kill_estimator(&p
->bstats
, &p
->rate_est
);
152 qdisc_put_rtab(p
->R_tab
);
154 qdisc_put_rtab(p
->P_tab
);
162 #ifdef CONFIG_NET_CLS_ACT
163 static int tcf_act_police_locate(struct rtattr
*rta
, struct rtattr
*est
,
164 struct tc_action
*a
, int ovr
, int bind
)
168 struct rtattr
*tb
[TCA_POLICE_MAX
];
169 struct tc_police
*parm
;
170 struct tcf_police
*p
;
171 struct qdisc_rate_table
*R_tab
= NULL
, *P_tab
= NULL
;
173 if (rta
== NULL
|| rtattr_parse_nested(tb
, TCA_POLICE_MAX
, rta
) < 0)
176 if (tb
[TCA_POLICE_TBF
-1] == NULL
||
177 RTA_PAYLOAD(tb
[TCA_POLICE_TBF
-1]) != sizeof(*parm
))
179 parm
= RTA_DATA(tb
[TCA_POLICE_TBF
-1]);
181 if (tb
[TCA_POLICE_RESULT
-1] != NULL
&&
182 RTA_PAYLOAD(tb
[TCA_POLICE_RESULT
-1]) != sizeof(u32
))
184 if (tb
[TCA_POLICE_RESULT
-1] != NULL
&&
185 RTA_PAYLOAD(tb
[TCA_POLICE_RESULT
-1]) != sizeof(u32
))
188 if (parm
->index
&& (p
= tcf_police_lookup(parm
->index
)) != NULL
) {
199 p
= kmalloc(sizeof(*p
), GFP_KERNEL
);
202 memset(p
, 0, sizeof(*p
));
206 spin_lock_init(&p
->lock
);
207 p
->stats_lock
= &p
->lock
;
211 if (parm
->rate
.rate
) {
213 R_tab
= qdisc_get_rtab(&parm
->rate
, tb
[TCA_POLICE_RATE
-1]);
216 if (parm
->peakrate
.rate
) {
217 P_tab
= qdisc_get_rtab(&parm
->peakrate
,
218 tb
[TCA_POLICE_PEAKRATE
-1]);
219 if (p
->P_tab
== NULL
) {
220 qdisc_put_rtab(R_tab
);
225 /* No failure allowed after this point */
226 spin_lock_bh(&p
->lock
);
228 qdisc_put_rtab(p
->R_tab
);
232 qdisc_put_rtab(p
->P_tab
);
236 if (tb
[TCA_POLICE_RESULT
-1])
237 p
->result
= *(u32
*)RTA_DATA(tb
[TCA_POLICE_RESULT
-1]);
238 p
->toks
= p
->burst
= parm
->burst
;
243 p
->mtu
= 255<<p
->R_tab
->rate
.cell_log
;
246 p
->ptoks
= L2T_P(p
, p
->mtu
);
247 p
->action
= parm
->action
;
249 #ifdef CONFIG_NET_ESTIMATOR
250 if (tb
[TCA_POLICE_AVRATE
-1])
251 p
->ewma_rate
= *(u32
*)RTA_DATA(tb
[TCA_POLICE_AVRATE
-1]);
253 gen_replace_estimator(&p
->bstats
, &p
->rate_est
, p
->stats_lock
, est
);
256 spin_unlock_bh(&p
->lock
);
257 if (ret
!= ACT_P_CREATED
)
260 PSCHED_GET_TIME(p
->t_c
);
261 p
->index
= parm
->index
? : tcf_police_new_index();
262 h
= tcf_police_hash(p
->index
);
263 write_lock_bh(&police_lock
);
264 p
->next
= tcf_police_ht
[h
];
265 tcf_police_ht
[h
] = p
;
266 write_unlock_bh(&police_lock
);
272 if (ret
== ACT_P_CREATED
)
277 static int tcf_act_police_cleanup(struct tc_action
*a
, int bind
)
279 struct tcf_police
*p
= PRIV(a
);
282 return tcf_police_release(p
, bind
);
286 static int tcf_act_police(struct sk_buff
*skb
, struct tc_action
*a
,
287 struct tcf_result
*res
)
290 struct tcf_police
*p
= PRIV(a
);
296 p
->bstats
.bytes
+= skb
->len
;
299 #ifdef CONFIG_NET_ESTIMATOR
300 if (p
->ewma_rate
&& p
->rate_est
.bps
>= p
->ewma_rate
) {
301 p
->qstats
.overlimits
++;
302 spin_unlock(&p
->lock
);
307 if (skb
->len
<= p
->mtu
) {
308 if (p
->R_tab
== NULL
) {
309 spin_unlock(&p
->lock
);
313 PSCHED_GET_TIME(now
);
315 toks
= PSCHED_TDIFF_SAFE(now
, p
->t_c
, p
->burst
);
318 ptoks
= toks
+ p
->ptoks
;
319 if (ptoks
> (long)L2T_P(p
, p
->mtu
))
320 ptoks
= (long)L2T_P(p
, p
->mtu
);
321 ptoks
-= L2T_P(p
, skb
->len
);
324 if (toks
> (long)p
->burst
)
326 toks
-= L2T(p
, skb
->len
);
328 if ((toks
|ptoks
) >= 0) {
332 spin_unlock(&p
->lock
);
337 p
->qstats
.overlimits
++;
338 spin_unlock(&p
->lock
);
343 tcf_act_police_dump(struct sk_buff
*skb
, struct tc_action
*a
, int bind
, int ref
)
345 unsigned char *b
= skb
->tail
;
346 struct tc_police opt
;
347 struct tcf_police
*p
= PRIV(a
);
349 opt
.index
= p
->index
;
350 opt
.action
= p
->action
;
352 opt
.burst
= p
->burst
;
353 opt
.refcnt
= p
->refcnt
- ref
;
354 opt
.bindcnt
= p
->bindcnt
- bind
;
356 opt
.rate
= p
->R_tab
->rate
;
358 memset(&opt
.rate
, 0, sizeof(opt
.rate
));
360 opt
.peakrate
= p
->P_tab
->rate
;
362 memset(&opt
.peakrate
, 0, sizeof(opt
.peakrate
));
363 RTA_PUT(skb
, TCA_POLICE_TBF
, sizeof(opt
), &opt
);
365 RTA_PUT(skb
, TCA_POLICE_RESULT
, sizeof(int), &p
->result
);
366 #ifdef CONFIG_NET_ESTIMATOR
368 RTA_PUT(skb
, TCA_POLICE_AVRATE
, 4, &p
->ewma_rate
);
373 skb_trim(skb
, b
- skb
->data
);
377 MODULE_AUTHOR("Alexey Kuznetsov");
378 MODULE_DESCRIPTION("Policing actions");
379 MODULE_LICENSE("GPL");
381 static struct tc_action_ops act_police_ops
= {
383 .type
= TCA_ID_POLICE
,
384 .capab
= TCA_CAP_NONE
,
385 .owner
= THIS_MODULE
,
386 .act
= tcf_act_police
,
387 .dump
= tcf_act_police_dump
,
388 .cleanup
= tcf_act_police_cleanup
,
389 .lookup
= tcf_act_police_hash_search
,
390 .init
= tcf_act_police_locate
,
391 .walk
= tcf_act_police_walker
395 police_init_module(void)
397 return tcf_register_action(&act_police_ops
);
401 police_cleanup_module(void)
403 tcf_unregister_action(&act_police_ops
);
module_init(police_init_module);
module_exit(police_cleanup_module);
409 #else /* CONFIG_NET_CLS_ACT */
411 struct tcf_police
* tcf_police_locate(struct rtattr
*rta
, struct rtattr
*est
)
414 struct tcf_police
*p
;
415 struct rtattr
*tb
[TCA_POLICE_MAX
];
416 struct tc_police
*parm
;
418 if (rtattr_parse_nested(tb
, TCA_POLICE_MAX
, rta
) < 0)
421 if (tb
[TCA_POLICE_TBF
-1] == NULL
||
422 RTA_PAYLOAD(tb
[TCA_POLICE_TBF
-1]) != sizeof(*parm
))
425 parm
= RTA_DATA(tb
[TCA_POLICE_TBF
-1]);
427 if (parm
->index
&& (p
= tcf_police_lookup(parm
->index
)) != NULL
) {
432 p
= kmalloc(sizeof(*p
), GFP_KERNEL
);
436 memset(p
, 0, sizeof(*p
));
438 spin_lock_init(&p
->lock
);
439 p
->stats_lock
= &p
->lock
;
440 if (parm
->rate
.rate
) {
441 p
->R_tab
= qdisc_get_rtab(&parm
->rate
, tb
[TCA_POLICE_RATE
-1]);
442 if (p
->R_tab
== NULL
)
444 if (parm
->peakrate
.rate
) {
445 p
->P_tab
= qdisc_get_rtab(&parm
->peakrate
,
446 tb
[TCA_POLICE_PEAKRATE
-1]);
447 if (p
->P_tab
== NULL
)
451 if (tb
[TCA_POLICE_RESULT
-1]) {
452 if (RTA_PAYLOAD(tb
[TCA_POLICE_RESULT
-1]) != sizeof(u32
))
454 p
->result
= *(u32
*)RTA_DATA(tb
[TCA_POLICE_RESULT
-1]);
456 #ifdef CONFIG_NET_ESTIMATOR
457 if (tb
[TCA_POLICE_AVRATE
-1]) {
458 if (RTA_PAYLOAD(tb
[TCA_POLICE_AVRATE
-1]) != sizeof(u32
))
460 p
->ewma_rate
= *(u32
*)RTA_DATA(tb
[TCA_POLICE_AVRATE
-1]);
463 p
->toks
= p
->burst
= parm
->burst
;
468 p
->mtu
= 255<<p
->R_tab
->rate
.cell_log
;
471 p
->ptoks
= L2T_P(p
, p
->mtu
);
472 PSCHED_GET_TIME(p
->t_c
);
473 p
->index
= parm
->index
? : tcf_police_new_index();
474 p
->action
= parm
->action
;
475 #ifdef CONFIG_NET_ESTIMATOR
477 gen_new_estimator(&p
->bstats
, &p
->rate_est
, p
->stats_lock
, est
);
479 h
= tcf_police_hash(p
->index
);
480 write_lock_bh(&police_lock
);
481 p
->next
= tcf_police_ht
[h
];
482 tcf_police_ht
[h
] = p
;
483 write_unlock_bh(&police_lock
);
488 qdisc_put_rtab(p
->R_tab
);
493 int tcf_police(struct sk_buff
*skb
, struct tcf_police
*p
)
501 p
->bstats
.bytes
+= skb
->len
;
504 #ifdef CONFIG_NET_ESTIMATOR
505 if (p
->ewma_rate
&& p
->rate_est
.bps
>= p
->ewma_rate
) {
506 p
->qstats
.overlimits
++;
507 spin_unlock(&p
->lock
);
512 if (skb
->len
<= p
->mtu
) {
513 if (p
->R_tab
== NULL
) {
514 spin_unlock(&p
->lock
);
518 PSCHED_GET_TIME(now
);
520 toks
= PSCHED_TDIFF_SAFE(now
, p
->t_c
, p
->burst
);
523 ptoks
= toks
+ p
->ptoks
;
524 if (ptoks
> (long)L2T_P(p
, p
->mtu
))
525 ptoks
= (long)L2T_P(p
, p
->mtu
);
526 ptoks
-= L2T_P(p
, skb
->len
);
529 if (toks
> (long)p
->burst
)
531 toks
-= L2T(p
, skb
->len
);
533 if ((toks
|ptoks
) >= 0) {
537 spin_unlock(&p
->lock
);
542 p
->qstats
.overlimits
++;
543 spin_unlock(&p
->lock
);
546 EXPORT_SYMBOL(tcf_police
);
548 int tcf_police_dump(struct sk_buff
*skb
, struct tcf_police
*p
)
550 unsigned char *b
= skb
->tail
;
551 struct tc_police opt
;
553 opt
.index
= p
->index
;
554 opt
.action
= p
->action
;
556 opt
.burst
= p
->burst
;
558 opt
.rate
= p
->R_tab
->rate
;
560 memset(&opt
.rate
, 0, sizeof(opt
.rate
));
562 opt
.peakrate
= p
->P_tab
->rate
;
564 memset(&opt
.peakrate
, 0, sizeof(opt
.peakrate
));
565 RTA_PUT(skb
, TCA_POLICE_TBF
, sizeof(opt
), &opt
);
567 RTA_PUT(skb
, TCA_POLICE_RESULT
, sizeof(int), &p
->result
);
568 #ifdef CONFIG_NET_ESTIMATOR
570 RTA_PUT(skb
, TCA_POLICE_AVRATE
, 4, &p
->ewma_rate
);
575 skb_trim(skb
, b
- skb
->data
);
579 int tcf_police_dump_stats(struct sk_buff
*skb
, struct tcf_police
*p
)
583 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
,
584 TCA_XSTATS
, p
->stats_lock
, &d
) < 0)
587 if (gnet_stats_copy_basic(&d
, &p
->bstats
) < 0 ||
588 #ifdef CONFIG_NET_ESTIMATOR
589 gnet_stats_copy_rate_est(&d
, &p
->rate_est
) < 0 ||
591 gnet_stats_copy_queue(&d
, &p
->qstats
) < 0)
594 if (gnet_stats_finish_copy(&d
) < 0)
603 #endif /* CONFIG_NET_CLS_ACT */