mm: Use static initialization for "srcu"
[linux/fpc-iii.git] / net / sched / cls_rsvp.h
blob322438fb3ffcb426194be6c1dd05893b27cdf51c
/*
 * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
/*
   Compared to the general packet classification problem,
   RSVP needs only several relatively simple rules:

   * (dst, protocol) are always specified,
     so that we are able to hash them.
   * src may be exact, or may be wildcard, so that
     we can keep a hash table plus one wildcard entry.
   * source port (or flow label) is important only if src is given.

   IMPLEMENTATION.

   We use a two level hash table: The top level is keyed by
   destination address and protocol ID, every bucket contains a list
   of "rsvp sessions", identified by destination address, protocol and
   DPI(="Destination Port ID"): triple (key, mask, offset).

   Every bucket has a smaller hash table keyed by source address
   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
   Every bucket is again a list of "RSVP flows", selected by
   source address and SPI(="Source Port ID" here rather than
   "security parameter index"): triple (key, mask, offset).


   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
   and all fragmented packets go to the best-effort traffic class.


   NOTE 2. Two "port id"'s seem to be redundant, rfc2207 requires
   only one "Generalized Port Identifier". So that for classic
   ah, esp (and udp,tcp) both *pi should coincide or one of them
   should be wildcard.

   At first sight, this redundancy is just a waste of CPU
   resources. But DPI and SPI add the possibility to assign different
   priorities to GPIs. Look also at note 4 about tunnels below.


   NOTE 3. One complication is the case of tunneled packets.
   We implement it as follows: if the first lookup
   matches a special session with "tunnelhdr" value not zero,
   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
   In this case, we pull tunnelhdr bytes and restart lookup
   with tunnel ID added to the list of keys. Simple and stupid 8)8)
   It's enough for PIMREG and IPIP.


   NOTE 4. Two GPIs make it possible to parse even GRE packets.
   F.e. DPI can select ETH_P_IP (and necessary flags to make
   tunnelhdr correct) in GRE protocol field and SPI matches
   GRE key. Is it not nice? 8)8)


   Well, as result, despite its simplicity, we get a pretty
   powerful classification engine.  */
69 struct rsvp_head {
70 u32 tmap[256/32];
71 u32 hgenerator;
72 u8 tgenerator;
73 struct rsvp_session __rcu *ht[256];
74 struct rcu_head rcu;
77 struct rsvp_session {
78 struct rsvp_session __rcu *next;
79 __be32 dst[RSVP_DST_LEN];
80 struct tc_rsvp_gpi dpi;
81 u8 protocol;
82 u8 tunnelid;
83 /* 16 (src,sport) hash slots, and one wildcard source slot */
84 struct rsvp_filter __rcu *ht[16 + 1];
85 struct rcu_head rcu;
89 struct rsvp_filter {
90 struct rsvp_filter __rcu *next;
91 __be32 src[RSVP_DST_LEN];
92 struct tc_rsvp_gpi spi;
93 u8 tunnelhdr;
95 struct tcf_result res;
96 struct tcf_exts exts;
98 u32 handle;
99 struct rsvp_session *sess;
100 struct rcu_head rcu;
103 static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
105 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
107 h ^= h>>16;
108 h ^= h>>8;
109 return (h ^ protocol ^ tunnelid) & 0xFF;
112 static inline unsigned int hash_src(__be32 *src)
114 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
116 h ^= h>>16;
117 h ^= h>>8;
118 h ^= h>>4;
119 return h & 0xF;
/*
 * Run the extensions of filter "f" on "skb".  A negative result skips to
 * the next iteration of the enclosing loop, a positive result is returned
 * from the enclosing function, zero falls through.
 *
 * This is deliberately a bare brace block: wrapping it in do { } while (0)
 * would make "continue" act on the wrapper instead of the caller's loop.
 * It expects "skb", "f" and "res" to be in scope at the expansion site.
 */
#define RSVP_APPLY_RESULT()				\
{							\
	int r = tcf_exts_exec(skb, &f->exts, res);	\
	if (r < 0)					\
		continue;				\
	if (r > 0)					\
		return r;				\
}
131 static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
132 struct tcf_result *res)
134 struct rsvp_head *head = rcu_dereference_bh(tp->root);
135 struct rsvp_session *s;
136 struct rsvp_filter *f;
137 unsigned int h1, h2;
138 __be32 *dst, *src;
139 u8 protocol;
140 u8 tunnelid = 0;
141 u8 *xprt;
142 #if RSVP_DST_LEN == 4
143 struct ipv6hdr *nhptr;
145 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
146 return -1;
147 nhptr = ipv6_hdr(skb);
148 #else
149 struct iphdr *nhptr;
151 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
152 return -1;
153 nhptr = ip_hdr(skb);
154 #endif
155 if (unlikely(!head))
156 return -1;
157 restart:
159 #if RSVP_DST_LEN == 4
160 src = &nhptr->saddr.s6_addr32[0];
161 dst = &nhptr->daddr.s6_addr32[0];
162 protocol = nhptr->nexthdr;
163 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
164 #else
165 src = &nhptr->saddr;
166 dst = &nhptr->daddr;
167 protocol = nhptr->protocol;
168 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
169 if (ip_is_fragment(nhptr))
170 return -1;
171 #endif
173 h1 = hash_dst(dst, protocol, tunnelid);
174 h2 = hash_src(src);
176 for (s = rcu_dereference_bh(head->ht[h1]); s;
177 s = rcu_dereference_bh(s->next)) {
178 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
179 protocol == s->protocol &&
180 !(s->dpi.mask &
181 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
182 #if RSVP_DST_LEN == 4
183 dst[0] == s->dst[0] &&
184 dst[1] == s->dst[1] &&
185 dst[2] == s->dst[2] &&
186 #endif
187 tunnelid == s->tunnelid) {
189 for (f = rcu_dereference_bh(s->ht[h2]); f;
190 f = rcu_dereference_bh(f->next)) {
191 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
192 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
193 #if RSVP_DST_LEN == 4
195 src[0] == f->src[0] &&
196 src[1] == f->src[1] &&
197 src[2] == f->src[2]
198 #endif
200 *res = f->res;
201 RSVP_APPLY_RESULT();
203 matched:
204 if (f->tunnelhdr == 0)
205 return 0;
207 tunnelid = f->res.classid;
208 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
209 goto restart;
213 /* And wildcard bucket... */
214 for (f = rcu_dereference_bh(s->ht[16]); f;
215 f = rcu_dereference_bh(f->next)) {
216 *res = f->res;
217 RSVP_APPLY_RESULT();
218 goto matched;
220 return -1;
223 return -1;
226 static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
228 struct rsvp_head *head = rtnl_dereference(tp->root);
229 struct rsvp_session *s;
230 struct rsvp_filter __rcu **ins;
231 struct rsvp_filter *pins;
232 unsigned int h1 = h & 0xFF;
233 unsigned int h2 = (h >> 8) & 0xFF;
235 for (s = rtnl_dereference(head->ht[h1]); s;
236 s = rtnl_dereference(s->next)) {
237 for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
238 ins = &pins->next, pins = rtnl_dereference(*ins)) {
239 if (pins->handle == h) {
240 RCU_INIT_POINTER(n->next, pins->next);
241 rcu_assign_pointer(*ins, n);
242 return;
247 /* Something went wrong if we are trying to replace a non-existant
248 * node. Mind as well halt instead of silently failing.
250 BUG_ON(1);
253 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
255 struct rsvp_head *head = rtnl_dereference(tp->root);
256 struct rsvp_session *s;
257 struct rsvp_filter *f;
258 unsigned int h1 = handle & 0xFF;
259 unsigned int h2 = (handle >> 8) & 0xFF;
261 if (h2 > 16)
262 return 0;
264 for (s = rtnl_dereference(head->ht[h1]); s;
265 s = rtnl_dereference(s->next)) {
266 for (f = rtnl_dereference(s->ht[h2]); f;
267 f = rtnl_dereference(f->next)) {
268 if (f->handle == handle)
269 return (unsigned long)f;
272 return 0;
275 static int rsvp_init(struct tcf_proto *tp)
277 struct rsvp_head *data;
279 data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
280 if (data) {
281 rcu_assign_pointer(tp->root, data);
282 return 0;
284 return -ENOBUFS;
287 static void rsvp_delete_filter_rcu(struct rcu_head *head)
289 struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
291 tcf_exts_destroy(&f->exts);
292 kfree(f);
295 static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
297 tcf_unbind_filter(tp, &f->res);
298 /* all classifiers are required to call tcf_exts_destroy() after rcu
299 * grace period, since converted-to-rcu actions are relying on that
300 * in cleanup() callback
302 call_rcu(&f->rcu, rsvp_delete_filter_rcu);
305 static bool rsvp_destroy(struct tcf_proto *tp, bool force)
307 struct rsvp_head *data = rtnl_dereference(tp->root);
308 int h1, h2;
310 if (data == NULL)
311 return true;
313 if (!force) {
314 for (h1 = 0; h1 < 256; h1++) {
315 if (rcu_access_pointer(data->ht[h1]))
316 return false;
320 RCU_INIT_POINTER(tp->root, NULL);
322 for (h1 = 0; h1 < 256; h1++) {
323 struct rsvp_session *s;
325 while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
326 RCU_INIT_POINTER(data->ht[h1], s->next);
328 for (h2 = 0; h2 <= 16; h2++) {
329 struct rsvp_filter *f;
331 while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
332 rcu_assign_pointer(s->ht[h2], f->next);
333 rsvp_delete_filter(tp, f);
336 kfree_rcu(s, rcu);
339 kfree_rcu(data, rcu);
340 return true;
343 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
345 struct rsvp_head *head = rtnl_dereference(tp->root);
346 struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
347 struct rsvp_filter __rcu **fp;
348 unsigned int h = f->handle;
349 struct rsvp_session __rcu **sp;
350 struct rsvp_session *nsp, *s = f->sess;
351 int i;
353 fp = &s->ht[(h >> 8) & 0xFF];
354 for (nfp = rtnl_dereference(*fp); nfp;
355 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
356 if (nfp == f) {
357 RCU_INIT_POINTER(*fp, f->next);
358 rsvp_delete_filter(tp, f);
360 /* Strip tree */
362 for (i = 0; i <= 16; i++)
363 if (s->ht[i])
364 return 0;
366 /* OK, session has no flows */
367 sp = &head->ht[h & 0xFF];
368 for (nsp = rtnl_dereference(*sp); nsp;
369 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
370 if (nsp == s) {
371 RCU_INIT_POINTER(*sp, s->next);
372 kfree_rcu(s, rcu);
373 return 0;
377 return 0;
380 return 0;
383 static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
385 struct rsvp_head *data = rtnl_dereference(tp->root);
386 int i = 0xFFFF;
388 while (i-- > 0) {
389 u32 h;
391 if ((data->hgenerator += 0x10000) == 0)
392 data->hgenerator = 0x10000;
393 h = data->hgenerator|salt;
394 if (rsvp_get(tp, h) == 0)
395 return h;
397 return 0;
400 static int tunnel_bts(struct rsvp_head *data)
402 int n = data->tgenerator >> 5;
403 u32 b = 1 << (data->tgenerator & 0x1F);
405 if (data->tmap[n] & b)
406 return 0;
407 data->tmap[n] |= b;
408 return 1;
411 static void tunnel_recycle(struct rsvp_head *data)
413 struct rsvp_session __rcu **sht = data->ht;
414 u32 tmap[256/32];
415 int h1, h2;
417 memset(tmap, 0, sizeof(tmap));
419 for (h1 = 0; h1 < 256; h1++) {
420 struct rsvp_session *s;
421 for (s = rtnl_dereference(sht[h1]); s;
422 s = rtnl_dereference(s->next)) {
423 for (h2 = 0; h2 <= 16; h2++) {
424 struct rsvp_filter *f;
426 for (f = rtnl_dereference(s->ht[h2]); f;
427 f = rtnl_dereference(f->next)) {
428 if (f->tunnelhdr == 0)
429 continue;
430 data->tgenerator = f->res.classid;
431 tunnel_bts(data);
437 memcpy(data->tmap, tmap, sizeof(tmap));
440 static u32 gen_tunnel(struct rsvp_head *data)
442 int i, k;
444 for (k = 0; k < 2; k++) {
445 for (i = 255; i > 0; i--) {
446 if (++data->tgenerator == 0)
447 data->tgenerator = 1;
448 if (tunnel_bts(data))
449 return data->tgenerator;
451 tunnel_recycle(data);
453 return 0;
456 static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
457 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
458 [TCA_RSVP_DST] = { .type = NLA_BINARY,
459 .len = RSVP_DST_LEN * sizeof(u32) },
460 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
461 .len = RSVP_DST_LEN * sizeof(u32) },
462 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
465 static int rsvp_change(struct net *net, struct sk_buff *in_skb,
466 struct tcf_proto *tp, unsigned long base,
467 u32 handle,
468 struct nlattr **tca,
469 unsigned long *arg, bool ovr)
471 struct rsvp_head *data = rtnl_dereference(tp->root);
472 struct rsvp_filter *f, *nfp;
473 struct rsvp_filter __rcu **fp;
474 struct rsvp_session *nsp, *s;
475 struct rsvp_session __rcu **sp;
476 struct tc_rsvp_pinfo *pinfo = NULL;
477 struct nlattr *opt = tca[TCA_OPTIONS];
478 struct nlattr *tb[TCA_RSVP_MAX + 1];
479 struct tcf_exts e;
480 unsigned int h1, h2;
481 __be32 *dst;
482 int err;
484 if (opt == NULL)
485 return handle ? -EINVAL : 0;
487 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
488 if (err < 0)
489 return err;
491 err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
492 if (err < 0)
493 return err;
494 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
495 if (err < 0)
496 goto errout2;
498 f = (struct rsvp_filter *)*arg;
499 if (f) {
500 /* Node exists: adjust only classid */
501 struct rsvp_filter *n;
503 if (f->handle != handle && handle)
504 goto errout2;
506 n = kmemdup(f, sizeof(*f), GFP_KERNEL);
507 if (!n) {
508 err = -ENOMEM;
509 goto errout2;
512 err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
513 if (err < 0) {
514 kfree(n);
515 goto errout2;
518 if (tb[TCA_RSVP_CLASSID]) {
519 n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
520 tcf_bind_filter(tp, &n->res, base);
523 tcf_exts_change(tp, &n->exts, &e);
524 rsvp_replace(tp, n, handle);
525 return 0;
528 /* Now more serious part... */
529 err = -EINVAL;
530 if (handle)
531 goto errout2;
532 if (tb[TCA_RSVP_DST] == NULL)
533 goto errout2;
535 err = -ENOBUFS;
536 f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
537 if (f == NULL)
538 goto errout2;
540 err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
541 if (err < 0)
542 goto errout;
543 h2 = 16;
544 if (tb[TCA_RSVP_SRC]) {
545 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
546 h2 = hash_src(f->src);
548 if (tb[TCA_RSVP_PINFO]) {
549 pinfo = nla_data(tb[TCA_RSVP_PINFO]);
550 f->spi = pinfo->spi;
551 f->tunnelhdr = pinfo->tunnelhdr;
553 if (tb[TCA_RSVP_CLASSID])
554 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
556 dst = nla_data(tb[TCA_RSVP_DST]);
557 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
559 err = -ENOMEM;
560 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
561 goto errout;
563 if (f->tunnelhdr) {
564 err = -EINVAL;
565 if (f->res.classid > 255)
566 goto errout;
568 err = -ENOMEM;
569 if (f->res.classid == 0 &&
570 (f->res.classid = gen_tunnel(data)) == 0)
571 goto errout;
574 for (sp = &data->ht[h1];
575 (s = rtnl_dereference(*sp)) != NULL;
576 sp = &s->next) {
577 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
578 pinfo && pinfo->protocol == s->protocol &&
579 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
580 #if RSVP_DST_LEN == 4
581 dst[0] == s->dst[0] &&
582 dst[1] == s->dst[1] &&
583 dst[2] == s->dst[2] &&
584 #endif
585 pinfo->tunnelid == s->tunnelid) {
587 insert:
588 /* OK, we found appropriate session */
590 fp = &s->ht[h2];
592 f->sess = s;
593 if (f->tunnelhdr == 0)
594 tcf_bind_filter(tp, &f->res, base);
596 tcf_exts_change(tp, &f->exts, &e);
598 fp = &s->ht[h2];
599 for (nfp = rtnl_dereference(*fp); nfp;
600 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
601 __u32 mask = nfp->spi.mask & f->spi.mask;
603 if (mask != f->spi.mask)
604 break;
606 RCU_INIT_POINTER(f->next, nfp);
607 rcu_assign_pointer(*fp, f);
609 *arg = (unsigned long)f;
610 return 0;
614 /* No session found. Create new one. */
616 err = -ENOBUFS;
617 s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
618 if (s == NULL)
619 goto errout;
620 memcpy(s->dst, dst, sizeof(s->dst));
622 if (pinfo) {
623 s->dpi = pinfo->dpi;
624 s->protocol = pinfo->protocol;
625 s->tunnelid = pinfo->tunnelid;
627 sp = &data->ht[h1];
628 for (nsp = rtnl_dereference(*sp); nsp;
629 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
630 if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
631 break;
633 RCU_INIT_POINTER(s->next, nsp);
634 rcu_assign_pointer(*sp, s);
636 goto insert;
638 errout:
639 tcf_exts_destroy(&f->exts);
640 kfree(f);
641 errout2:
642 tcf_exts_destroy(&e);
643 return err;
646 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
648 struct rsvp_head *head = rtnl_dereference(tp->root);
649 unsigned int h, h1;
651 if (arg->stop)
652 return;
654 for (h = 0; h < 256; h++) {
655 struct rsvp_session *s;
657 for (s = rtnl_dereference(head->ht[h]); s;
658 s = rtnl_dereference(s->next)) {
659 for (h1 = 0; h1 <= 16; h1++) {
660 struct rsvp_filter *f;
662 for (f = rtnl_dereference(s->ht[h1]); f;
663 f = rtnl_dereference(f->next)) {
664 if (arg->count < arg->skip) {
665 arg->count++;
666 continue;
668 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
669 arg->stop = 1;
670 return;
672 arg->count++;
679 static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
680 struct sk_buff *skb, struct tcmsg *t)
682 struct rsvp_filter *f = (struct rsvp_filter *)fh;
683 struct rsvp_session *s;
684 struct nlattr *nest;
685 struct tc_rsvp_pinfo pinfo;
687 if (f == NULL)
688 return skb->len;
689 s = f->sess;
691 t->tcm_handle = f->handle;
693 nest = nla_nest_start(skb, TCA_OPTIONS);
694 if (nest == NULL)
695 goto nla_put_failure;
697 if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
698 goto nla_put_failure;
699 pinfo.dpi = s->dpi;
700 pinfo.spi = f->spi;
701 pinfo.protocol = s->protocol;
702 pinfo.tunnelid = s->tunnelid;
703 pinfo.tunnelhdr = f->tunnelhdr;
704 pinfo.pad = 0;
705 if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
706 goto nla_put_failure;
707 if (f->res.classid &&
708 nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
709 goto nla_put_failure;
710 if (((f->handle >> 8) & 0xFF) != 16 &&
711 nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
712 goto nla_put_failure;
714 if (tcf_exts_dump(skb, &f->exts) < 0)
715 goto nla_put_failure;
717 nla_nest_end(skb, nest);
719 if (tcf_exts_dump_stats(skb, &f->exts) < 0)
720 goto nla_put_failure;
721 return skb->len;
723 nla_put_failure:
724 nla_nest_cancel(skb, nest);
725 return -1;
728 static struct tcf_proto_ops RSVP_OPS __read_mostly = {
729 .kind = RSVP_ID,
730 .classify = rsvp_classify,
731 .init = rsvp_init,
732 .destroy = rsvp_destroy,
733 .get = rsvp_get,
734 .change = rsvp_change,
735 .delete = rsvp_delete,
736 .walk = rsvp_walk,
737 .dump = rsvp_dump,
738 .owner = THIS_MODULE,
741 static int __init init_rsvp(void)
743 return register_tcf_proto_ops(&RSVP_OPS);
746 static void __exit exit_rsvp(void)
748 unregister_tcf_proto_ops(&RSVP_OPS);
751 module_init(init_rsvp)
752 module_exit(exit_rsvp)