2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
36 NOTE 1. All packets with IPv6 extension headers (except AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
40 NOTE 2. Two "port id"s seem to be redundant; RFC 2207 requires
41 only one "Generalized Port Identifier". So for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with a nonzero "tunnelhdr" value,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 E.g. DPI can select ETH_P_IP (and the flags necessary to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
65 Well, as a result, despite its simplicity, we get a pretty
66 powerful classification engine. */
73 struct rsvp_session __rcu
*ht
[256];
78 struct rsvp_session __rcu
*next
;
79 __be32 dst
[RSVP_DST_LEN
];
80 struct tc_rsvp_gpi dpi
;
83 /* 16 (src,sport) hash slots, and one wildcard source slot */
84 struct rsvp_filter __rcu
*ht
[16 + 1];
90 struct rsvp_filter __rcu
*next
;
91 __be32 src
[RSVP_DST_LEN
];
92 struct tc_rsvp_gpi spi
;
95 struct tcf_result res
;
99 struct rsvp_session
*sess
;
101 struct work_struct work
;
106 static inline unsigned int hash_dst(__be32
*dst
, u8 protocol
, u8 tunnelid
)
108 unsigned int h
= (__force __u32
)dst
[RSVP_DST_LEN
- 1];
112 return (h
^ protocol
^ tunnelid
) & 0xFF;
115 static inline unsigned int hash_src(__be32
*src
)
117 unsigned int h
= (__force __u32
)src
[RSVP_DST_LEN
-1];
125 #define RSVP_APPLY_RESULT() \
127 int r = tcf_exts_exec(skb, &f->exts, res); \
134 static int rsvp_classify(struct sk_buff
*skb
, const struct tcf_proto
*tp
,
135 struct tcf_result
*res
)
137 struct rsvp_head
*head
= rcu_dereference_bh(tp
->root
);
138 struct rsvp_session
*s
;
139 struct rsvp_filter
*f
;
145 #if RSVP_DST_LEN == 4
146 struct ipv6hdr
*nhptr
;
148 if (!pskb_network_may_pull(skb
, sizeof(*nhptr
)))
150 nhptr
= ipv6_hdr(skb
);
154 if (!pskb_network_may_pull(skb
, sizeof(*nhptr
)))
160 #if RSVP_DST_LEN == 4
161 src
= &nhptr
->saddr
.s6_addr32
[0];
162 dst
= &nhptr
->daddr
.s6_addr32
[0];
163 protocol
= nhptr
->nexthdr
;
164 xprt
= ((u8
*)nhptr
) + sizeof(struct ipv6hdr
);
168 protocol
= nhptr
->protocol
;
169 xprt
= ((u8
*)nhptr
) + (nhptr
->ihl
<<2);
170 if (ip_is_fragment(nhptr
))
174 h1
= hash_dst(dst
, protocol
, tunnelid
);
177 for (s
= rcu_dereference_bh(head
->ht
[h1
]); s
;
178 s
= rcu_dereference_bh(s
->next
)) {
179 if (dst
[RSVP_DST_LEN
-1] == s
->dst
[RSVP_DST_LEN
- 1] &&
180 protocol
== s
->protocol
&&
182 (*(u32
*)(xprt
+ s
->dpi
.offset
) ^ s
->dpi
.key
)) &&
183 #if RSVP_DST_LEN == 4
184 dst
[0] == s
->dst
[0] &&
185 dst
[1] == s
->dst
[1] &&
186 dst
[2] == s
->dst
[2] &&
188 tunnelid
== s
->tunnelid
) {
190 for (f
= rcu_dereference_bh(s
->ht
[h2
]); f
;
191 f
= rcu_dereference_bh(f
->next
)) {
192 if (src
[RSVP_DST_LEN
-1] == f
->src
[RSVP_DST_LEN
- 1] &&
193 !(f
->spi
.mask
& (*(u32
*)(xprt
+ f
->spi
.offset
) ^ f
->spi
.key
))
194 #if RSVP_DST_LEN == 4
196 src
[0] == f
->src
[0] &&
197 src
[1] == f
->src
[1] &&
205 if (f
->tunnelhdr
== 0)
208 tunnelid
= f
->res
.classid
;
209 nhptr
= (void *)(xprt
+ f
->tunnelhdr
- sizeof(*nhptr
));
214 /* And wildcard bucket... */
215 for (f
= rcu_dereference_bh(s
->ht
[16]); f
;
216 f
= rcu_dereference_bh(f
->next
)) {
227 static void rsvp_replace(struct tcf_proto
*tp
, struct rsvp_filter
*n
, u32 h
)
229 struct rsvp_head
*head
= rtnl_dereference(tp
->root
);
230 struct rsvp_session
*s
;
231 struct rsvp_filter __rcu
**ins
;
232 struct rsvp_filter
*pins
;
233 unsigned int h1
= h
& 0xFF;
234 unsigned int h2
= (h
>> 8) & 0xFF;
236 for (s
= rtnl_dereference(head
->ht
[h1
]); s
;
237 s
= rtnl_dereference(s
->next
)) {
238 for (ins
= &s
->ht
[h2
], pins
= rtnl_dereference(*ins
); ;
239 ins
= &pins
->next
, pins
= rtnl_dereference(*ins
)) {
240 if (pins
->handle
== h
) {
241 RCU_INIT_POINTER(n
->next
, pins
->next
);
242 rcu_assign_pointer(*ins
, n
);
248 /* Something went wrong if we are trying to replace a non-existent
249 * node. Might as well halt instead of silently failing.
254 static void *rsvp_get(struct tcf_proto
*tp
, u32 handle
)
256 struct rsvp_head
*head
= rtnl_dereference(tp
->root
);
257 struct rsvp_session
*s
;
258 struct rsvp_filter
*f
;
259 unsigned int h1
= handle
& 0xFF;
260 unsigned int h2
= (handle
>> 8) & 0xFF;
265 for (s
= rtnl_dereference(head
->ht
[h1
]); s
;
266 s
= rtnl_dereference(s
->next
)) {
267 for (f
= rtnl_dereference(s
->ht
[h2
]); f
;
268 f
= rtnl_dereference(f
->next
)) {
269 if (f
->handle
== handle
)
276 static int rsvp_init(struct tcf_proto
*tp
)
278 struct rsvp_head
*data
;
280 data
= kzalloc(sizeof(struct rsvp_head
), GFP_KERNEL
);
282 rcu_assign_pointer(tp
->root
, data
);
288 static void __rsvp_delete_filter(struct rsvp_filter
*f
)
290 tcf_exts_destroy(&f
->exts
);
291 tcf_exts_put_net(&f
->exts
);
295 static void rsvp_delete_filter_work(struct work_struct
*work
)
297 struct rsvp_filter
*f
= container_of(work
, struct rsvp_filter
, work
);
300 __rsvp_delete_filter(f
);
304 static void rsvp_delete_filter_rcu(struct rcu_head
*head
)
306 struct rsvp_filter
*f
= container_of(head
, struct rsvp_filter
, rcu
);
308 INIT_WORK(&f
->work
, rsvp_delete_filter_work
);
309 tcf_queue_work(&f
->work
);
312 static void rsvp_delete_filter(struct tcf_proto
*tp
, struct rsvp_filter
*f
)
314 tcf_unbind_filter(tp
, &f
->res
);
315 /* All classifiers are required to call tcf_exts_destroy() after an RCU
316 * grace period, since converted-to-RCU actions rely on that
317 * in their cleanup() callback
319 if (tcf_exts_get_net(&f
->exts
))
320 call_rcu(&f
->rcu
, rsvp_delete_filter_rcu
);
322 __rsvp_delete_filter(f
);
325 static void rsvp_destroy(struct tcf_proto
*tp
, struct netlink_ext_ack
*extack
)
327 struct rsvp_head
*data
= rtnl_dereference(tp
->root
);
333 for (h1
= 0; h1
< 256; h1
++) {
334 struct rsvp_session
*s
;
336 while ((s
= rtnl_dereference(data
->ht
[h1
])) != NULL
) {
337 RCU_INIT_POINTER(data
->ht
[h1
], s
->next
);
339 for (h2
= 0; h2
<= 16; h2
++) {
340 struct rsvp_filter
*f
;
342 while ((f
= rtnl_dereference(s
->ht
[h2
])) != NULL
) {
343 rcu_assign_pointer(s
->ht
[h2
], f
->next
);
344 rsvp_delete_filter(tp
, f
);
350 kfree_rcu(data
, rcu
);
353 static int rsvp_delete(struct tcf_proto
*tp
, void *arg
, bool *last
,
354 struct netlink_ext_ack
*extack
)
356 struct rsvp_head
*head
= rtnl_dereference(tp
->root
);
357 struct rsvp_filter
*nfp
, *f
= arg
;
358 struct rsvp_filter __rcu
**fp
;
359 unsigned int h
= f
->handle
;
360 struct rsvp_session __rcu
**sp
;
361 struct rsvp_session
*nsp
, *s
= f
->sess
;
364 fp
= &s
->ht
[(h
>> 8) & 0xFF];
365 for (nfp
= rtnl_dereference(*fp
); nfp
;
366 fp
= &nfp
->next
, nfp
= rtnl_dereference(*fp
)) {
368 RCU_INIT_POINTER(*fp
, f
->next
);
369 rsvp_delete_filter(tp
, f
);
373 for (i
= 0; i
<= 16; i
++)
377 /* OK, session has no flows */
378 sp
= &head
->ht
[h
& 0xFF];
379 for (nsp
= rtnl_dereference(*sp
); nsp
;
380 sp
= &nsp
->next
, nsp
= rtnl_dereference(*sp
)) {
382 RCU_INIT_POINTER(*sp
, s
->next
);
394 for (h1
= 0; h1
< 256; h1
++) {
395 if (rcu_access_pointer(head
->ht
[h1
])) {
404 static unsigned int gen_handle(struct tcf_proto
*tp
, unsigned salt
)
406 struct rsvp_head
*data
= rtnl_dereference(tp
->root
);
412 if ((data
->hgenerator
+= 0x10000) == 0)
413 data
->hgenerator
= 0x10000;
414 h
= data
->hgenerator
|salt
;
415 if (!rsvp_get(tp
, h
))
421 static int tunnel_bts(struct rsvp_head
*data
)
423 int n
= data
->tgenerator
>> 5;
424 u32 b
= 1 << (data
->tgenerator
& 0x1F);
426 if (data
->tmap
[n
] & b
)
432 static void tunnel_recycle(struct rsvp_head
*data
)
434 struct rsvp_session __rcu
**sht
= data
->ht
;
438 memset(tmap
, 0, sizeof(tmap
));
440 for (h1
= 0; h1
< 256; h1
++) {
441 struct rsvp_session
*s
;
442 for (s
= rtnl_dereference(sht
[h1
]); s
;
443 s
= rtnl_dereference(s
->next
)) {
444 for (h2
= 0; h2
<= 16; h2
++) {
445 struct rsvp_filter
*f
;
447 for (f
= rtnl_dereference(s
->ht
[h2
]); f
;
448 f
= rtnl_dereference(f
->next
)) {
449 if (f
->tunnelhdr
== 0)
451 data
->tgenerator
= f
->res
.classid
;
458 memcpy(data
->tmap
, tmap
, sizeof(tmap
));
461 static u32
gen_tunnel(struct rsvp_head
*data
)
465 for (k
= 0; k
< 2; k
++) {
466 for (i
= 255; i
> 0; i
--) {
467 if (++data
->tgenerator
== 0)
468 data
->tgenerator
= 1;
469 if (tunnel_bts(data
))
470 return data
->tgenerator
;
472 tunnel_recycle(data
);
477 static const struct nla_policy rsvp_policy
[TCA_RSVP_MAX
+ 1] = {
478 [TCA_RSVP_CLASSID
] = { .type
= NLA_U32
},
479 [TCA_RSVP_DST
] = { .type
= NLA_BINARY
,
480 .len
= RSVP_DST_LEN
* sizeof(u32
) },
481 [TCA_RSVP_SRC
] = { .type
= NLA_BINARY
,
482 .len
= RSVP_DST_LEN
* sizeof(u32
) },
483 [TCA_RSVP_PINFO
] = { .len
= sizeof(struct tc_rsvp_pinfo
) },
486 static int rsvp_change(struct net
*net
, struct sk_buff
*in_skb
,
487 struct tcf_proto
*tp
, unsigned long base
,
490 void **arg
, bool ovr
, struct netlink_ext_ack
*extack
)
492 struct rsvp_head
*data
= rtnl_dereference(tp
->root
);
493 struct rsvp_filter
*f
, *nfp
;
494 struct rsvp_filter __rcu
**fp
;
495 struct rsvp_session
*nsp
, *s
;
496 struct rsvp_session __rcu
**sp
;
497 struct tc_rsvp_pinfo
*pinfo
= NULL
;
498 struct nlattr
*opt
= tca
[TCA_OPTIONS
];
499 struct nlattr
*tb
[TCA_RSVP_MAX
+ 1];
506 return handle
? -EINVAL
: 0;
508 err
= nla_parse_nested(tb
, TCA_RSVP_MAX
, opt
, rsvp_policy
, NULL
);
512 err
= tcf_exts_init(&e
, TCA_RSVP_ACT
, TCA_RSVP_POLICE
);
515 err
= tcf_exts_validate(net
, tp
, tb
, tca
[TCA_RATE
], &e
, ovr
, extack
);
521 /* Node exists: adjust only classid */
522 struct rsvp_filter
*n
;
524 if (f
->handle
!= handle
&& handle
)
527 n
= kmemdup(f
, sizeof(*f
), GFP_KERNEL
);
533 err
= tcf_exts_init(&n
->exts
, TCA_RSVP_ACT
, TCA_RSVP_POLICE
);
539 if (tb
[TCA_RSVP_CLASSID
]) {
540 n
->res
.classid
= nla_get_u32(tb
[TCA_RSVP_CLASSID
]);
541 tcf_bind_filter(tp
, &n
->res
, base
);
544 tcf_exts_change(&n
->exts
, &e
);
545 rsvp_replace(tp
, n
, handle
);
549 /* Now more serious part... */
553 if (tb
[TCA_RSVP_DST
] == NULL
)
557 f
= kzalloc(sizeof(struct rsvp_filter
), GFP_KERNEL
);
561 err
= tcf_exts_init(&f
->exts
, TCA_RSVP_ACT
, TCA_RSVP_POLICE
);
565 if (tb
[TCA_RSVP_SRC
]) {
566 memcpy(f
->src
, nla_data(tb
[TCA_RSVP_SRC
]), sizeof(f
->src
));
567 h2
= hash_src(f
->src
);
569 if (tb
[TCA_RSVP_PINFO
]) {
570 pinfo
= nla_data(tb
[TCA_RSVP_PINFO
]);
572 f
->tunnelhdr
= pinfo
->tunnelhdr
;
574 if (tb
[TCA_RSVP_CLASSID
])
575 f
->res
.classid
= nla_get_u32(tb
[TCA_RSVP_CLASSID
]);
577 dst
= nla_data(tb
[TCA_RSVP_DST
]);
578 h1
= hash_dst(dst
, pinfo
? pinfo
->protocol
: 0, pinfo
? pinfo
->tunnelid
: 0);
581 if ((f
->handle
= gen_handle(tp
, h1
| (h2
<<8))) == 0)
586 if (f
->res
.classid
> 255)
590 if (f
->res
.classid
== 0 &&
591 (f
->res
.classid
= gen_tunnel(data
)) == 0)
595 for (sp
= &data
->ht
[h1
];
596 (s
= rtnl_dereference(*sp
)) != NULL
;
598 if (dst
[RSVP_DST_LEN
-1] == s
->dst
[RSVP_DST_LEN
-1] &&
599 pinfo
&& pinfo
->protocol
== s
->protocol
&&
600 memcmp(&pinfo
->dpi
, &s
->dpi
, sizeof(s
->dpi
)) == 0 &&
601 #if RSVP_DST_LEN == 4
602 dst
[0] == s
->dst
[0] &&
603 dst
[1] == s
->dst
[1] &&
604 dst
[2] == s
->dst
[2] &&
606 pinfo
->tunnelid
== s
->tunnelid
) {
609 /* OK, we found appropriate session */
614 if (f
->tunnelhdr
== 0)
615 tcf_bind_filter(tp
, &f
->res
, base
);
617 tcf_exts_change(&f
->exts
, &e
);
620 for (nfp
= rtnl_dereference(*fp
); nfp
;
621 fp
= &nfp
->next
, nfp
= rtnl_dereference(*fp
)) {
622 __u32 mask
= nfp
->spi
.mask
& f
->spi
.mask
;
624 if (mask
!= f
->spi
.mask
)
627 RCU_INIT_POINTER(f
->next
, nfp
);
628 rcu_assign_pointer(*fp
, f
);
635 /* No session found. Create new one. */
638 s
= kzalloc(sizeof(struct rsvp_session
), GFP_KERNEL
);
641 memcpy(s
->dst
, dst
, sizeof(s
->dst
));
645 s
->protocol
= pinfo
->protocol
;
646 s
->tunnelid
= pinfo
->tunnelid
;
649 for (nsp
= rtnl_dereference(*sp
); nsp
;
650 sp
= &nsp
->next
, nsp
= rtnl_dereference(*sp
)) {
651 if ((nsp
->dpi
.mask
& s
->dpi
.mask
) != s
->dpi
.mask
)
654 RCU_INIT_POINTER(s
->next
, nsp
);
655 rcu_assign_pointer(*sp
, s
);
660 tcf_exts_destroy(&f
->exts
);
663 tcf_exts_destroy(&e
);
667 static void rsvp_walk(struct tcf_proto
*tp
, struct tcf_walker
*arg
)
669 struct rsvp_head
*head
= rtnl_dereference(tp
->root
);
675 for (h
= 0; h
< 256; h
++) {
676 struct rsvp_session
*s
;
678 for (s
= rtnl_dereference(head
->ht
[h
]); s
;
679 s
= rtnl_dereference(s
->next
)) {
680 for (h1
= 0; h1
<= 16; h1
++) {
681 struct rsvp_filter
*f
;
683 for (f
= rtnl_dereference(s
->ht
[h1
]); f
;
684 f
= rtnl_dereference(f
->next
)) {
685 if (arg
->count
< arg
->skip
) {
689 if (arg
->fn(tp
, f
, arg
) < 0) {
700 static int rsvp_dump(struct net
*net
, struct tcf_proto
*tp
, void *fh
,
701 struct sk_buff
*skb
, struct tcmsg
*t
)
703 struct rsvp_filter
*f
= fh
;
704 struct rsvp_session
*s
;
706 struct tc_rsvp_pinfo pinfo
;
712 t
->tcm_handle
= f
->handle
;
714 nest
= nla_nest_start(skb
, TCA_OPTIONS
);
716 goto nla_put_failure
;
718 if (nla_put(skb
, TCA_RSVP_DST
, sizeof(s
->dst
), &s
->dst
))
719 goto nla_put_failure
;
722 pinfo
.protocol
= s
->protocol
;
723 pinfo
.tunnelid
= s
->tunnelid
;
724 pinfo
.tunnelhdr
= f
->tunnelhdr
;
726 if (nla_put(skb
, TCA_RSVP_PINFO
, sizeof(pinfo
), &pinfo
))
727 goto nla_put_failure
;
728 if (f
->res
.classid
&&
729 nla_put_u32(skb
, TCA_RSVP_CLASSID
, f
->res
.classid
))
730 goto nla_put_failure
;
731 if (((f
->handle
>> 8) & 0xFF) != 16 &&
732 nla_put(skb
, TCA_RSVP_SRC
, sizeof(f
->src
), f
->src
))
733 goto nla_put_failure
;
735 if (tcf_exts_dump(skb
, &f
->exts
) < 0)
736 goto nla_put_failure
;
738 nla_nest_end(skb
, nest
);
740 if (tcf_exts_dump_stats(skb
, &f
->exts
) < 0)
741 goto nla_put_failure
;
745 nla_nest_cancel(skb
, nest
);
749 static void rsvp_bind_class(void *fh
, u32 classid
, unsigned long cl
)
751 struct rsvp_filter
*f
= fh
;
753 if (f
&& f
->res
.classid
== classid
)
757 static struct tcf_proto_ops RSVP_OPS __read_mostly
= {
759 .classify
= rsvp_classify
,
761 .destroy
= rsvp_destroy
,
763 .change
= rsvp_change
,
764 .delete = rsvp_delete
,
767 .bind_class
= rsvp_bind_class
,
768 .owner
= THIS_MODULE
,
771 static int __init
init_rsvp(void)
773 return register_tcf_proto_ops(&RSVP_OPS
);
776 static void __exit
exit_rsvp(void)
778 unregister_tcf_proto_ops(&RSVP_OPS
);
781 module_init(init_rsvp
)
782 module_exit(exit_rsvp
)