/*
 * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
/*
   Compared to the general packet classification problem,
   RSVP needs only several relatively simple rules:

   * (dst, protocol) are always specified,
     so that we are able to hash them.
   * src may be exact, or may be wildcard, so that
     we can keep a hash table plus one wildcard entry.
   * source port (or flow label) is important only if src is given.
   IMPLEMENTATION.

   We use a two-level hash table: the top level is keyed by
   destination address and protocol ID; every bucket contains a list
   of "rsvp sessions", identified by destination address, protocol and
   DPI (= "Destination Port ID"): triple (key, mask, offset).

   Every session has a smaller hash table keyed by source address
   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
   Every bucket of that table is again a list of "RSVP flows", selected by
   source address and SPI (= "Source Port ID" here, rather than
   "security parameter index"): triple (key, mask, offset).
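
   For illustration, a (key, mask, offset) GPI triple amounts to a masked
   32-bit compare against the transport header, with xprt pointing just
   past the IP header (this mirrors how rsvp_classify() below applies
   s->dpi and f->spi):

	match = !(gpi->mask & (*(u32 *)(xprt + gpi->offset) ^ gpi->key));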
   NOTE 1. All packets with IPv6 extension headers (except AH and ESP)
   and all fragmented packets go to the best-effort traffic class.
   NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires
   only one "Generalized Port Identifier". So for classic
   ah, esp (and udp, tcp) both *pi should coincide, or one of them
   should be wildcard.

   At first sight, this redundancy is just a waste of CPU
   resources. But DPI and SPI add the possibility to assign different
   priorities to GPIs. Look also at note 4 about tunnels below.
   NOTE 3. One complication is the case of tunneled packets.
   We implement it as follows: if the first lookup
   matches a special session with a non-zero "tunnelhdr" value,
   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
   In this case, we pull tunnelhdr bytes and restart the lookup
   with the tunnel ID added to the list of keys. Simple and stupid 8)8)
   It's enough for PIMREG and IPIP.
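
   For illustration, the restart step in rsvp_classify() below boils
   down to:

	tunnelid = f->res.classid;
	nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
	goto restart;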
   NOTE 4. Two GPIs make it possible to parse even GRE packets.
   E.g. DPI can select ETH_P_IP (and the necessary flags to make
   tunnelhdr correct) in the GRE protocol field, and SPI matches
   the GRE key. Is it not nice? 8)8)
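
   As a purely hypothetical illustration (the values assume a GRE header
   with only the key flag set, so the 32-bit key sits at offset 4 of the
   transport header; adjust the offsets if other optional fields are
   present):

	dpi = (struct tc_rsvp_gpi) {
		.key	= htonl(0x2000 << 16 | ETH_P_IP),	/* K flag + proto IP */
		.mask	= htonl(0xFFFFFFFF),
		.offset	= 0,
	};
	spi = (struct tc_rsvp_gpi) {
		.key	= htonl(0x12345678),	/* placeholder GRE key */
		.mask	= htonl(0xFFFFFFFF),
		.offset	= 4,
	};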
   Well, as a result, despite its simplicity, we get a pretty
   powerful classification engine.  */
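
/* How this template is meant to be instantiated (a sketch based on the
 * in-tree wrappers cls_rsvp.c and cls_rsvp6.c; their exact contents may
 * differ): the wrapper defines the address length, the "kind" string and
 * the ops symbol, then includes this file:
 *
 *	#define RSVP_DST_LEN	1
 *	#define RSVP_ID		"rsvp"
 *	#define RSVP_OPS	cls_rsvp_ops
 *
 *	#include "cls_rsvp.h"
 *	MODULE_LICENSE("GPL");
 */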
struct rsvp_head {
	u32				tmap[256/32];
	u32				hgenerator;
	u8				tgenerator;
	struct rsvp_session __rcu	*ht[256];
	struct rcu_head			rcu;
};
struct rsvp_session {
	struct rsvp_session __rcu	*next;
	__be32				dst[RSVP_DST_LEN];
	struct tc_rsvp_gpi		dpi;
	u8				protocol;
	u8				tunnelid;
	/* 16 (src,sport) hash slots, and one wildcard source slot */
	struct rsvp_filter __rcu	*ht[16 + 1];
	struct rcu_head			rcu;
};
struct rsvp_filter {
	struct rsvp_filter __rcu	*next;
	__be32				src[RSVP_DST_LEN];
	struct tc_rsvp_gpi		spi;
	u8				tunnelhdr;

	struct tcf_result		res;
	struct tcf_exts			exts;

	u32				handle;
	struct rsvp_session		*sess;
	struct rcu_work			rwork;
};
static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
{
	unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];

	h ^= h >> 16;
	h ^= h >> 8;
	return (h ^ protocol ^ tunnelid) & 0xFF;
}
static inline unsigned int hash_src(__be32 *src)
{
	unsigned int h = (__force __u32)src[RSVP_DST_LEN - 1];

	h ^= h >> 16;
	h ^= h >> 8;
	h ^= h >> 4;
	return h & 0xF;
}
#define RSVP_APPLY_RESULT()				\
{							\
	int r = tcf_exts_exec(skb, &f->exts, res);	\
	if (r < 0)					\
		continue;				\
	else if (r > 0)					\
		return r;				\
}
static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			 struct tcf_result *res)
{
	struct rsvp_head *head = rcu_dereference_bh(tp->root);
	struct rsvp_session *s;
	struct rsvp_filter *f;
	unsigned int h1, h2;
	__be32 *dst, *src;
	u8 protocol;
	u8 tunnelid = 0;
	u8 *xprt;
#if RSVP_DST_LEN == 4
	struct ipv6hdr *nhptr;

	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
		return -1;
	nhptr = ipv6_hdr(skb);
#else
	struct iphdr *nhptr;

	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
		return -1;
	nhptr = ip_hdr(skb);
#endif
restart:
#if RSVP_DST_LEN == 4
	src = &nhptr->saddr.s6_addr32[0];
	dst = &nhptr->daddr.s6_addr32[0];
	protocol = nhptr->nexthdr;
	xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
#else
	src = &nhptr->saddr;
	dst = &nhptr->daddr;
	protocol = nhptr->protocol;
	xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
	if (ip_is_fragment(nhptr))
		return -1;
#endif
	h1 = hash_dst(dst, protocol, tunnelid);
	h2 = hash_src(src);

	for (s = rcu_dereference_bh(head->ht[h1]); s;
	     s = rcu_dereference_bh(s->next)) {
		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
		    protocol == s->protocol &&
		    !(s->dpi.mask &
		      (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
#if RSVP_DST_LEN == 4
		    dst[0] == s->dst[0] &&
		    dst[1] == s->dst[1] &&
		    dst[2] == s->dst[2] &&
#endif
		    tunnelid == s->tunnelid) {
			for (f = rcu_dereference_bh(s->ht[h2]); f;
			     f = rcu_dereference_bh(f->next)) {
				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
				    !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
#if RSVP_DST_LEN == 4
				    &&
				    src[0] == f->src[0] &&
				    src[1] == f->src[1] &&
				    src[2] == f->src[2]
#endif
				    ) {
					*res = f->res;
					RSVP_APPLY_RESULT();

matched:
					if (f->tunnelhdr == 0)
						return 0;

					tunnelid = f->res.classid;
					nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
					goto restart;
				}
			}
			/* And wildcard bucket... */
			for (f = rcu_dereference_bh(s->ht[16]); f;
			     f = rcu_dereference_bh(f->next)) {
				*res = f->res;
				RSVP_APPLY_RESULT();
				goto matched;
			}
			return -1;
		}
	}
	return -1;
}
static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
{
	struct rsvp_head *head = rtnl_dereference(tp->root);
	struct rsvp_session *s;
	struct rsvp_filter __rcu **ins;
	struct rsvp_filter *pins;
	unsigned int h1 = h & 0xFF;
	unsigned int h2 = (h >> 8) & 0xFF;

	for (s = rtnl_dereference(head->ht[h1]); s;
	     s = rtnl_dereference(s->next)) {
		for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
		     ins = &pins->next, pins = rtnl_dereference(*ins)) {
			if (pins->handle == h) {
				RCU_INIT_POINTER(n->next, pins->next);
				rcu_assign_pointer(*ins, n);
				return;
			}
		}
	}
	/* Something went wrong if we are trying to replace a non-existent
	 * node. Might as well halt instead of silently failing.
	 */
	BUG_ON(1);
}
static void *rsvp_get(struct tcf_proto *tp, u32 handle)
{
	struct rsvp_head *head = rtnl_dereference(tp->root);
	struct rsvp_session *s;
	struct rsvp_filter *f;
	unsigned int h1 = handle & 0xFF;
	unsigned int h2 = (handle >> 8) & 0xFF;

	if (h2 > 16)
		return NULL;

	for (s = rtnl_dereference(head->ht[h1]); s;
	     s = rtnl_dereference(s->next)) {
		for (f = rtnl_dereference(s->ht[h2]); f;
		     f = rtnl_dereference(f->next)) {
			if (f->handle == handle)
				return f;
		}
	}
	return NULL;
}
static int rsvp_init(struct tcf_proto *tp)
{
	struct rsvp_head *data;

	data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
	if (data) {
		rcu_assign_pointer(tp->root, data);
		return 0;
	}
	return -ENOBUFS;
}
static void __rsvp_delete_filter(struct rsvp_filter *f)
{
	tcf_exts_destroy(&f->exts);
	tcf_exts_put_net(&f->exts);
	kfree(f);
}
static void rsvp_delete_filter_work(struct work_struct *work)
{
	struct rsvp_filter *f = container_of(to_rcu_work(work),
					     struct rsvp_filter,
					     rwork);
	rtnl_lock();
	__rsvp_delete_filter(f);
	rtnl_unlock();
}
static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
{
	tcf_unbind_filter(tp, &f->res);
	/* all classifiers are required to call tcf_exts_destroy() after rcu
	 * grace period, since converted-to-rcu actions are relying on that
	 * in cleanup() callback
	 */
	if (tcf_exts_get_net(&f->exts))
		tcf_queue_work(&f->rwork, rsvp_delete_filter_work);
	else
		__rsvp_delete_filter(f);
}
static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
	struct rsvp_head *data = rtnl_dereference(tp->root);
	int h1, h2;

	if (data == NULL)
		return;

	for (h1 = 0; h1 < 256; h1++) {
		struct rsvp_session *s;

		while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
			RCU_INIT_POINTER(data->ht[h1], s->next);

			for (h2 = 0; h2 <= 16; h2++) {
				struct rsvp_filter *f;

				while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
					rcu_assign_pointer(s->ht[h2], f->next);
					rsvp_delete_filter(tp, f);
				}
			}
			kfree_rcu(s, rcu);
		}
	}
	kfree_rcu(data, rcu);
}
static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
		       struct netlink_ext_ack *extack)
{
	struct rsvp_head *head = rtnl_dereference(tp->root);
	struct rsvp_filter *nfp, *f = arg;
	struct rsvp_filter __rcu **fp;
	unsigned int h = f->handle;
	struct rsvp_session __rcu **sp;
	struct rsvp_session *nsp, *s = f->sess;
	int i, h1;

	fp = &s->ht[(h >> 8) & 0xFF];
	for (nfp = rtnl_dereference(*fp); nfp;
	     fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
		if (nfp == f) {
			RCU_INIT_POINTER(*fp, f->next);
			rsvp_delete_filter(tp, f);

			/* Strip tree */
			for (i = 0; i <= 16; i++)
				if (s->ht[i])
					goto out;

			/* OK, session has no flows */
			sp = &head->ht[h & 0xFF];
			for (nsp = rtnl_dereference(*sp); nsp;
			     sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
				if (nsp == s) {
					RCU_INIT_POINTER(*sp, s->next);
					kfree_rcu(s, rcu);
					goto out;
				}
			}

			break;
		}
	}

out:
	*last = true;
	for (h1 = 0; h1 < 256; h1++) {
		if (rcu_access_pointer(head->ht[h1])) {
			*last = false;
			break;
		}
	}

	return 0;
}
static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
{
	struct rsvp_head *data = rtnl_dereference(tp->root);
	int i = 0xFFFF;

	while (i-- > 0) {
		u32 h;

		if ((data->hgenerator += 0x10000) == 0)
			data->hgenerator = 0x10000;
		h = data->hgenerator|salt;
		if (!rsvp_get(tp, h))
			return h;
	}
	return 0;
}
static int tunnel_bts(struct rsvp_head *data)
{
	int n = data->tgenerator >> 5;
	u32 b = 1 << (data->tgenerator & 0x1F);

	if (data->tmap[n] & b)
		return 0;
	data->tmap[n] |= b;
	return 1;
}
static void tunnel_recycle(struct rsvp_head *data)
{
	struct rsvp_session __rcu **sht = data->ht;
	u32 tmap[256/32];
	int h1, h2;

	memset(tmap, 0, sizeof(tmap));

	for (h1 = 0; h1 < 256; h1++) {
		struct rsvp_session *s;
		for (s = rtnl_dereference(sht[h1]); s;
		     s = rtnl_dereference(s->next)) {
			for (h2 = 0; h2 <= 16; h2++) {
				struct rsvp_filter *f;

				for (f = rtnl_dereference(s->ht[h2]); f;
				     f = rtnl_dereference(f->next)) {
					if (f->tunnelhdr == 0)
						continue;
					data->tgenerator = f->res.classid;
					tunnel_bts(data);
				}
			}
		}
	}

	memcpy(data->tmap, tmap, sizeof(tmap));
}
static u32 gen_tunnel(struct rsvp_head *data)
{
	int i, k;

	for (k = 0; k < 2; k++) {
		for (i = 255; i > 0; i--) {
			if (++data->tgenerator == 0)
				data->tgenerator = 1;
			if (tunnel_bts(data))
				return data->tgenerator;
		}
		tunnel_recycle(data);
	}
	return 0;
}
static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
	[TCA_RSVP_CLASSID]	= { .type = NLA_U32 },
	[TCA_RSVP_DST]		= { .len = RSVP_DST_LEN * sizeof(u32) },
	[TCA_RSVP_SRC]		= { .len = RSVP_DST_LEN * sizeof(u32) },
	[TCA_RSVP_PINFO]	= { .len = sizeof(struct tc_rsvp_pinfo) },
};
static int rsvp_change(struct net *net, struct sk_buff *in_skb,
		       struct tcf_proto *tp, unsigned long base,
		       u32 handle, struct nlattr **tca,
		       void **arg, bool ovr, struct netlink_ext_ack *extack)
{
	struct rsvp_head *data = rtnl_dereference(tp->root);
	struct rsvp_filter *f, *nfp;
	struct rsvp_filter __rcu **fp;
	struct rsvp_session *nsp, *s;
	struct rsvp_session __rcu **sp;
	struct tc_rsvp_pinfo *pinfo = NULL;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_RSVP_MAX + 1];
	struct tcf_exts e;
	unsigned int h1, h2;
	__be32 *dst;
	int err;
	if (opt == NULL)
		return handle ? -EINVAL : 0;

	err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
	if (err < 0)
		return err;

	err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
	if (err < 0)
		return err;
	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
	if (err < 0)
		goto errout2;

	f = *arg;
	if (f) {
		/* Node exists: adjust only classid */
		struct rsvp_filter *n;

		if (f->handle != handle && handle)
			goto errout2;

		n = kmemdup(f, sizeof(*f), GFP_KERNEL);
		if (!n) {
			err = -ENOMEM;
			goto errout2;
		}

		err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
		if (err < 0) {
			kfree(n);
			goto errout2;
		}

		if (tb[TCA_RSVP_CLASSID]) {
			n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
			tcf_bind_filter(tp, &n->res, base);
		}

		tcf_exts_change(&n->exts, &e);
		rsvp_replace(tp, n, handle);
		return 0;
	}
	/* Now more serious part... */
	err = -EINVAL;
	if (handle)
		goto errout2;
	if (tb[TCA_RSVP_DST] == NULL)
		goto errout2;

	err = -ENOBUFS;
	f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
	if (f == NULL)
		goto errout2;

	err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
	if (err < 0)
		goto errout;
	h2 = 16;
	if (tb[TCA_RSVP_SRC]) {
		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
		h2 = hash_src(f->src);
	}
	if (tb[TCA_RSVP_PINFO]) {
		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
		f->spi = pinfo->spi;
		f->tunnelhdr = pinfo->tunnelhdr;
	}
	if (tb[TCA_RSVP_CLASSID])
		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);

	dst = nla_data(tb[TCA_RSVP_DST]);
	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
	err = -ENOMEM;
	if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
		goto errout;

	if (f->tunnelhdr) {
		err = -EINVAL;
		if (f->res.classid > 255)
			goto errout;

		err = -ENOMEM;
		if (f->res.classid == 0 &&
		    (f->res.classid = gen_tunnel(data)) == 0)
			goto errout;
	}
	for (sp = &data->ht[h1];
	     (s = rtnl_dereference(*sp)) != NULL;
	     sp = &s->next) {
		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
		    pinfo && pinfo->protocol == s->protocol &&
		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
#if RSVP_DST_LEN == 4
		    dst[0] == s->dst[0] &&
		    dst[1] == s->dst[1] &&
		    dst[2] == s->dst[2] &&
#endif
		    pinfo->tunnelid == s->tunnelid) {
insert:
			/* OK, we found appropriate session */

			fp = &s->ht[h2];

			f->sess = s;
			if (f->tunnelhdr == 0)
				tcf_bind_filter(tp, &f->res, base);

			tcf_exts_change(&f->exts, &e);

			for (nfp = rtnl_dereference(*fp); nfp;
			     fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
				__u32 mask = nfp->spi.mask & f->spi.mask;

				if (mask != f->spi.mask)
					break;
			}
			RCU_INIT_POINTER(f->next, nfp);
			rcu_assign_pointer(*fp, f);

			*arg = f;
			return 0;
		}
	}
	/* No session found. Create new one. */

	err = -ENOBUFS;
	s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
	if (s == NULL)
		goto errout;
	memcpy(s->dst, dst, sizeof(s->dst));

	if (pinfo) {
		s->dpi = pinfo->dpi;
		s->protocol = pinfo->protocol;
		s->tunnelid = pinfo->tunnelid;
	}
	sp = &data->ht[h1];
	for (nsp = rtnl_dereference(*sp); nsp;
	     sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
		if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
			break;
	}
	RCU_INIT_POINTER(s->next, nsp);
	rcu_assign_pointer(*sp, s);

	goto insert;
errout:
	tcf_exts_destroy(&f->exts);
	kfree(f);
errout2:
	tcf_exts_destroy(&e);
	return err;
}
static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct rsvp_head *head = rtnl_dereference(tp->root);
	unsigned int h, h1;

	if (arg->stop)
		return;

	for (h = 0; h < 256; h++) {
		struct rsvp_session *s;

		for (s = rtnl_dereference(head->ht[h]); s;
		     s = rtnl_dereference(s->next)) {
			for (h1 = 0; h1 <= 16; h1++) {
				struct rsvp_filter *f;

				for (f = rtnl_dereference(s->ht[h1]); f;
				     f = rtnl_dereference(f->next)) {
					if (arg->count < arg->skip) {
						arg->count++;
						continue;
					}
					if (arg->fn(tp, f, arg) < 0) {
						arg->stop = 1;
						return;
					}
					arg->count++;
				}
			}
		}
	}
}
static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
		     struct sk_buff *skb, struct tcmsg *t)
{
	struct rsvp_filter *f = fh;
	struct rsvp_session *s;
	struct nlattr *nest;
	struct tc_rsvp_pinfo pinfo;

	if (f == NULL)
		return skb->len;
	s = f->sess;

	t->tcm_handle = f->handle;
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
		goto nla_put_failure;
	pinfo.dpi = s->dpi;
	pinfo.spi = f->spi;
	pinfo.protocol = s->protocol;
	pinfo.tunnelid = s->tunnelid;
	pinfo.tunnelhdr = f->tunnelhdr;
	pinfo.pad = 0;
	if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
		goto nla_put_failure;
	if (f->res.classid &&
	    nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
		goto nla_put_failure;
	if (((f->handle >> 8) & 0xFF) != 16 &&
	    nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
		goto nla_put_failure;

	if (tcf_exts_dump(skb, &f->exts) < 0)
		goto nla_put_failure;

	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
			    unsigned long base)
{
	struct rsvp_filter *f = fh;

	if (f && f->res.classid == classid) {
		if (cl)
			__tcf_bind_filter(q, &f->res, base);
		else
			__tcf_unbind_filter(q, &f->res);
	}
}
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
	.kind		=	RSVP_ID,
	.classify	=	rsvp_classify,
	.init		=	rsvp_init,
	.destroy	=	rsvp_destroy,
	.get		=	rsvp_get,
	.change		=	rsvp_change,
	.delete		=	rsvp_delete,
	.walk		=	rsvp_walk,
	.dump		=	rsvp_dump,
	.bind_class	=	rsvp_bind_class,
	.owner		=	THIS_MODULE,
};
static int __init init_rsvp(void)
{
	return register_tcf_proto_ops(&RSVP_OPS);
}

static void __exit exit_rsvp(void)
{
	unregister_tcf_proto_ops(&RSVP_OPS);
}

module_init(init_rsvp)
module_exit(exit_rsvp)