2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
40 NOTE 2. Two "port id"s seem to be redundant, rfc2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. See also note 4 about tunnels below.
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 E.g. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
65 Well, as a result, despite its simplicity, we get a pretty
66 powerful classification engine. */
73 struct rsvp_session __rcu
*ht
[256];
78 struct rsvp_session __rcu
*next
;
79 __be32 dst
[RSVP_DST_LEN
];
80 struct tc_rsvp_gpi dpi
;
83 /* 16 (src,sport) hash slots, and one wildcard source slot */
84 struct rsvp_filter __rcu
*ht
[16 + 1];
90 struct rsvp_filter __rcu
*next
;
91 __be32 src
[RSVP_DST_LEN
];
92 struct tc_rsvp_gpi spi
;
95 struct tcf_result res
;
99 struct rsvp_session
*sess
;
100 struct rcu_work rwork
;
103 static inline unsigned int hash_dst(__be32
*dst
, u8 protocol
, u8 tunnelid
)
105 unsigned int h
= (__force __u32
)dst
[RSVP_DST_LEN
- 1];
109 return (h
^ protocol
^ tunnelid
) & 0xFF;
112 static inline unsigned int hash_src(__be32
*src
)
114 unsigned int h
= (__force __u32
)src
[RSVP_DST_LEN
-1];
122 #define RSVP_APPLY_RESULT() \
124 int r = tcf_exts_exec(skb, &f->exts, res); \
131 static int rsvp_classify(struct sk_buff
*skb
, const struct tcf_proto
*tp
,
132 struct tcf_result
*res
)
134 struct rsvp_head
*head
= rcu_dereference_bh(tp
->root
);
135 struct rsvp_session
*s
;
136 struct rsvp_filter
*f
;
142 #if RSVP_DST_LEN == 4
143 struct ipv6hdr
*nhptr
;
145 if (!pskb_network_may_pull(skb
, sizeof(*nhptr
)))
147 nhptr
= ipv6_hdr(skb
);
151 if (!pskb_network_may_pull(skb
, sizeof(*nhptr
)))
157 #if RSVP_DST_LEN == 4
158 src
= &nhptr
->saddr
.s6_addr32
[0];
159 dst
= &nhptr
->daddr
.s6_addr32
[0];
160 protocol
= nhptr
->nexthdr
;
161 xprt
= ((u8
*)nhptr
) + sizeof(struct ipv6hdr
);
165 protocol
= nhptr
->protocol
;
166 xprt
= ((u8
*)nhptr
) + (nhptr
->ihl
<<2);
167 if (ip_is_fragment(nhptr
))
171 h1
= hash_dst(dst
, protocol
, tunnelid
);
174 for (s
= rcu_dereference_bh(head
->ht
[h1
]); s
;
175 s
= rcu_dereference_bh(s
->next
)) {
176 if (dst
[RSVP_DST_LEN
-1] == s
->dst
[RSVP_DST_LEN
- 1] &&
177 protocol
== s
->protocol
&&
179 (*(u32
*)(xprt
+ s
->dpi
.offset
) ^ s
->dpi
.key
)) &&
180 #if RSVP_DST_LEN == 4
181 dst
[0] == s
->dst
[0] &&
182 dst
[1] == s
->dst
[1] &&
183 dst
[2] == s
->dst
[2] &&
185 tunnelid
== s
->tunnelid
) {
187 for (f
= rcu_dereference_bh(s
->ht
[h2
]); f
;
188 f
= rcu_dereference_bh(f
->next
)) {
189 if (src
[RSVP_DST_LEN
-1] == f
->src
[RSVP_DST_LEN
- 1] &&
190 !(f
->spi
.mask
& (*(u32
*)(xprt
+ f
->spi
.offset
) ^ f
->spi
.key
))
191 #if RSVP_DST_LEN == 4
193 src
[0] == f
->src
[0] &&
194 src
[1] == f
->src
[1] &&
202 if (f
->tunnelhdr
== 0)
205 tunnelid
= f
->res
.classid
;
206 nhptr
= (void *)(xprt
+ f
->tunnelhdr
- sizeof(*nhptr
));
211 /* And wildcard bucket... */
212 for (f
= rcu_dereference_bh(s
->ht
[16]); f
;
213 f
= rcu_dereference_bh(f
->next
)) {
224 static void rsvp_replace(struct tcf_proto
*tp
, struct rsvp_filter
*n
, u32 h
)
226 struct rsvp_head
*head
= rtnl_dereference(tp
->root
);
227 struct rsvp_session
*s
;
228 struct rsvp_filter __rcu
**ins
;
229 struct rsvp_filter
*pins
;
230 unsigned int h1
= h
& 0xFF;
231 unsigned int h2
= (h
>> 8) & 0xFF;
233 for (s
= rtnl_dereference(head
->ht
[h1
]); s
;
234 s
= rtnl_dereference(s
->next
)) {
235 for (ins
= &s
->ht
[h2
], pins
= rtnl_dereference(*ins
); ;
236 ins
= &pins
->next
, pins
= rtnl_dereference(*ins
)) {
237 if (pins
->handle
== h
) {
238 RCU_INIT_POINTER(n
->next
, pins
->next
);
239 rcu_assign_pointer(*ins
, n
);
245 /* Something went wrong if we are trying to replace a non-existent
246 * node. Might as well halt instead of silently failing.
251 static void *rsvp_get(struct tcf_proto
*tp
, u32 handle
)
253 struct rsvp_head
*head
= rtnl_dereference(tp
->root
);
254 struct rsvp_session
*s
;
255 struct rsvp_filter
*f
;
256 unsigned int h1
= handle
& 0xFF;
257 unsigned int h2
= (handle
>> 8) & 0xFF;
262 for (s
= rtnl_dereference(head
->ht
[h1
]); s
;
263 s
= rtnl_dereference(s
->next
)) {
264 for (f
= rtnl_dereference(s
->ht
[h2
]); f
;
265 f
= rtnl_dereference(f
->next
)) {
266 if (f
->handle
== handle
)
273 static int rsvp_init(struct tcf_proto
*tp
)
275 struct rsvp_head
*data
;
277 data
= kzalloc(sizeof(struct rsvp_head
), GFP_KERNEL
);
279 rcu_assign_pointer(tp
->root
, data
);
285 static void __rsvp_delete_filter(struct rsvp_filter
*f
)
287 tcf_exts_destroy(&f
->exts
);
288 tcf_exts_put_net(&f
->exts
);
292 static void rsvp_delete_filter_work(struct work_struct
*work
)
294 struct rsvp_filter
*f
= container_of(to_rcu_work(work
),
298 __rsvp_delete_filter(f
);
302 static void rsvp_delete_filter(struct tcf_proto
*tp
, struct rsvp_filter
*f
)
304 tcf_unbind_filter(tp
, &f
->res
);
305 /* all classifiers are required to call tcf_exts_destroy() after rcu
306 * grace period, since converted-to-rcu actions are relying on that
307 * in cleanup() callback
309 if (tcf_exts_get_net(&f
->exts
))
310 tcf_queue_work(&f
->rwork
, rsvp_delete_filter_work
);
312 __rsvp_delete_filter(f
);
315 static void rsvp_destroy(struct tcf_proto
*tp
, struct netlink_ext_ack
*extack
)
317 struct rsvp_head
*data
= rtnl_dereference(tp
->root
);
323 for (h1
= 0; h1
< 256; h1
++) {
324 struct rsvp_session
*s
;
326 while ((s
= rtnl_dereference(data
->ht
[h1
])) != NULL
) {
327 RCU_INIT_POINTER(data
->ht
[h1
], s
->next
);
329 for (h2
= 0; h2
<= 16; h2
++) {
330 struct rsvp_filter
*f
;
332 while ((f
= rtnl_dereference(s
->ht
[h2
])) != NULL
) {
333 rcu_assign_pointer(s
->ht
[h2
], f
->next
);
334 rsvp_delete_filter(tp
, f
);
340 kfree_rcu(data
, rcu
);
343 static int rsvp_delete(struct tcf_proto
*tp
, void *arg
, bool *last
,
344 struct netlink_ext_ack
*extack
)
346 struct rsvp_head
*head
= rtnl_dereference(tp
->root
);
347 struct rsvp_filter
*nfp
, *f
= arg
;
348 struct rsvp_filter __rcu
**fp
;
349 unsigned int h
= f
->handle
;
350 struct rsvp_session __rcu
**sp
;
351 struct rsvp_session
*nsp
, *s
= f
->sess
;
354 fp
= &s
->ht
[(h
>> 8) & 0xFF];
355 for (nfp
= rtnl_dereference(*fp
); nfp
;
356 fp
= &nfp
->next
, nfp
= rtnl_dereference(*fp
)) {
358 RCU_INIT_POINTER(*fp
, f
->next
);
359 rsvp_delete_filter(tp
, f
);
363 for (i
= 0; i
<= 16; i
++)
367 /* OK, session has no flows */
368 sp
= &head
->ht
[h
& 0xFF];
369 for (nsp
= rtnl_dereference(*sp
); nsp
;
370 sp
= &nsp
->next
, nsp
= rtnl_dereference(*sp
)) {
372 RCU_INIT_POINTER(*sp
, s
->next
);
384 for (h1
= 0; h1
< 256; h1
++) {
385 if (rcu_access_pointer(head
->ht
[h1
])) {
394 static unsigned int gen_handle(struct tcf_proto
*tp
, unsigned salt
)
396 struct rsvp_head
*data
= rtnl_dereference(tp
->root
);
402 if ((data
->hgenerator
+= 0x10000) == 0)
403 data
->hgenerator
= 0x10000;
404 h
= data
->hgenerator
|salt
;
405 if (!rsvp_get(tp
, h
))
411 static int tunnel_bts(struct rsvp_head
*data
)
413 int n
= data
->tgenerator
>> 5;
414 u32 b
= 1 << (data
->tgenerator
& 0x1F);
416 if (data
->tmap
[n
] & b
)
422 static void tunnel_recycle(struct rsvp_head
*data
)
424 struct rsvp_session __rcu
**sht
= data
->ht
;
428 memset(tmap
, 0, sizeof(tmap
));
430 for (h1
= 0; h1
< 256; h1
++) {
431 struct rsvp_session
*s
;
432 for (s
= rtnl_dereference(sht
[h1
]); s
;
433 s
= rtnl_dereference(s
->next
)) {
434 for (h2
= 0; h2
<= 16; h2
++) {
435 struct rsvp_filter
*f
;
437 for (f
= rtnl_dereference(s
->ht
[h2
]); f
;
438 f
= rtnl_dereference(f
->next
)) {
439 if (f
->tunnelhdr
== 0)
441 data
->tgenerator
= f
->res
.classid
;
448 memcpy(data
->tmap
, tmap
, sizeof(tmap
));
451 static u32
gen_tunnel(struct rsvp_head
*data
)
455 for (k
= 0; k
< 2; k
++) {
456 for (i
= 255; i
> 0; i
--) {
457 if (++data
->tgenerator
== 0)
458 data
->tgenerator
= 1;
459 if (tunnel_bts(data
))
460 return data
->tgenerator
;
462 tunnel_recycle(data
);
467 static const struct nla_policy rsvp_policy
[TCA_RSVP_MAX
+ 1] = {
468 [TCA_RSVP_CLASSID
] = { .type
= NLA_U32
},
469 [TCA_RSVP_DST
] = { .type
= NLA_BINARY
,
470 .len
= RSVP_DST_LEN
* sizeof(u32
) },
471 [TCA_RSVP_SRC
] = { .type
= NLA_BINARY
,
472 .len
= RSVP_DST_LEN
* sizeof(u32
) },
473 [TCA_RSVP_PINFO
] = { .len
= sizeof(struct tc_rsvp_pinfo
) },
476 static int rsvp_change(struct net
*net
, struct sk_buff
*in_skb
,
477 struct tcf_proto
*tp
, unsigned long base
,
480 void **arg
, bool ovr
, struct netlink_ext_ack
*extack
)
482 struct rsvp_head
*data
= rtnl_dereference(tp
->root
);
483 struct rsvp_filter
*f
, *nfp
;
484 struct rsvp_filter __rcu
**fp
;
485 struct rsvp_session
*nsp
, *s
;
486 struct rsvp_session __rcu
**sp
;
487 struct tc_rsvp_pinfo
*pinfo
= NULL
;
488 struct nlattr
*opt
= tca
[TCA_OPTIONS
];
489 struct nlattr
*tb
[TCA_RSVP_MAX
+ 1];
496 return handle
? -EINVAL
: 0;
498 err
= nla_parse_nested(tb
, TCA_RSVP_MAX
, opt
, rsvp_policy
, NULL
);
502 err
= tcf_exts_init(&e
, TCA_RSVP_ACT
, TCA_RSVP_POLICE
);
505 err
= tcf_exts_validate(net
, tp
, tb
, tca
[TCA_RATE
], &e
, ovr
, extack
);
511 /* Node exists: adjust only classid */
512 struct rsvp_filter
*n
;
514 if (f
->handle
!= handle
&& handle
)
517 n
= kmemdup(f
, sizeof(*f
), GFP_KERNEL
);
523 err
= tcf_exts_init(&n
->exts
, TCA_RSVP_ACT
, TCA_RSVP_POLICE
);
529 if (tb
[TCA_RSVP_CLASSID
]) {
530 n
->res
.classid
= nla_get_u32(tb
[TCA_RSVP_CLASSID
]);
531 tcf_bind_filter(tp
, &n
->res
, base
);
534 tcf_exts_change(&n
->exts
, &e
);
535 rsvp_replace(tp
, n
, handle
);
539 /* Now more serious part... */
543 if (tb
[TCA_RSVP_DST
] == NULL
)
547 f
= kzalloc(sizeof(struct rsvp_filter
), GFP_KERNEL
);
551 err
= tcf_exts_init(&f
->exts
, TCA_RSVP_ACT
, TCA_RSVP_POLICE
);
555 if (tb
[TCA_RSVP_SRC
]) {
556 memcpy(f
->src
, nla_data(tb
[TCA_RSVP_SRC
]), sizeof(f
->src
));
557 h2
= hash_src(f
->src
);
559 if (tb
[TCA_RSVP_PINFO
]) {
560 pinfo
= nla_data(tb
[TCA_RSVP_PINFO
]);
562 f
->tunnelhdr
= pinfo
->tunnelhdr
;
564 if (tb
[TCA_RSVP_CLASSID
])
565 f
->res
.classid
= nla_get_u32(tb
[TCA_RSVP_CLASSID
]);
567 dst
= nla_data(tb
[TCA_RSVP_DST
]);
568 h1
= hash_dst(dst
, pinfo
? pinfo
->protocol
: 0, pinfo
? pinfo
->tunnelid
: 0);
571 if ((f
->handle
= gen_handle(tp
, h1
| (h2
<<8))) == 0)
576 if (f
->res
.classid
> 255)
580 if (f
->res
.classid
== 0 &&
581 (f
->res
.classid
= gen_tunnel(data
)) == 0)
585 for (sp
= &data
->ht
[h1
];
586 (s
= rtnl_dereference(*sp
)) != NULL
;
588 if (dst
[RSVP_DST_LEN
-1] == s
->dst
[RSVP_DST_LEN
-1] &&
589 pinfo
&& pinfo
->protocol
== s
->protocol
&&
590 memcmp(&pinfo
->dpi
, &s
->dpi
, sizeof(s
->dpi
)) == 0 &&
591 #if RSVP_DST_LEN == 4
592 dst
[0] == s
->dst
[0] &&
593 dst
[1] == s
->dst
[1] &&
594 dst
[2] == s
->dst
[2] &&
596 pinfo
->tunnelid
== s
->tunnelid
) {
599 /* OK, we found appropriate session */
604 if (f
->tunnelhdr
== 0)
605 tcf_bind_filter(tp
, &f
->res
, base
);
607 tcf_exts_change(&f
->exts
, &e
);
610 for (nfp
= rtnl_dereference(*fp
); nfp
;
611 fp
= &nfp
->next
, nfp
= rtnl_dereference(*fp
)) {
612 __u32 mask
= nfp
->spi
.mask
& f
->spi
.mask
;
614 if (mask
!= f
->spi
.mask
)
617 RCU_INIT_POINTER(f
->next
, nfp
);
618 rcu_assign_pointer(*fp
, f
);
625 /* No session found. Create new one. */
628 s
= kzalloc(sizeof(struct rsvp_session
), GFP_KERNEL
);
631 memcpy(s
->dst
, dst
, sizeof(s
->dst
));
635 s
->protocol
= pinfo
->protocol
;
636 s
->tunnelid
= pinfo
->tunnelid
;
639 for (nsp
= rtnl_dereference(*sp
); nsp
;
640 sp
= &nsp
->next
, nsp
= rtnl_dereference(*sp
)) {
641 if ((nsp
->dpi
.mask
& s
->dpi
.mask
) != s
->dpi
.mask
)
644 RCU_INIT_POINTER(s
->next
, nsp
);
645 rcu_assign_pointer(*sp
, s
);
650 tcf_exts_destroy(&f
->exts
);
653 tcf_exts_destroy(&e
);
657 static void rsvp_walk(struct tcf_proto
*tp
, struct tcf_walker
*arg
)
659 struct rsvp_head
*head
= rtnl_dereference(tp
->root
);
665 for (h
= 0; h
< 256; h
++) {
666 struct rsvp_session
*s
;
668 for (s
= rtnl_dereference(head
->ht
[h
]); s
;
669 s
= rtnl_dereference(s
->next
)) {
670 for (h1
= 0; h1
<= 16; h1
++) {
671 struct rsvp_filter
*f
;
673 for (f
= rtnl_dereference(s
->ht
[h1
]); f
;
674 f
= rtnl_dereference(f
->next
)) {
675 if (arg
->count
< arg
->skip
) {
679 if (arg
->fn(tp
, f
, arg
) < 0) {
690 static int rsvp_dump(struct net
*net
, struct tcf_proto
*tp
, void *fh
,
691 struct sk_buff
*skb
, struct tcmsg
*t
)
693 struct rsvp_filter
*f
= fh
;
694 struct rsvp_session
*s
;
696 struct tc_rsvp_pinfo pinfo
;
702 t
->tcm_handle
= f
->handle
;
704 nest
= nla_nest_start(skb
, TCA_OPTIONS
);
706 goto nla_put_failure
;
708 if (nla_put(skb
, TCA_RSVP_DST
, sizeof(s
->dst
), &s
->dst
))
709 goto nla_put_failure
;
712 pinfo
.protocol
= s
->protocol
;
713 pinfo
.tunnelid
= s
->tunnelid
;
714 pinfo
.tunnelhdr
= f
->tunnelhdr
;
716 if (nla_put(skb
, TCA_RSVP_PINFO
, sizeof(pinfo
), &pinfo
))
717 goto nla_put_failure
;
718 if (f
->res
.classid
&&
719 nla_put_u32(skb
, TCA_RSVP_CLASSID
, f
->res
.classid
))
720 goto nla_put_failure
;
721 if (((f
->handle
>> 8) & 0xFF) != 16 &&
722 nla_put(skb
, TCA_RSVP_SRC
, sizeof(f
->src
), f
->src
))
723 goto nla_put_failure
;
725 if (tcf_exts_dump(skb
, &f
->exts
) < 0)
726 goto nla_put_failure
;
728 nla_nest_end(skb
, nest
);
730 if (tcf_exts_dump_stats(skb
, &f
->exts
) < 0)
731 goto nla_put_failure
;
735 nla_nest_cancel(skb
, nest
);
739 static void rsvp_bind_class(void *fh
, u32 classid
, unsigned long cl
)
741 struct rsvp_filter
*f
= fh
;
743 if (f
&& f
->res
.classid
== classid
)
747 static struct tcf_proto_ops RSVP_OPS __read_mostly
= {
749 .classify
= rsvp_classify
,
751 .destroy
= rsvp_destroy
,
753 .change
= rsvp_change
,
754 .delete = rsvp_delete
,
757 .bind_class
= rsvp_bind_class
,
758 .owner
= THIS_MODULE
,
761 static int __init
init_rsvp(void)
763 return register_tcf_proto_ops(&RSVP_OPS
);
766 static void __exit
exit_rsvp(void)
768 unregister_tcf_proto_ops(&RSVP_OPS
);
771 module_init(init_rsvp
)
772 module_exit(exit_rsvp
)