1 /* $NetBSD: altq_subr.c,v 1.27 2008/11/25 23:10:43 tsutsui Exp $ */
2 /* $KAME: altq_subr.c,v 1.24 2005/04/13 03:44:25 suz Exp $ */
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.27 2008/11/25 23:10:43 tsutsui Exp $");
39 #include <sys/param.h>
40 #include <sys/malloc.h>
42 #include <sys/systm.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/kernel.h>
47 #include <sys/errno.h>
48 #include <sys/syslog.h>
49 #include <sys/sysctl.h>
50 #include <sys/queue.h>
53 #include <net/if_dl.h>
54 #include <net/if_types.h>
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/ip.h>
60 #include <netinet/ip6.h>
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
66 #include <net/pfvar.h>
68 #include <altq/altq.h>
70 #include <altq/altq_conf.h>
74 * internal function prototypes
76 static void tbr_timeout(void *);
77 int (*altq_input
)(struct mbuf
*, int) = NULL
;
78 static int tbr_timer
= 0; /* token bucket regulator timer */
79 static struct callout tbr_callout
;
#ifdef ALTQ3_CLFIER_COMPAT
static int 	extract_ports4(struct mbuf *, struct ip *,
			       struct flowinfo_in *);
#ifdef INET6
static int 	extract_ports6(struct mbuf *, struct ip6_hdr *,
			       struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
			      struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
				struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
			      struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
				 struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void 	ip4f_cache(struct ip *, struct flowinfo_in *);
static int 	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int 	ip4f_init(void);
static struct ip4_frag	*ip4f_alloc(void);
static void 	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */
109 * alternate queueing support routines
112 /* look up the queue state by the interface name and the queueing type. */
114 altq_lookup(char *name
, int type
)
118 if ((ifp
= ifunit(name
)) != NULL
) {
119 if (type
!= ALTQT_NONE
&& ifp
->if_snd
.altq_type
== type
)
120 return (ifp
->if_snd
.altq_disc
);
127 altq_attach(struct ifaltq
*ifq
, int type
, void *discipline
,
128 int (*enqueue
)(struct ifaltq
*, struct mbuf
*, struct altq_pktattr
*),
129 struct mbuf
*(*dequeue
)(struct ifaltq
*, int),
130 int (*request
)(struct ifaltq
*, int, void *),
131 void *clfier
, void *(*classify
)(void *, struct mbuf
*, int))
133 if (!ALTQ_IS_READY(ifq
))
138 * pfaltq can override the existing discipline, but altq3 cannot.
139 * check these if clfier is not NULL (which implies altq3).
141 if (clfier
!= NULL
) {
142 if (ALTQ_IS_ENABLED(ifq
))
144 if (ALTQ_IS_ATTACHED(ifq
))
148 ifq
->altq_type
= type
;
149 ifq
->altq_disc
= discipline
;
150 ifq
->altq_enqueue
= enqueue
;
151 ifq
->altq_dequeue
= dequeue
;
152 ifq
->altq_request
= request
;
153 ifq
->altq_clfier
= clfier
;
154 ifq
->altq_classify
= classify
;
155 ifq
->altq_flags
&= (ALTQF_CANTCHANGE
|ALTQF_ENABLED
);
158 altq_module_incref(type
);
165 altq_detach(struct ifaltq
*ifq
)
167 if (!ALTQ_IS_READY(ifq
))
169 if (ALTQ_IS_ENABLED(ifq
))
171 if (!ALTQ_IS_ATTACHED(ifq
))
175 altq_module_declref(ifq
->altq_type
);
179 ifq
->altq_type
= ALTQT_NONE
;
180 ifq
->altq_disc
= NULL
;
181 ifq
->altq_enqueue
= NULL
;
182 ifq
->altq_dequeue
= NULL
;
183 ifq
->altq_request
= NULL
;
184 ifq
->altq_clfier
= NULL
;
185 ifq
->altq_classify
= NULL
;
186 ifq
->altq_flags
&= ALTQF_CANTCHANGE
;
191 altq_enable(struct ifaltq
*ifq
)
195 if (!ALTQ_IS_READY(ifq
))
197 if (ALTQ_IS_ENABLED(ifq
))
202 ASSERT(ifq
->ifq_len
== 0);
203 ifq
->altq_flags
|= ALTQF_ENABLED
;
204 if (ifq
->altq_clfier
!= NULL
)
205 ifq
->altq_flags
|= ALTQF_CLASSIFY
;
212 altq_disable(struct ifaltq
*ifq
)
216 if (!ALTQ_IS_ENABLED(ifq
))
221 ASSERT(ifq
->ifq_len
== 0);
222 ifq
->altq_flags
&= ~(ALTQF_ENABLED
|ALTQF_CLASSIFY
);
/*
 * Report a failed ASSERT() and panic; never returns.
 */
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
250 tbr_dequeue(struct ifaltq
*ifq
, int op
)
252 struct tb_regulator
*tbr
;
258 if (op
== ALTDQ_REMOVE
&& tbr
->tbr_lastop
== ALTDQ_POLL
) {
259 /* if this is a remove after poll, bypass tbr check */
261 /* update token only when it is negative */
262 if (tbr
->tbr_token
<= 0) {
263 now
= read_machclk();
264 interval
= now
- tbr
->tbr_last
;
265 if (interval
>= tbr
->tbr_filluptime
)
266 tbr
->tbr_token
= tbr
->tbr_depth
;
268 tbr
->tbr_token
+= interval
* tbr
->tbr_rate
;
269 if (tbr
->tbr_token
> tbr
->tbr_depth
)
270 tbr
->tbr_token
= tbr
->tbr_depth
;
274 /* if token is still negative, don't allow dequeue */
275 if (tbr
->tbr_token
<= 0)
279 if (ALTQ_IS_ENABLED(ifq
))
280 m
= (*ifq
->altq_dequeue
)(ifq
, op
);
282 if (op
== ALTDQ_POLL
)
288 if (m
!= NULL
&& op
== ALTDQ_REMOVE
)
289 tbr
->tbr_token
-= TBR_SCALE(m_pktlen(m
));
290 tbr
->tbr_lastop
= op
;
295 * set a token bucket regulator.
296 * if the specified rate is zero, the token bucket regulator is deleted.
299 tbr_set(struct ifaltq
*ifq
, struct tb_profile
*profile
)
301 struct tb_regulator
*tbr
, *otbr
;
303 if (machclk_freq
== 0)
305 if (machclk_freq
== 0) {
306 printf("tbr_set: no CPU clock available!\n");
310 if (profile
->rate
== 0) {
311 /* delete this tbr */
312 if ((tbr
= ifq
->altq_tbr
) == NULL
)
314 ifq
->altq_tbr
= NULL
;
319 tbr
= malloc(sizeof(struct tb_regulator
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
323 tbr
->tbr_rate
= TBR_SCALE(profile
->rate
/ 8) / machclk_freq
;
324 tbr
->tbr_depth
= TBR_SCALE(profile
->depth
);
325 if (tbr
->tbr_rate
> 0)
326 tbr
->tbr_filluptime
= tbr
->tbr_depth
/ tbr
->tbr_rate
;
328 tbr
->tbr_filluptime
= 0xffffffffffffffffLL
;
329 tbr
->tbr_token
= tbr
->tbr_depth
;
330 tbr
->tbr_last
= read_machclk();
331 tbr
->tbr_lastop
= ALTDQ_REMOVE
;
333 otbr
= ifq
->altq_tbr
;
334 ifq
->altq_tbr
= tbr
; /* set the new tbr */
337 free(otbr
, M_DEVBUF
);
339 if (tbr_timer
== 0) {
340 CALLOUT_RESET(&tbr_callout
, 1, tbr_timeout
, (void *)0);
348 * tbr_timeout goes through the interface list, and kicks the drivers
352 tbr_timeout(void *arg
)
359 for (ifp
= TAILQ_FIRST(&ifnet
); ifp
; ifp
= TAILQ_NEXT(ifp
, if_list
)) {
360 if (!TBR_IS_ENABLED(&ifp
->if_snd
))
363 if (!IFQ_IS_EMPTY(&ifp
->if_snd
) && ifp
->if_start
!= NULL
)
364 (*ifp
->if_start
)(ifp
);
368 CALLOUT_RESET(&tbr_callout
, 1, tbr_timeout
, (void *)0);
370 tbr_timer
= 0; /* don't need tbr_timer anymore */
374 * get token bucket regulator profile
377 tbr_get(struct ifaltq
*ifq
, struct tb_profile
*profile
)
379 struct tb_regulator
*tbr
;
381 if ((tbr
= ifq
->altq_tbr
) == NULL
) {
386 (u_int
)TBR_UNSCALE(tbr
->tbr_rate
* 8 * machclk_freq
);
387 profile
->depth
= (u_int
)TBR_UNSCALE(tbr
->tbr_depth
);
394 * attach a discipline to the interface. if one already exists, it is
398 altq_pfattach(struct pf_altq
*a
)
402 switch (a
->scheduler
) {
407 error
= cbq_pfattach(a
);
412 error
= priq_pfattach(a
);
417 error
= hfsc_pfattach(a
);
428 * detach a discipline from the interface.
429 * it is possible that the discipline was already overridden by another
433 altq_pfdetach(struct pf_altq
*a
)
438 if ((ifp
= ifunit(a
->ifname
)) == NULL
)
441 /* if this discipline is no longer referenced, just return */
442 if (a
->altq_disc
== NULL
|| a
->altq_disc
!= ifp
->if_snd
.altq_disc
)
446 if (ALTQ_IS_ENABLED(&ifp
->if_snd
))
447 error
= altq_disable(&ifp
->if_snd
);
449 error
= altq_detach(&ifp
->if_snd
);
456 * add a discipline or a queue
459 altq_add(struct pf_altq
*a
)
463 if (a
->qname
[0] != 0)
464 return (altq_add_queue(a
));
466 if (machclk_freq
== 0)
468 if (machclk_freq
== 0)
469 panic("altq_add: no CPU clock");
471 switch (a
->scheduler
) {
474 error
= cbq_add_altq(a
);
479 error
= priq_add_altq(a
);
484 error
= hfsc_add_altq(a
);
495 * remove a discipline or a queue
498 altq_remove(struct pf_altq
*a
)
502 if (a
->qname
[0] != 0)
503 return (altq_remove_queue(a
));
505 switch (a
->scheduler
) {
508 error
= cbq_remove_altq(a
);
513 error
= priq_remove_altq(a
);
518 error
= hfsc_remove_altq(a
);
529 * add a queue to the discipline
532 altq_add_queue(struct pf_altq
*a
)
536 switch (a
->scheduler
) {
539 error
= cbq_add_queue(a
);
544 error
= priq_add_queue(a
);
549 error
= hfsc_add_queue(a
);
560 * remove a queue from the discipline
563 altq_remove_queue(struct pf_altq
*a
)
567 switch (a
->scheduler
) {
570 error
= cbq_remove_queue(a
);
575 error
= priq_remove_queue(a
);
580 error
= hfsc_remove_queue(a
);
591 * get queue statistics
594 altq_getqstats(struct pf_altq
*a
, void *ubuf
, int *nbytes
)
598 switch (a
->scheduler
) {
601 error
= cbq_getqstats(a
, ubuf
, nbytes
);
606 error
= priq_getqstats(a
, ubuf
, nbytes
);
611 error
= hfsc_getqstats(a
, ubuf
, nbytes
);
623 * read and write diffserv field in IPv4 or IPv6 header
626 read_dsfield(struct mbuf
*m
, struct altq_pktattr
*pktattr
)
629 u_int8_t ds_field
= 0;
631 if (pktattr
== NULL
||
632 (pktattr
->pattr_af
!= AF_INET
&& pktattr
->pattr_af
!= AF_INET6
))
633 return ((u_int8_t
)0);
635 /* verify that pattr_hdr is within the mbuf data */
636 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
)
637 if (((char *)pktattr
->pattr_hdr
>= m0
->m_data
) &&
638 ((char *)pktattr
->pattr_hdr
< m0
->m_data
+ m0
->m_len
))
641 /* ick, pattr_hdr is stale */
642 pktattr
->pattr_af
= AF_UNSPEC
;
644 printf("read_dsfield: can't locate header!\n");
646 return ((u_int8_t
)0);
649 if (pktattr
->pattr_af
== AF_INET
) {
650 struct ip
*ip
= (struct ip
*)pktattr
->pattr_hdr
;
653 return ((u_int8_t
)0); /* version mismatch! */
654 ds_field
= ip
->ip_tos
;
657 else if (pktattr
->pattr_af
== AF_INET6
) {
658 struct ip6_hdr
*ip6
= (struct ip6_hdr
*)pktattr
->pattr_hdr
;
661 flowlabel
= ntohl(ip6
->ip6_flow
);
662 if ((flowlabel
>> 28) != 6)
663 return ((u_int8_t
)0); /* version mismatch! */
664 ds_field
= (flowlabel
>> 20) & 0xff;
671 write_dsfield(struct mbuf
*m
, struct altq_pktattr
*pktattr
, u_int8_t dsfield
)
675 if (pktattr
== NULL
||
676 (pktattr
->pattr_af
!= AF_INET
&& pktattr
->pattr_af
!= AF_INET6
))
679 /* verify that pattr_hdr is within the mbuf data */
680 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
)
681 if (((char *)pktattr
->pattr_hdr
>= m0
->m_data
) &&
682 ((char *)pktattr
->pattr_hdr
< m0
->m_data
+ m0
->m_len
))
685 /* ick, pattr_hdr is stale */
686 pktattr
->pattr_af
= AF_UNSPEC
;
688 printf("write_dsfield: can't locate header!\n");
693 if (pktattr
->pattr_af
== AF_INET
) {
694 struct ip
*ip
= (struct ip
*)pktattr
->pattr_hdr
;
699 return; /* version mismatch! */
701 dsfield
|= old
& 3; /* leave CU bits */
704 ip
->ip_tos
= dsfield
;
706 * update checksum (from RFC1624)
707 * HC' = ~(~HC + ~m + m')
709 sum
= ~ntohs(ip
->ip_sum
) & 0xffff;
710 sum
+= 0xff00 + (~old
& 0xff) + dsfield
;
711 sum
= (sum
>> 16) + (sum
& 0xffff);
712 sum
+= (sum
>> 16); /* add carry */
714 ip
->ip_sum
= htons(~sum
& 0xffff);
717 else if (pktattr
->pattr_af
== AF_INET6
) {
718 struct ip6_hdr
*ip6
= (struct ip6_hdr
*)pktattr
->pattr_hdr
;
721 flowlabel
= ntohl(ip6
->ip6_flow
);
722 if ((flowlabel
>> 28) != 6)
723 return; /* version mismatch! */
724 flowlabel
= (flowlabel
& 0xf03fffff) | (dsfield
<< 20);
725 ip6
->ip6_flow
= htonl(flowlabel
);
731 #define BINTIME_SHIFT 2
733 u_int32_t machclk_freq
= 0;
734 u_int32_t machclk_per_tick
= 0;
740 callout_init(&tbr_callout
, 0);
743 * Always emulate 1GiHz counter using bintime(9)
744 * since it has enough resolution via timecounter(9).
745 * Using machine dependent cpu_counter() is not MP safe
746 * and it won't work even on UP with Speedstep etc.
748 machclk_freq
= 1024 * 1024 * 1024; /* 2^30 to emulate ~1GHz */
749 machclk_per_tick
= machclk_freq
/ hz
;
751 printf("altq: emulate %uHz CPU clock\n", machclk_freq
);
762 val
= (((u_int64_t
)bt
.sec
<< 32) + (bt
.frac
>> 32)) >> BINTIME_SHIFT
;
766 #ifdef ALTQ3_CLFIER_COMPAT
769 #define IPPROTO_ESP 50 /* encapsulating security payload */
772 #define IPPROTO_AH 51 /* authentication header */
776 * extract flow information from a given packet.
777 * filt_mask shows flowinfo fields required.
778 * we assume the ip header is in one mbuf, and addresses and ports are
779 * in network byte order.
782 altq_extractflow(struct mbuf
*m
, int af
, struct flowinfo
*flow
,
783 u_int32_t filt_bmask
)
788 struct flowinfo_in
*fin
;
791 ip
= mtod(m
, struct ip
*);
796 fin
= (struct flowinfo_in
*)flow
;
797 fin
->fi_len
= sizeof(struct flowinfo_in
);
798 fin
->fi_family
= AF_INET
;
800 fin
->fi_proto
= ip
->ip_p
;
801 fin
->fi_tos
= ip
->ip_tos
;
803 fin
->fi_src
.s_addr
= ip
->ip_src
.s_addr
;
804 fin
->fi_dst
.s_addr
= ip
->ip_dst
.s_addr
;
806 if (filt_bmask
& FIMB4_PORTS
)
807 /* if port info is required, extract port numbers */
808 extract_ports4(m
, ip
, fin
);
819 struct flowinfo_in6
*fin6
;
822 ip6
= mtod(m
, struct ip6_hdr
*);
823 /* should we check the ip version? */
825 fin6
= (struct flowinfo_in6
*)flow
;
826 fin6
->fi6_len
= sizeof(struct flowinfo_in6
);
827 fin6
->fi6_family
= AF_INET6
;
829 fin6
->fi6_proto
= ip6
->ip6_nxt
;
830 fin6
->fi6_tclass
= (ntohl(ip6
->ip6_flow
) >> 20) & 0xff;
832 fin6
->fi6_flowlabel
= ip6
->ip6_flow
& htonl(0x000fffff);
833 fin6
->fi6_src
= ip6
->ip6_src
;
834 fin6
->fi6_dst
= ip6
->ip6_dst
;
836 if ((filt_bmask
& FIMB6_PORTS
) ||
837 ((filt_bmask
& FIMB6_PROTO
)
838 && ip6
->ip6_nxt
> IPPROTO_IPV6
))
840 * if port info is required, or proto is required
841 * but there are option headers, extract port
842 * and protocol numbers.
844 extract_ports6(m
, ip6
, fin6
);
859 flow
->fi_len
= sizeof(struct flowinfo
);
860 flow
->fi_family
= AF_UNSPEC
;
865 * helper routine to extract port numbers
867 /* structure for ipsec and ipv6 option header template */
869 u_int8_t opt6_nxt
; /* next header */
870 u_int8_t opt6_hlen
; /* header extension length */
872 u_int32_t ah_spi
; /* security parameter index
873 for authentication header */
877 * extract port numbers from a ipv4 packet.
880 extract_ports4(struct mbuf
*m
, struct ip
*ip
, struct flowinfo_in
*fin
)
891 ip_off
= ntohs(ip
->ip_off
);
892 /* if it is a fragment, try cached fragment info */
893 if (ip_off
& IP_OFFMASK
) {
894 ip4f_lookup(ip
, fin
);
898 /* locate the mbuf containing the protocol header */
899 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
)
900 if (((char *)ip
>= m0
->m_data
) &&
901 ((char *)ip
< m0
->m_data
+ m0
->m_len
))
905 printf("extract_ports4: can't locate header! ip=%p\n", ip
);
909 off
= ((char *)ip
- m0
->m_data
) + (ip
->ip_hl
<< 2);
915 while (off
>= m0
->m_len
) {
919 return (0); /* bogus ip_hl! */
921 if (m0
->m_len
< off
+ 4)
929 udp
= (struct udphdr
*)(mtod(m0
, char *) + off
);
930 fin
->fi_sport
= udp
->uh_sport
;
931 fin
->fi_dport
= udp
->uh_dport
;
932 fin
->fi_proto
= proto
;
938 if (fin
->fi_gpi
== 0){
941 gpi
= (u_int32_t
*)(mtod(m0
, char *) + off
);
944 fin
->fi_proto
= proto
;
948 /* get next header and header length */
951 opt6
= (struct _opt6
*)(mtod(m0
, char *) + off
);
952 proto
= opt6
->opt6_nxt
;
953 off
+= 8 + (opt6
->opt6_hlen
* 4);
954 if (fin
->fi_gpi
== 0 && m0
->m_len
>= off
+ 8)
955 fin
->fi_gpi
= opt6
->ah_spi
;
957 /* goto the next header */
959 #endif /* ALTQ_IPSEC */
962 fin
->fi_proto
= proto
;
966 /* if this is a first fragment, cache it. */
975 extract_ports6(struct mbuf
*m
, struct ip6_hdr
*ip6
, struct flowinfo_in6
*fin6
)
985 /* locate the mbuf containing the protocol header */
986 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
)
987 if (((char *)ip6
>= m0
->m_data
) &&
988 ((char *)ip6
< m0
->m_data
+ m0
->m_len
))
992 printf("extract_ports6: can't locate header! ip6=%p\n", ip6
);
996 off
= ((char *)ip6
- m0
->m_data
) + sizeof(struct ip6_hdr
);
998 proto
= ip6
->ip6_nxt
;
1000 while (off
>= m0
->m_len
) {
1006 if (m0
->m_len
< off
+ 4)
1014 udp
= (struct udphdr
*)(mtod(m0
, char *) + off
);
1015 fin6
->fi6_sport
= udp
->uh_sport
;
1016 fin6
->fi6_dport
= udp
->uh_dport
;
1017 fin6
->fi6_proto
= proto
;
1022 if (fin6
->fi6_gpi
== 0) {
1025 gpi
= (u_int32_t
*)(mtod(m0
, char *) + off
);
1026 fin6
->fi6_gpi
= *gpi
;
1028 fin6
->fi6_proto
= proto
;
1032 /* get next header and header length */
1035 opt6
= (struct _opt6
*)(mtod(m0
, char *) + off
);
1036 if (fin6
->fi6_gpi
== 0 && m0
->m_len
>= off
+ 8)
1037 fin6
->fi6_gpi
= opt6
->ah_spi
;
1038 proto
= opt6
->opt6_nxt
;
1039 off
+= 8 + (opt6
->opt6_hlen
* 4);
1040 /* goto the next header */
1044 case IPPROTO_HOPOPTS
:
1045 case IPPROTO_ROUTING
:
1046 case IPPROTO_DSTOPTS
: {
1047 /* get next header and header length */
1050 opt6
= (struct _opt6
*)(mtod(m0
, char *) + off
);
1051 proto
= opt6
->opt6_nxt
;
1052 off
+= (opt6
->opt6_hlen
+ 1) * 8;
1053 /* goto the next header */
1057 case IPPROTO_FRAGMENT
:
1058 /* ipv6 fragmentations are not supported yet */
1060 fin6
->fi6_proto
= proto
;
1069 * altq common classifier
1072 acc_add_filter(struct acc_classifier
*classifier
, struct flow_filter
*filter
,
1073 void *class, u_long
*phandle
)
1075 struct acc_filter
*afp
, *prev
, *tmp
;
1079 if (filter
->ff_flow
.fi_family
!= AF_INET
&&
1080 filter
->ff_flow
.fi_family
!= AF_INET6
)
1083 if (filter
->ff_flow
.fi_family
!= AF_INET
)
1087 afp
= malloc(sizeof(struct acc_filter
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
1091 afp
->f_filter
= *filter
;
1092 afp
->f_class
= class;
1094 i
= ACC_WILDCARD_INDEX
;
1095 if (filter
->ff_flow
.fi_family
== AF_INET
) {
1096 struct flow_filter
*filter4
= &afp
->f_filter
;
1099 * if address is 0, it's a wildcard. if address mask
1100 * isn't set, use full mask.
1102 if (filter4
->ff_flow
.fi_dst
.s_addr
== 0)
1103 filter4
->ff_mask
.mask_dst
.s_addr
= 0;
1104 else if (filter4
->ff_mask
.mask_dst
.s_addr
== 0)
1105 filter4
->ff_mask
.mask_dst
.s_addr
= 0xffffffff;
1106 if (filter4
->ff_flow
.fi_src
.s_addr
== 0)
1107 filter4
->ff_mask
.mask_src
.s_addr
= 0;
1108 else if (filter4
->ff_mask
.mask_src
.s_addr
== 0)
1109 filter4
->ff_mask
.mask_src
.s_addr
= 0xffffffff;
1111 /* clear extra bits in addresses */
1112 filter4
->ff_flow
.fi_dst
.s_addr
&=
1113 filter4
->ff_mask
.mask_dst
.s_addr
;
1114 filter4
->ff_flow
.fi_src
.s_addr
&=
1115 filter4
->ff_mask
.mask_src
.s_addr
;
1118 * if dst address is a wildcard, use hash-entry
1119 * ACC_WILDCARD_INDEX.
1121 if (filter4
->ff_mask
.mask_dst
.s_addr
!= 0xffffffff)
1122 i
= ACC_WILDCARD_INDEX
;
1124 i
= ACC_GET_HASH_INDEX(filter4
->ff_flow
.fi_dst
.s_addr
);
1127 else if (filter
->ff_flow
.fi_family
== AF_INET6
) {
1128 struct flow_filter6
*filter6
=
1129 (struct flow_filter6
*)&afp
->f_filter
;
1130 #ifndef IN6MASK0 /* taken from kame ipv6 */
1131 #define IN6MASK0 {{{ 0, 0, 0, 0 }}}
1132 #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
1133 const struct in6_addr in6mask0
= IN6MASK0
;
1134 const struct in6_addr in6mask128
= IN6MASK128
;
1137 if (IN6_IS_ADDR_UNSPECIFIED(&filter6
->ff_flow6
.fi6_dst
))
1138 filter6
->ff_mask6
.mask6_dst
= in6mask0
;
1139 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6
->ff_mask6
.mask6_dst
))
1140 filter6
->ff_mask6
.mask6_dst
= in6mask128
;
1141 if (IN6_IS_ADDR_UNSPECIFIED(&filter6
->ff_flow6
.fi6_src
))
1142 filter6
->ff_mask6
.mask6_src
= in6mask0
;
1143 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6
->ff_mask6
.mask6_src
))
1144 filter6
->ff_mask6
.mask6_src
= in6mask128
;
1146 /* clear extra bits in addresses */
1147 for (i
= 0; i
< 16; i
++)
1148 filter6
->ff_flow6
.fi6_dst
.s6_addr
[i
] &=
1149 filter6
->ff_mask6
.mask6_dst
.s6_addr
[i
];
1150 for (i
= 0; i
< 16; i
++)
1151 filter6
->ff_flow6
.fi6_src
.s6_addr
[i
] &=
1152 filter6
->ff_mask6
.mask6_src
.s6_addr
[i
];
1154 if (filter6
->ff_flow6
.fi6_flowlabel
== 0)
1155 i
= ACC_WILDCARD_INDEX
;
1157 i
= ACC_GET_HASH_INDEX(filter6
->ff_flow6
.fi6_flowlabel
);
1161 afp
->f_handle
= get_filt_handle(classifier
, i
);
1163 /* update filter bitmask */
1164 afp
->f_fbmask
= filt2fibmask(filter
);
1165 classifier
->acc_fbmask
|= afp
->f_fbmask
;
1168 * add this filter to the filter list.
1169 * filters are ordered from the highest rule number.
1173 LIST_FOREACH(tmp
, &classifier
->acc_filters
[i
], f_chain
) {
1174 if (tmp
->f_filter
.ff_ruleno
> afp
->f_filter
.ff_ruleno
)
1180 LIST_INSERT_HEAD(&classifier
->acc_filters
[i
], afp
, f_chain
);
1182 LIST_INSERT_AFTER(prev
, afp
, f_chain
);
1185 *phandle
= afp
->f_handle
;
1190 acc_delete_filter(struct acc_classifier
*classifier
, u_long handle
)
1192 struct acc_filter
*afp
;
1195 if ((afp
= filth_to_filtp(classifier
, handle
)) == NULL
)
1199 LIST_REMOVE(afp
, f_chain
);
1202 free(afp
, M_DEVBUF
);
1204 /* todo: update filt_bmask */
1210 * delete filters referencing to the specified class.
1211 * if the all flag is not 0, delete all the filters.
1214 acc_discard_filters(struct acc_classifier
*classifier
, void *class, int all
)
1216 struct acc_filter
*afp
;
1220 for (i
= 0; i
< ACC_FILTER_TABLESIZE
; i
++) {
1222 LIST_FOREACH(afp
, &classifier
->acc_filters
[i
], f_chain
)
1223 if (all
|| afp
->f_class
== class) {
1224 LIST_REMOVE(afp
, f_chain
);
1225 free(afp
, M_DEVBUF
);
1226 /* start again from the head */
1229 } while (afp
!= NULL
);
1234 classifier
->acc_fbmask
= 0;
1240 acc_classify(void *clfier
, struct mbuf
*m
, int af
)
1242 struct acc_classifier
*classifier
;
1243 struct flowinfo flow
;
1244 struct acc_filter
*afp
;
1247 classifier
= (struct acc_classifier
*)clfier
;
1248 altq_extractflow(m
, af
, &flow
, classifier
->acc_fbmask
);
1250 if (flow
.fi_family
== AF_INET
) {
1251 struct flowinfo_in
*fp
= (struct flowinfo_in
*)&flow
;
1253 if ((classifier
->acc_fbmask
& FIMB4_ALL
) == FIMB4_TOS
) {
1254 /* only tos is used */
1256 &classifier
->acc_filters
[ACC_WILDCARD_INDEX
],
1258 if (apply_tosfilter4(afp
->f_fbmask
,
1259 &afp
->f_filter
, fp
))
1260 /* filter matched */
1261 return (afp
->f_class
);
1262 } else if ((classifier
->acc_fbmask
&
1263 (~(FIMB4_PROTO
|FIMB4_SPORT
|FIMB4_DPORT
) & FIMB4_ALL
))
1265 /* only proto and ports are used */
1267 &classifier
->acc_filters
[ACC_WILDCARD_INDEX
],
1269 if (apply_ppfilter4(afp
->f_fbmask
,
1270 &afp
->f_filter
, fp
))
1271 /* filter matched */
1272 return (afp
->f_class
);
1274 /* get the filter hash entry from its dest address */
1275 i
= ACC_GET_HASH_INDEX(fp
->fi_dst
.s_addr
);
1278 * go through this loop twice. first for dst
1279 * hash, second for wildcards.
1281 LIST_FOREACH(afp
, &classifier
->acc_filters
[i
],
1283 if (apply_filter4(afp
->f_fbmask
,
1284 &afp
->f_filter
, fp
))
1285 /* filter matched */
1286 return (afp
->f_class
);
1289 * check again for filters with a dst addr
1291 * (daddr == 0 || dmask != 0xffffffff).
1293 if (i
!= ACC_WILDCARD_INDEX
)
1294 i
= ACC_WILDCARD_INDEX
;
1301 else if (flow
.fi_family
== AF_INET6
) {
1302 struct flowinfo_in6
*fp6
= (struct flowinfo_in6
*)&flow
;
1304 /* get the filter hash entry from its flow ID */
1305 if (fp6
->fi6_flowlabel
!= 0)
1306 i
= ACC_GET_HASH_INDEX(fp6
->fi6_flowlabel
);
1308 /* flowlable can be zero */
1309 i
= ACC_WILDCARD_INDEX
;
1311 /* go through this loop twice. first for flow hash, second
1314 LIST_FOREACH(afp
, &classifier
->acc_filters
[i
], f_chain
)
1315 if (apply_filter6(afp
->f_fbmask
,
1316 (struct flow_filter6
*)&afp
->f_filter
,
1318 /* filter matched */
1319 return (afp
->f_class
);
1322 * check again for filters with a wildcard.
1324 if (i
!= ACC_WILDCARD_INDEX
)
1325 i
= ACC_WILDCARD_INDEX
;
1332 /* no filter matched */
1337 apply_filter4(u_int32_t fbmask
, struct flow_filter
*filt
,
1338 struct flowinfo_in
*pkt
)
1340 if (filt
->ff_flow
.fi_family
!= AF_INET
)
1342 if ((fbmask
& FIMB4_SPORT
) && filt
->ff_flow
.fi_sport
!= pkt
->fi_sport
)
1344 if ((fbmask
& FIMB4_DPORT
) && filt
->ff_flow
.fi_dport
!= pkt
->fi_dport
)
1346 if ((fbmask
& FIMB4_DADDR
) &&
1347 filt
->ff_flow
.fi_dst
.s_addr
!=
1348 (pkt
->fi_dst
.s_addr
& filt
->ff_mask
.mask_dst
.s_addr
))
1350 if ((fbmask
& FIMB4_SADDR
) &&
1351 filt
->ff_flow
.fi_src
.s_addr
!=
1352 (pkt
->fi_src
.s_addr
& filt
->ff_mask
.mask_src
.s_addr
))
1354 if ((fbmask
& FIMB4_PROTO
) && filt
->ff_flow
.fi_proto
!= pkt
->fi_proto
)
1356 if ((fbmask
& FIMB4_TOS
) && filt
->ff_flow
.fi_tos
!=
1357 (pkt
->fi_tos
& filt
->ff_mask
.mask_tos
))
1359 if ((fbmask
& FIMB4_GPI
) && filt
->ff_flow
.fi_gpi
!= (pkt
->fi_gpi
))
1366 * filter matching function optimized for a common case that checks
1367 * only protocol and port numbers
1370 apply_ppfilter4(u_int32_t fbmask
, struct flow_filter
*filt
,
1371 struct flowinfo_in
*pkt
)
1373 if (filt
->ff_flow
.fi_family
!= AF_INET
)
1375 if ((fbmask
& FIMB4_SPORT
) && filt
->ff_flow
.fi_sport
!= pkt
->fi_sport
)
1377 if ((fbmask
& FIMB4_DPORT
) && filt
->ff_flow
.fi_dport
!= pkt
->fi_dport
)
1379 if ((fbmask
& FIMB4_PROTO
) && filt
->ff_flow
.fi_proto
!= pkt
->fi_proto
)
1386 * filter matching function only for tos field.
1389 apply_tosfilter4(u_int32_t fbmask
, struct flow_filter
*filt
,
1390 struct flowinfo_in
*pkt
)
1392 if (filt
->ff_flow
.fi_family
!= AF_INET
)
1394 if ((fbmask
& FIMB4_TOS
) && filt
->ff_flow
.fi_tos
!=
1395 (pkt
->fi_tos
& filt
->ff_mask
.mask_tos
))
1403 apply_filter6(u_int32_t fbmask
, struct flow_filter6
*filt
,
1404 struct flowinfo_in6
*pkt
)
1408 if (filt
->ff_flow6
.fi6_family
!= AF_INET6
)
1410 if ((fbmask
& FIMB6_FLABEL
) &&
1411 filt
->ff_flow6
.fi6_flowlabel
!= pkt
->fi6_flowlabel
)
1413 if ((fbmask
& FIMB6_PROTO
) &&
1414 filt
->ff_flow6
.fi6_proto
!= pkt
->fi6_proto
)
1416 if ((fbmask
& FIMB6_SPORT
) &&
1417 filt
->ff_flow6
.fi6_sport
!= pkt
->fi6_sport
)
1419 if ((fbmask
& FIMB6_DPORT
) &&
1420 filt
->ff_flow6
.fi6_dport
!= pkt
->fi6_dport
)
1422 if (fbmask
& FIMB6_SADDR
) {
1423 for (i
= 0; i
< 4; i
++)
1424 if (filt
->ff_flow6
.fi6_src
.s6_addr32
[i
] !=
1425 (pkt
->fi6_src
.s6_addr32
[i
] &
1426 filt
->ff_mask6
.mask6_src
.s6_addr32
[i
]))
1429 if (fbmask
& FIMB6_DADDR
) {
1430 for (i
= 0; i
< 4; i
++)
1431 if (filt
->ff_flow6
.fi6_dst
.s6_addr32
[i
] !=
1432 (pkt
->fi6_dst
.s6_addr32
[i
] &
1433 filt
->ff_mask6
.mask6_dst
.s6_addr32
[i
]))
1436 if ((fbmask
& FIMB6_TCLASS
) &&
1437 filt
->ff_flow6
.fi6_tclass
!=
1438 (pkt
->fi6_tclass
& filt
->ff_mask6
.mask6_tclass
))
1440 if ((fbmask
& FIMB6_GPI
) &&
1441 filt
->ff_flow6
.fi6_gpi
!= pkt
->fi6_gpi
)
1450 * bit 20-28: index to the filter hash table
1451 * bit 0-19: unique id in the hash bucket.
1454 get_filt_handle(struct acc_classifier
*classifier
, int i
)
1456 static u_long handle_number
= 1;
1458 struct acc_filter
*afp
;
1461 handle
= handle_number
++ & 0x000fffff;
1463 if (LIST_EMPTY(&classifier
->acc_filters
[i
]))
1466 LIST_FOREACH(afp
, &classifier
->acc_filters
[i
], f_chain
)
1467 if ((afp
->f_handle
& 0x000fffff) == handle
)
1471 /* this handle is already used, try again */
1474 return ((i
<< 20) | handle
);
1477 /* convert filter handle to filter pointer */
1478 static struct acc_filter
*
1479 filth_to_filtp(struct acc_classifier
*classifier
, u_long handle
)
1481 struct acc_filter
*afp
;
1484 i
= ACC_GET_HINDEX(handle
);
1486 LIST_FOREACH(afp
, &classifier
->acc_filters
[i
], f_chain
)
1487 if (afp
->f_handle
== handle
)
1493 /* create flowinfo bitmask */
1495 filt2fibmask(struct flow_filter
*filt
)
1499 struct flow_filter6
*filt6
;
1502 switch (filt
->ff_flow
.fi_family
) {
1504 if (filt
->ff_flow
.fi_proto
!= 0)
1505 mask
|= FIMB4_PROTO
;
1506 if (filt
->ff_flow
.fi_tos
!= 0)
1508 if (filt
->ff_flow
.fi_dst
.s_addr
!= 0)
1509 mask
|= FIMB4_DADDR
;
1510 if (filt
->ff_flow
.fi_src
.s_addr
!= 0)
1511 mask
|= FIMB4_SADDR
;
1512 if (filt
->ff_flow
.fi_sport
!= 0)
1513 mask
|= FIMB4_SPORT
;
1514 if (filt
->ff_flow
.fi_dport
!= 0)
1515 mask
|= FIMB4_DPORT
;
1516 if (filt
->ff_flow
.fi_gpi
!= 0)
1521 filt6
= (struct flow_filter6
*)filt
;
1523 if (filt6
->ff_flow6
.fi6_proto
!= 0)
1524 mask
|= FIMB6_PROTO
;
1525 if (filt6
->ff_flow6
.fi6_tclass
!= 0)
1526 mask
|= FIMB6_TCLASS
;
1527 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6
->ff_flow6
.fi6_dst
))
1528 mask
|= FIMB6_DADDR
;
1529 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6
->ff_flow6
.fi6_src
))
1530 mask
|= FIMB6_SADDR
;
1531 if (filt6
->ff_flow6
.fi6_sport
!= 0)
1532 mask
|= FIMB6_SPORT
;
1533 if (filt6
->ff_flow6
.fi6_dport
!= 0)
1534 mask
|= FIMB6_DPORT
;
1535 if (filt6
->ff_flow6
.fi6_gpi
!= 0)
1537 if (filt6
->ff_flow6
.fi6_flowlabel
!= 0)
1538 mask
|= FIMB6_FLABEL
;
1547 * helper functions to handle IPv4 fragments.
1548 * currently only in-sequence fragments are handled.
1549 * - fragment info is cached in a LRU list.
1550 * - when a first fragment is found, cache its flow info.
1551 * - when a non-first fragment is found, lookup the cache.
1555 TAILQ_ENTRY(ip4_frag
) ip4f_chain
;
1558 struct flowinfo_in ip4f_info
;
1561 static TAILQ_HEAD(ip4f_list
, ip4_frag
) ip4f_list
; /* IPv4 fragment cache */
1563 #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */
1567 ip4f_cache(struct ip
*ip
, struct flowinfo_in
*fin
)
1569 struct ip4_frag
*fp
;
1571 if (TAILQ_EMPTY(&ip4f_list
)) {
1572 /* first time call, allocate fragment cache entries. */
1573 if (ip4f_init() < 0)
1574 /* allocation failed! */
1579 fp
->ip4f_id
= ip
->ip_id
;
1580 fp
->ip4f_info
.fi_proto
= ip
->ip_p
;
1581 fp
->ip4f_info
.fi_src
.s_addr
= ip
->ip_src
.s_addr
;
1582 fp
->ip4f_info
.fi_dst
.s_addr
= ip
->ip_dst
.s_addr
;
1584 /* save port numbers */
1585 fp
->ip4f_info
.fi_sport
= fin
->fi_sport
;
1586 fp
->ip4f_info
.fi_dport
= fin
->fi_dport
;
1587 fp
->ip4f_info
.fi_gpi
= fin
->fi_gpi
;
1591 ip4f_lookup(struct ip
*ip
, struct flowinfo_in
*fin
)
1593 struct ip4_frag
*fp
;
1595 for (fp
= TAILQ_FIRST(&ip4f_list
); fp
!= NULL
&& fp
->ip4f_valid
;
1596 fp
= TAILQ_NEXT(fp
, ip4f_chain
))
1597 if (ip
->ip_id
== fp
->ip4f_id
&&
1598 ip
->ip_src
.s_addr
== fp
->ip4f_info
.fi_src
.s_addr
&&
1599 ip
->ip_dst
.s_addr
== fp
->ip4f_info
.fi_dst
.s_addr
&&
1600 ip
->ip_p
== fp
->ip4f_info
.fi_proto
) {
1602 /* found the matching entry */
1603 fin
->fi_sport
= fp
->ip4f_info
.fi_sport
;
1604 fin
->fi_dport
= fp
->ip4f_info
.fi_dport
;
1605 fin
->fi_gpi
= fp
->ip4f_info
.fi_gpi
;
1607 if ((ntohs(ip
->ip_off
) & IP_MF
) == 0)
1608 /* this is the last fragment,
1609 release the entry. */
1615 /* no matching entry found */
1622 struct ip4_frag
*fp
;
1625 TAILQ_INIT(&ip4f_list
);
1626 for (i
=0; i
<IP4F_TABSIZE
; i
++) {
1627 fp
= malloc(sizeof(struct ip4_frag
), M_DEVBUF
, M_NOWAIT
);
1629 printf("ip4f_init: can't alloc %dth entry!\n", i
);
1635 TAILQ_INSERT_TAIL(&ip4f_list
, fp
, ip4f_chain
);
1640 static struct ip4_frag
*
1643 struct ip4_frag
*fp
;
1645 /* reclaim an entry at the tail, put it at the head */
1646 fp
= TAILQ_LAST(&ip4f_list
, ip4f_list
);
1647 TAILQ_REMOVE(&ip4f_list
, fp
, ip4f_chain
);
1649 TAILQ_INSERT_HEAD(&ip4f_list
, fp
, ip4f_chain
);
1654 ip4f_free(struct ip4_frag
*fp
)
1656 TAILQ_REMOVE(&ip4f_list
, fp
, ip4f_chain
);
1658 TAILQ_INSERT_TAIL(&ip4f_list
, fp
, ip4f_chain
);
1661 #endif /* ALTQ3_CLFIER_COMPAT */