1 /* $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $ */
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * My grandfather said that there's a devil inside tunnelling technology...
34 * We have surprisingly many protocols that want packets with IP protocol
35 * #4 or #41. Here's a list of protocols that want protocol #41:
36 * RFC1933 configured tunnel
37 * RFC1933 automatic tunnel
38 * RFC2401 IPsec tunnel
39 * RFC2473 IPv6 generic packet tunnelling
40 * RFC2529 6over4 tunnel
43 * mobile-ip6 (uses RFC2473)
44 * Here's a list of protocols that want protocol #4:
45 * RFC1853 IPv4-in-IPv4 tunnelling
46 * RFC2003 IPv4 encapsulation within IPv4
47 * RFC2344 reverse tunnelling for mobile-ip4
48 * RFC2401 IPsec tunnel
49 * Well, what can I say. They impose different en/decapsulation mechanisms
50 * from each other, so they need separate protocol handlers. The only one
51 * we can easily determine by protocol # is IPsec, which always has
52 * AH/ESP/IPComp header right after outer IP header.
54 * So, clearly good old protosw does not work for protocol #4 and #41.
55 * The code will let you match protocol via src/dst address pair.
57 /* XXX is M_NETADDR correct? */
60 * With USE_RADIX the code will use radix table for tunnel lookup, for
61 * tunnels registered with encap_attach() with an addr/mask pair.
62 * Faster on machines with thousands of tunnel registrations (= interfaces).
64 * The code assumes that radix table code can handle non-contiguous netmask,
65 * as it will pass radix table memory region with (src + dst) sockaddr pair.
67 * FreeBSD is excluded here as they make max_keylen a static variable, and
68 * thus forbid definition of radix table other than proper domains.
71 * !!NOTE: dom_maxrtkey assumes USE_RADIX is defined.
76 #include <sys/cdefs.h>
77 __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.37 2009/04/18 14:58:05 tsutsui Exp $");
79 #include "opt_mrouting.h"
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/socket.h>
85 #include <sys/sockio.h>
87 #include <sys/errno.h>
88 #include <sys/protosw.h>
89 #include <sys/queue.h>
92 #include <net/route.h>
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #include <netinet/ip_var.h>
98 #include <netinet/ip_encap.h>
100 #include <netinet/ip_mroute.h>
101 #endif /* MROUTING */
104 #include <netinet/ip6.h>
105 #include <netinet6/ip6_var.h>
106 #include <netinet6/ip6protosw.h>
107 #include <netinet6/in6_var.h>
108 #include <netinet6/in6_pcb.h>
109 #include <netinet/icmp6.h>
112 #include <machine/stdarg.h>
114 #include <net/net_osdep.h>
116 enum direction
{ INBOUND
, OUTBOUND
};
119 static struct encaptab
*encap4_lookup(struct mbuf
*, int, int, enum direction
);
122 static struct encaptab
*encap6_lookup(struct mbuf
*, int, int, enum direction
);
124 static int encap_add(struct encaptab
*);
125 static int encap_remove(struct encaptab
*);
126 static int encap_afcheck(int, const struct sockaddr
*, const struct sockaddr
*);
128 static struct radix_node_head
*encap_rnh(int);
129 static int mask_matchlen(const struct sockaddr
*);
132 static int mask_match(const struct encaptab
*, const struct sockaddr
*,
133 const struct sockaddr
*);
135 static void encap_fillarg(struct mbuf
*, const struct encaptab
*);
137 LIST_HEAD(, encaptab
) encaptab
= LIST_HEAD_INITIALIZER(&encaptab
);
140 extern int max_keylen
; /* radix.c */
141 struct radix_node_head
*encap_head
[2]; /* 0 for AF_INET, 1 for AF_INET6 */
147 static int initialized
= 0;
154 * we cannot use LIST_INIT() here, since drivers may want to call
155 * encap_attach(), on driver attach. encap_init() will be called
156 * on AF_INET{,6} initialization, which happens after driver
157 * initialization - using LIST_INIT() here can nuke encap_attach()
160 LIST_INIT(&encaptab
);
165 * initialize radix lookup table when the radix subsystem is inited.
167 rn_delayedinit((void *)&encap_head
[0],
168 sizeof(struct sockaddr_pack
) << 3);
170 rn_delayedinit((void *)&encap_head
[1],
171 sizeof(struct sockaddr_pack
) << 3);
177 static struct encaptab
*
178 encap4_lookup(struct mbuf
*m
, int off
, int proto
, enum direction dir
)
181 struct ip_pack4 pack
;
182 struct encaptab
*ep
, *match
;
185 struct radix_node_head
*rnh
= encap_rnh(AF_INET
);
186 struct radix_node
*rn
;
190 if (m
->m_len
< sizeof(*ip
))
191 panic("encap4_lookup");
193 ip
= mtod(m
, struct ip
*);
195 memset(&pack
, 0, sizeof(pack
));
196 pack
.p
.sp_len
= sizeof(pack
);
197 pack
.mine
.sin_family
= pack
.yours
.sin_family
= AF_INET
;
198 pack
.mine
.sin_len
= pack
.yours
.sin_len
= sizeof(struct sockaddr_in
);
199 if (dir
== INBOUND
) {
200 pack
.mine
.sin_addr
= ip
->ip_dst
;
201 pack
.yours
.sin_addr
= ip
->ip_src
;
203 pack
.mine
.sin_addr
= ip
->ip_src
;
204 pack
.yours
.sin_addr
= ip
->ip_dst
;
211 rn
= rnh
->rnh_matchaddr((void *)&pack
, rnh
);
212 if (rn
&& (rn
->rn_flags
& RNF_ROOT
) == 0) {
213 match
= (struct encaptab
*)rn
;
214 matchprio
= mask_matchlen(match
->srcmask
) +
215 mask_matchlen(match
->dstmask
);
219 LIST_FOREACH(ep
, &encaptab
, chain
) {
220 if (ep
->af
!= AF_INET
)
222 if (ep
->proto
>= 0 && ep
->proto
!= proto
)
225 prio
= (*ep
->func
)(m
, off
, proto
, ep
->arg
);
230 prio
= mask_match(ep
, (struct sockaddr
*)&pack
.mine
,
231 (struct sockaddr
*)&pack
.yours
);
236 * We prioritize the matches by using bit length of the
237 * matches. mask_match() and user-supplied matching function
238 * should return the bit length of the matches (for example,
239 * if both src/dst are matched for IPv4, 64 should be returned).
240 * 0 or negative return value means "it did not match".
242 * The question is, since we have two "mask" portions, we
243 * cannot really define total order between entries.
244 * For example, which of these should be preferred?
245 * mask_match() returns 48 (32 + 16) for both of them.
246 * src=3ffe::/16, dst=3ffe:501::/32
247 * src=3ffe:501::/32, dst=3ffe::/16
249 * We need to loop through all the possible candidates
250 * to get the best match - the search takes O(n) for
251 * n attachments (i.e. interfaces).
253 * For radix-based lookup, I guess source takes precedence.
254 * See rn_{refines,lexobetter} for the correct answer.
258 if (prio
> matchprio
) {
270 encap4_input(struct mbuf
*m
, ...)
274 const struct protosw
*psw
;
275 struct encaptab
*match
;
278 off
= va_arg(ap
, int);
279 proto
= va_arg(ap
, int);
282 match
= encap4_lookup(m
, off
, proto
, INBOUND
);
285 /* found a match, "match" has the best one */
287 if (psw
&& psw
->pr_input
) {
288 encap_fillarg(m
, match
);
289 (*psw
->pr_input
)(m
, off
, proto
);
295 /* last resort: inject to raw socket */
296 rip_input(m
, off
, proto
);
301 static struct encaptab
*
302 encap6_lookup(struct mbuf
*m
, int off
, int proto
, enum direction dir
)
305 struct ip_pack6 pack
;
307 struct encaptab
*ep
, *match
;
309 struct radix_node_head
*rnh
= encap_rnh(AF_INET6
);
310 struct radix_node
*rn
;
314 if (m
->m_len
< sizeof(*ip6
))
315 panic("encap6_lookup");
317 ip6
= mtod(m
, struct ip6_hdr
*);
319 memset(&pack
, 0, sizeof(pack
));
320 pack
.p
.sp_len
= sizeof(pack
);
321 pack
.mine
.sin6_family
= pack
.yours
.sin6_family
= AF_INET6
;
322 pack
.mine
.sin6_len
= pack
.yours
.sin6_len
= sizeof(struct sockaddr_in6
);
323 if (dir
== INBOUND
) {
324 pack
.mine
.sin6_addr
= ip6
->ip6_dst
;
325 pack
.yours
.sin6_addr
= ip6
->ip6_src
;
327 pack
.mine
.sin6_addr
= ip6
->ip6_src
;
328 pack
.yours
.sin6_addr
= ip6
->ip6_dst
;
335 rn
= rnh
->rnh_matchaddr((void *)&pack
, rnh
);
336 if (rn
&& (rn
->rn_flags
& RNF_ROOT
) == 0) {
337 match
= (struct encaptab
*)rn
;
338 matchprio
= mask_matchlen(match
->srcmask
) +
339 mask_matchlen(match
->dstmask
);
343 LIST_FOREACH(ep
, &encaptab
, chain
) {
344 if (ep
->af
!= AF_INET6
)
346 if (ep
->proto
>= 0 && ep
->proto
!= proto
)
349 prio
= (*ep
->func
)(m
, off
, proto
, ep
->arg
);
354 prio
= mask_match(ep
, (struct sockaddr
*)&pack
.mine
,
355 (struct sockaddr
*)&pack
.yours
);
359 /* see encap4_lookup() for issues here */
362 if (prio
> matchprio
) {
374 encap6_input(struct mbuf
**mp
, int *offp
, int proto
)
376 struct mbuf
*m
= *mp
;
377 const struct ip6protosw
*psw
;
378 struct encaptab
*match
;
380 match
= encap6_lookup(m
, *offp
, proto
, INBOUND
);
384 psw
= (const struct ip6protosw
*)match
->psw
;
385 if (psw
&& psw
->pr_input
) {
386 encap_fillarg(m
, match
);
387 return (*psw
->pr_input
)(mp
, offp
, proto
);
394 /* last resort: inject to raw socket */
395 return rip6_input(mp
, offp
, proto
);
400 encap_add(struct encaptab
*ep
)
403 struct radix_node_head
*rnh
= encap_rnh(ep
->af
);
407 LIST_INSERT_HEAD(&encaptab
, ep
, chain
);
409 if (!ep
->func
&& rnh
) {
410 if (!rnh
->rnh_addaddr((void *)ep
->addrpack
,
411 (void *)ep
->maskpack
, rnh
, ep
->nodes
)) {
420 LIST_REMOVE(ep
, chain
);
425 encap_remove(struct encaptab
*ep
)
428 struct radix_node_head
*rnh
= encap_rnh(ep
->af
);
432 LIST_REMOVE(ep
, chain
);
434 if (!ep
->func
&& rnh
) {
435 if (!rnh
->rnh_deladdr((void *)ep
->addrpack
,
436 (void *)ep
->maskpack
, rnh
))
444 encap_afcheck(int af
, const struct sockaddr
*sp
, const struct sockaddr
*dp
)
447 if (sp
->sa_len
!= dp
->sa_len
)
449 if (af
!= sp
->sa_family
|| af
!= dp
->sa_family
)
451 } else if (!sp
&& !dp
)
458 if (sp
&& sp
->sa_len
!= sizeof(struct sockaddr_in
))
460 if (dp
&& dp
->sa_len
!= sizeof(struct sockaddr_in
))
465 if (sp
&& sp
->sa_len
!= sizeof(struct sockaddr_in6
))
467 if (dp
&& dp
->sa_len
!= sizeof(struct sockaddr_in6
))
479 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
480 * length of mask (sm and dm) is assumed to be same as sp/dp.
481 * Return value will be necessary as input (cookie) for encap_detach().
483 const struct encaptab
*
484 encap_attach(int af
, int proto
,
485 const struct sockaddr
*sp
, const struct sockaddr
*sm
,
486 const struct sockaddr
*dp
, const struct sockaddr
*dm
,
487 const struct protosw
*psw
, void *arg
)
493 struct ip_pack4
*pack4
;
495 struct ip_pack6
*pack6
;
499 /* sanity check on args */
500 error
= encap_afcheck(af
, sp
, dp
);
504 /* check if anyone has already attached with exactly the same config */
505 LIST_FOREACH(ep
, &encaptab
, chain
) {
508 if (ep
->proto
!= proto
)
513 if (!ep
->src
|| !ep
->dst
|| !ep
->srcmask
|| !ep
->dstmask
)
514 panic("null pointers in encaptab");
516 if (ep
->src
->sa_len
!= sp
->sa_len
||
517 memcmp(ep
->src
, sp
, sp
->sa_len
) != 0 ||
518 memcmp(ep
->srcmask
, sm
, sp
->sa_len
) != 0)
520 if (ep
->dst
->sa_len
!= dp
->sa_len
||
521 memcmp(ep
->dst
, dp
, dp
->sa_len
) != 0 ||
522 memcmp(ep
->dstmask
, dm
, dp
->sa_len
) != 0)
543 ep
= malloc(sizeof(*ep
), M_NETADDR
, M_NOWAIT
|M_ZERO
);
548 ep
->addrpack
= malloc(l
, M_NETADDR
, M_NOWAIT
|M_ZERO
);
549 if (ep
->addrpack
== NULL
) {
553 ep
->maskpack
= malloc(l
, M_NETADDR
, M_NOWAIT
|M_ZERO
);
554 if (ep
->maskpack
== NULL
) {
561 ep
->addrpack
->sa_len
= l
& 0xff;
562 ep
->maskpack
->sa_len
= l
& 0xff;
565 pack4
= (struct ip_pack4
*)ep
->addrpack
;
566 ep
->src
= (struct sockaddr
*)&pack4
->mine
;
567 ep
->dst
= (struct sockaddr
*)&pack4
->yours
;
568 pack4
= (struct ip_pack4
*)ep
->maskpack
;
569 ep
->srcmask
= (struct sockaddr
*)&pack4
->mine
;
570 ep
->dstmask
= (struct sockaddr
*)&pack4
->yours
;
574 pack6
= (struct ip_pack6
*)ep
->addrpack
;
575 ep
->src
= (struct sockaddr
*)&pack6
->mine
;
576 ep
->dst
= (struct sockaddr
*)&pack6
->yours
;
577 pack6
= (struct ip_pack6
*)ep
->maskpack
;
578 ep
->srcmask
= (struct sockaddr
*)&pack6
->mine
;
579 ep
->dstmask
= (struct sockaddr
*)&pack6
->yours
;
584 memcpy(ep
->src
, sp
, sp
->sa_len
);
585 memcpy(ep
->srcmask
, sm
, sp
->sa_len
);
586 memcpy(ep
->dst
, dp
, dp
->sa_len
);
587 memcpy(ep
->dstmask
, dm
, dp
->sa_len
);
591 error
= encap_add(ep
);
601 free(ep
->addrpack
, M_NETADDR
);
603 free(ep
->maskpack
, M_NETADDR
);
611 const struct encaptab
*
612 encap_attach_func(int af
, int proto
,
613 int (*func
)(struct mbuf
*, int, int, void *),
614 const struct protosw
*psw
, void *arg
)
621 /* sanity check on args */
627 error
= encap_afcheck(af
, NULL
, NULL
);
631 ep
= malloc(sizeof(*ep
), M_NETADDR
, M_NOWAIT
); /*XXX*/
636 memset(ep
, 0, sizeof(*ep
));
644 error
= encap_add(ep
);
657 /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */
661 encap6_ctlinput(int cmd
, const struct sockaddr
*sa
, void *d0
)
667 struct ip6ctlparam
*ip6cp
= NULL
;
670 const struct ip6protosw
*psw
;
672 if (sa
->sa_family
!= AF_INET6
||
673 sa
->sa_len
!= sizeof(struct sockaddr_in6
))
676 if ((unsigned)cmd
>= PRC_NCMDS
)
678 if (cmd
== PRC_HOSTDEAD
)
680 else if (cmd
== PRC_MSGSIZE
)
681 ; /* special code is present, see below */
682 else if (inet6ctlerrmap
[cmd
] == 0)
685 /* if the parameter is from icmp6, decode it. */
687 ip6cp
= (struct ip6ctlparam
*)d
;
689 ip6
= ip6cp
->ip6c_ip6
;
690 off
= ip6cp
->ip6c_off
;
691 nxt
= ip6cp
->ip6c_nxt
;
693 if (ip6
&& cmd
== PRC_MSGSIZE
) {
695 struct encaptab
*match
;
698 * Check to see if we have a valid encap configuration.
700 match
= encap6_lookup(m
, off
, nxt
, OUTBOUND
);
705 * Depending on the value of "valid" and routing table
706 * size (mtudisc_{hi,lo}wat), we will:
707 * - recalculate the new MTU and create the
708 * corresponding routing entry, or
709 * - ignore the MTU change notification.
711 icmp6_mtudisc_update((struct ip6ctlparam
*)d
, valid
);
719 /* inform all listeners */
720 LIST_FOREACH(ep
, &encaptab
, chain
) {
721 if (ep
->af
!= AF_INET6
)
723 if (ep
->proto
>= 0 && ep
->proto
!= nxt
)
726 /* should optimize by looking at address pairs */
728 /* XXX need to pass ep->arg or ep itself to listeners */
729 psw
= (const struct ip6protosw
*)ep
->psw
;
730 if (psw
&& psw
->pr_ctlinput
)
731 (*psw
->pr_ctlinput
)(cmd
, sa
, d
);
734 rip6_ctlinput(cmd
, sa
, d0
);
740 encap_detach(const struct encaptab
*cookie
)
742 const struct encaptab
*ep
= cookie
;
746 LIST_FOREACH(p
, &encaptab
, chain
) {
748 error
= encap_remove(p
);
752 free(p
->addrpack
, M_NETADDR
);
753 free(p
->maskpack
, M_NETADDR
);
755 free(p
, M_NETADDR
); /*XXX*/
764 static struct radix_node_head
*
770 return encap_head
[0];
773 return encap_head
[1];
781 mask_matchlen(const struct sockaddr
*sa
)
786 p
= (const char *)sa
;
788 p
+= 2; /* sa_len + sa_family */
792 l
+= (*p
? 8 : 0); /* estimate */
801 mask_match(const struct encaptab
*ep
,
802 const struct sockaddr
*sp
,
803 const struct sockaddr
*dp
)
805 struct sockaddr_storage s
;
806 struct sockaddr_storage d
;
808 const u_int8_t
*p
, *q
;
814 panic("wrong encaptab passed to mask_match");
816 if (sp
->sa_len
> sizeof(s
) || dp
->sa_len
> sizeof(d
))
818 if (sp
->sa_family
!= ep
->af
|| dp
->sa_family
!= ep
->af
)
820 if (sp
->sa_len
!= ep
->src
->sa_len
|| dp
->sa_len
!= ep
->dst
->sa_len
)
825 p
= (const u_int8_t
*)sp
;
826 q
= (const u_int8_t
*)ep
->srcmask
;
828 for (i
= 0 ; i
< sp
->sa_len
; i
++) {
831 matchlen
+= (q
[i
] ? 8 : 0);
834 p
= (const u_int8_t
*)dp
;
835 q
= (const u_int8_t
*)ep
->dstmask
;
837 for (i
= 0 ; i
< dp
->sa_len
; i
++) {
839 /* XXX rough estimate */
840 matchlen
+= (q
[i
] ? 8 : 0);
843 /* need to overwrite len/family portion as we don't compare them */
844 s
.ss_len
= sp
->sa_len
;
845 s
.ss_family
= sp
->sa_family
;
846 d
.ss_len
= dp
->sa_len
;
847 d
.ss_family
= dp
->sa_family
;
849 if (memcmp(&s
, ep
->src
, ep
->src
->sa_len
) == 0 &&
850 memcmp(&d
, ep
->dst
, ep
->dst
->sa_len
) == 0) {
858 encap_fillarg(struct mbuf
*m
, const struct encaptab
*ep
)
862 mtag
= m_tag_get(PACKET_TAG_ENCAP
, sizeof(void *), M_NOWAIT
);
864 *(void **)(mtag
+ 1) = ep
->arg
;
865 m_tag_prepend(m
, mtag
);
870 encap_getarg(struct mbuf
*m
)
876 mtag
= m_tag_find(m
, PACKET_TAG_ENCAP
, NULL
);
878 p
= *(void **)(mtag
+ 1);
879 m_tag_delete(m
, mtag
);