2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
35 #include <sys/cdefs.h>
37 #include "opt_inet6.h"
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
43 #include <sys/malloc.h>
45 #include <sys/module.h>
46 #include <sys/rmlock.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
50 #include <sys/errno.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
57 #include <machine/cpu.h>
60 #include <net/if_var.h>
61 #include <net/if_private.h>
62 #include <net/if_clone.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
69 #include <netinet/in.h>
70 #include <netinet/in_systm.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_ecn.h>
74 #include <netinet/in_var.h>
75 #include <netinet/ip_var.h>
80 #include <netinet/in.h>
82 #include <netinet6/in6_var.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/ip6_ecn.h>
85 #include <netinet6/ip6_var.h>
88 #include <netinet/ip_encap.h>
89 #include <net/ethernet.h>
90 #include <net/if_bridgevar.h>
91 #include <net/if_gif.h>
93 #include <security/mac/mac_framework.h>
/* Driver name: used to name cloned interfaces and to register the cloner. */
95 static const char gifname
[] = "gif";
/* Allocation type tag used for this driver's memory (e.g. the softc, gif_hdr). */
97 MALLOC_DEFINE(M_GIF
, "gif", "Generic Tunnel Interface");
/*
 * sx lock held exclusively around tunnel (re)configuration in this file:
 * gif_ioctl(), gif_reassign() and gif_clone_destroy() acquire it, and
 * gif_delete_tunnel() asserts that it is exclusively held.
 */
98 static struct sx gif_ioctl_sx
;
99 SX_SYSINIT(gif_ioctl_sx
, &gif_ioctl_sx
, "gif_ioctl");
/*
 * Optional hook pointers.  Every call site in this file compares the pointer
 * with NULL before calling through it, so an unset hook is simply skipped.
 * NOTE(review): the ng_ prefix suggests these are installed by a netgraph
 * node module -- confirm against the code that assigns them.
 */
/* Input hook: receives the interface, a pointer to the mbuf pointer, and the af. */
101 void (*ng_gif_input_p
)(struct ifnet
*ifp
, struct mbuf
**mp
, int af
);
/* Orphan-input hook: receives the interface, the mbuf, and the af. */
102 void (*ng_gif_input_orphan_p
)(struct ifnet
*ifp
, struct mbuf
*m
, int af
);
/* Attach hook: called from gif_clone_create() with the new interface. */
103 void (*ng_gif_attach_p
)(struct ifnet
*ifp
);
/* Detach hook: called from gif_clone_destroy() with the interface. */
104 void (*ng_gif_detach_p
)(struct ifnet
*ifp
);
/* Forward declarations of the file-local handlers defined later in this file. */
107 static void gif_reassign(struct ifnet
*, struct vnet
*, char *);
109 static void gif_delete_tunnel(struct gif_softc
*);
110 static int gif_ioctl(struct ifnet
*, u_long
, caddr_t
);
111 static int gif_transmit(struct ifnet
*, struct mbuf
*);
112 static void gif_qflush(struct ifnet
*);
113 static int gif_clone_create(struct if_clone
*, int, caddr_t
);
114 static void gif_clone_destroy(struct ifnet
*);
/*
 * Per-vnet handle of the gif interface cloner: assigned in vnet_gif_init()
 * and passed to if_clone_detach() in vnet_gif_uninit().
 */
115 VNET_DEFINE_STATIC(struct if_clone
*, gif_cloner
);
116 #define V_gif_cloner VNET(gif_cloner)
/*
 * Sysctl node "gif" (node number IFT_GIF) under _net_link; it is the parent
 * of the max_nesting knob declared further down in this file.
 */
118 SYSCTL_DECL(_net_link
);
119 static SYSCTL_NODE(_net_link
, IFT_GIF
, gif
, CTLFLAG_RW
| CTLFLAG_MPSAFE
, 0,
120 "Generic Tunnel Interface");
123 * This macro controls the default upper limit on the nesting of gif tunnels.
124 * Because setting a large value for this macro with a careless configuration
125 * may crash the system, we don't allow any nesting by default.
126 * If you need to configure nested gif tunnels, you can define this macro
127 * in your kernel configuration file. However, if you do so, please be
128 * careful to configure the tunnels so that they don't form a loop.
/* Compile-time default for the per-vnet nesting limit below. */
130 #define MAX_GIF_NEST 1
/*
 * Per-vnet nesting limit, initialised to MAX_GIF_NEST.  gif_transmit() passes
 * V_max_gif_nesting to if_tunnel_check_nesting().
 */
132 VNET_DEFINE_STATIC(int, max_gif_nesting
) = MAX_GIF_NEST
;
133 #define V_max_gif_nesting VNET(max_gif_nesting)
/* Read/write, per-vnet sysctl "max_nesting" under the gif node. */
134 SYSCTL_INT(_net_link_gif
, OID_AUTO
, max_nesting
, CTLFLAG_VNET
| CTLFLAG_RW
,
135 &VNET_NAME(max_gif_nesting
), 0, "Max nested tunnels");
/*
 * Cloner "create" callback (registered in vnet_gif_init()): allocate a softc
 * and an ifnet for a new gif interface, fill in its methods, and attach it.
 *
 * ifc    - cloner handle; not referenced by the visible statements.
 * unit   - unit number; combined with gifname to form the interface name.
 * params - creation parameters; not referenced by the visible statements.
 */
138 gif_clone_create(struct if_clone
*ifc
, int unit
, caddr_t params
)
140 struct gif_softc
*sc
;
/* Zero-filled softc (M_ZERO); the FIB defaults to that of the creating process. */
142 sc
= malloc(sizeof(struct gif_softc
), M_GIF
, M_WAITOK
| M_ZERO
);
143 sc
->gif_fibnum
= curthread
->td_proc
->p_fibnum
;
/* Allocate the ifnet, link it back to the softc, and name it "gif<unit>". */
144 GIF2IFP(sc
) = if_alloc(IFT_GIF
);
145 GIF2IFP(sc
)->if_softc
= sc
;
146 if_initname(GIF2IFP(sc
), gifname
, unit
);
/* No link-level address; point-to-point, multicast-capable interface. */
148 GIF2IFP(sc
)->if_addrlen
= 0;
149 GIF2IFP(sc
)->if_mtu
= GIF_MTU
;
150 GIF2IFP(sc
)->if_flags
= IFF_POINTOPOINT
| IFF_MULTICAST
;
/* Install this driver's ifnet methods. */
151 GIF2IFP(sc
)->if_ioctl
= gif_ioctl
;
152 GIF2IFP(sc
)->if_transmit
= gif_transmit
;
153 GIF2IFP(sc
)->if_qflush
= gif_qflush
;
154 GIF2IFP(sc
)->if_output
= gif_output
;
156 GIF2IFP(sc
)->if_reassign
= gif_reassign
;
/* Advertise and enable link-state reporting. */
158 GIF2IFP(sc
)->if_capabilities
|= IFCAP_LINKSTATE
;
159 GIF2IFP(sc
)->if_capenable
|= IFCAP_LINKSTATE
;
/* Attach the interface and BPF (DLT_NULL, 32-bit header), then notify the hook. */
160 if_attach(GIF2IFP(sc
));
161 bpfattach(GIF2IFP(sc
), DLT_NULL
, sizeof(u_int32_t
));
162 if (ng_gif_attach_p
!= NULL
)
163 (*ng_gif_attach_p
)(GIF2IFP(sc
));
/*
 * if_reassign method (installed by gif_clone_create()): deletes the tunnel
 * configuration via gif_delete_tunnel() while holding gif_ioctl_sx
 * exclusively.  The new_vnet and unused arguments are marked __unused.
 * NOTE(review): how sc is derived from ifp is not visible here -- confirm.
 */
170 gif_reassign(struct ifnet
*ifp
, struct vnet
*new_vnet __unused
,
171 char *unused __unused
)
173 struct gif_softc
*sc
;
175 sx_xlock(&gif_ioctl_sx
);
178 gif_delete_tunnel(sc
);
179 sx_xunlock(&gif_ioctl_sx
);
/*
 * Cloner "destroy" callback (registered in vnet_gif_init()).  With
 * gif_ioctl_sx held exclusively it deletes the tunnel configuration, calls
 * the detach hook if one is set, and clears ifp->if_softc before unlocking.
 * NOTE(review): how sc is derived from ifp, and any teardown after the
 * unlock, are not visible here -- confirm.
 */
184 gif_clone_destroy(struct ifnet
*ifp
)
186 struct gif_softc
*sc
;
188 sx_xlock(&gif_ioctl_sx
);
190 gif_delete_tunnel(sc
);
191 if (ng_gif_detach_p
!= NULL
)
192 (*ng_gif_detach_p
)(ifp
);
/* Drop the softc back-pointer while still holding the lock. */
195 ifp
->if_softc
= NULL
;
196 sx_xunlock(&gif_ioctl_sx
);
/*
 * Per-vnet initialisation: register the "gif" cloner with gif_clone_create()
 * and gif_clone_destroy() as its callbacks, and remember the handle in
 * V_gif_cloner.  The argument is unused.
 */
204 vnet_gif_init(const void *unused __unused
)
207 V_gif_cloner
= if_clone_simple(gifname
, gif_clone_create
,
208 gif_clone_destroy
, 0);
/* Run vnet_gif_init() at SI_SUB_PSEUDO / SI_ORDER_ANY. */
216 VNET_SYSINIT(vnet_gif_init
, SI_SUB_PSEUDO
, SI_ORDER_ANY
,
217 vnet_gif_init
, NULL
);
/*
 * Per-vnet teardown: detach the cloner registered by vnet_gif_init().
 * The argument is unused.
 */
220 vnet_gif_uninit(const void *unused __unused
)
223 if_clone_detach(V_gif_cloner
);
/* Run vnet_gif_uninit() at SI_SUB_PSEUDO / SI_ORDER_ANY. */
231 VNET_SYSUNINIT(vnet_gif_uninit
, SI_SUB_PSEUDO
, SI_ORDER_ANY
,
232 vnet_gif_uninit
, NULL
);
/*
 * Module glue: the event handler, its moduledata_t, and the registration of
 * module "if_gif" (version 1) at SI_SUB_PSEUDO / SI_ORDER_ANY.
 * NOTE(review): the handler body and the gif_mod initialiser are not visible
 * here -- confirm which event types are handled.
 */
235 gifmodevent(module_t mod
, int type
, void *data
)
248 static moduledata_t gif_mod
= {
254 DECLARE_MODULE(if_gif
, gif_mod
, SI_SUB_PSEUDO
, SI_ORDER_ANY
);
255 MODULE_VERSION(if_gif
, 1);
/*
 * Hash-table setup: allocate an array of GIF_HASH_SIZE list heads and
 * initialise each one with CK_LIST_INIT().
 * NOTE(review): the function header and the remaining malloc() arguments are
 * not visible here; presumably this is the counterpart of gif_hashdestroy()
 * -- confirm.
 */
260 struct gif_list
*hash
;
263 hash
= malloc(sizeof(struct gif_list
) * GIF_HASH_SIZE
,
265 for (i
= 0; i
< GIF_HASH_SIZE
; i
++)
266 CK_LIST_INIT(&hash
[i
]);
/*
 * Takes a pointer to an array of struct gif_list heads.
 * NOTE(review): the body is not visible here; presumably it releases the
 * table -- confirm.
 */
272 gif_hashdestroy(struct gif_list
*hash
)
/* Tag value handed to if_tunnel_check_nesting() by gif_transmit(). */
278 #define MTAG_GIF 1080679712
/*
 * if_transmit method (installed by gif_clone_create()): encapsulate one
 * outbound packet and hand it to the outer-family output routine.
 *
 * ifp - the gif interface the packet is sent on.
 * m   - the packet; its inner address family was stashed in
 *       m_pkthdr.csum_data by gif_output().
 *
 * NOTE(review): case labels, error paths and the return statement are not
 * visible here -- confirm them before relying on the comments below.
 */
280 gif_transmit(struct ifnet
*ifp
, struct mbuf
*m
)
282 struct gif_softc
*sc
;
283 struct etherip_header
*eth
;
/* MAC framework transmit check; its result is kept in error. */
297 error
= mac_ifnet_check_transmit(ifp
, m
);
/*
 * Special-case the packet when the interface is in monitor mode, is not up
 * or not running, has no outer family configured, or the tunnel nesting
 * check (bounded by V_max_gif_nesting) reports an error.
 */
305 if ((ifp
->if_flags
& IFF_MONITOR
) != 0 ||
306 (ifp
->if_flags
& IFF_UP
) == 0 ||
307 (ifp
->if_drv_flags
& IFF_DRV_RUNNING
) == 0 ||
308 sc
->gif_family
== 0 ||
309 (error
= if_tunnel_check_nesting(ifp
, m
, MTAG_GIF
,
310 V_max_gif_nesting
)) != 0) {
314 /* Now pull back the af that we stashed in the csum_data. */
318 af
= m
->m_pkthdr
.csum_data
;
/* Clear broadcast/multicast marks, select the tunnel FIB, tap BPF, count output. */
319 m
->m_flags
&= ~(M_BCAST
|M_MCAST
);
320 M_SETFIB(m
, sc
->gif_fibnum
);
321 BPF_MTAP2(ifp
, &af
, sizeof(af
), m
);
322 if_inc_counter(ifp
, IFCOUNTER_OPACKETS
, 1);
323 if_inc_counter(ifp
, IFCOUNTER_OBYTES
, m
->m_pkthdr
.len
);
324 /* inner AF-specific encapsulation */
/* Inner IPv4: make the IP header contiguous, then derive the outer ECN bits. */
329 proto
= IPPROTO_IPV4
;
330 if (m
->m_len
< sizeof(struct ip
))
331 m
= m_pullup(m
, sizeof(struct ip
));
336 ip
= mtod(m
, struct ip
*);
337 ip_ecn_ingress((ifp
->if_flags
& IFF_LINK1
) ? ECN_ALLOWED
:
338 ECN_NOCARE
, &ecn
, &ip
->ip_tos
);
/* Inner IPv6: same, then take 8 bits starting at bit 20 of the flow word as ecn. */
343 proto
= IPPROTO_IPV6
;
344 if (m
->m_len
< sizeof(struct ip6_hdr
))
345 m
= m_pullup(m
, sizeof(struct ip6_hdr
));
351 ip6
= mtod(m
, struct ip6_hdr
*);
352 ip6_ecn_ingress((ifp
->if_flags
& IFF_LINK1
) ? ECN_ALLOWED
:
353 ECN_NOCARE
, &t
, &ip6
->ip6_flow
);
354 ecn
= (ntohl(t
) >> 20) & 0xff;
/* EtherIP: prepend an EtherIP header (M_NOWAIT) and set its version field. */
358 proto
= IPPROTO_ETHERIP
;
359 M_PREPEND(m
, sizeof(struct etherip_header
), M_NOWAIT
);
364 eth
= mtod(m
, struct etherip_header
*);
366 eth
->eip_ver
= ETHERIP_VERSION
;
/* Any other inner family is unsupported. */
370 error
= EAFNOSUPPORT
;
374 /* XXX should we check if our outer source is legal? */
375 /* dispatch to output logic based on outer AF */
376 switch (sc
->gif_family
) {
379 error
= in_gif_output(ifp
, m
, proto
, ecn
);
384 error
= in6_gif_output(ifp
, m
, proto
, ecn
);
/* Output error accounting. */
392 if_inc_counter(ifp
, IFCOUNTER_OERRORS
, 1);
/*
 * if_qflush method (installed by gif_clone_create()).  The interface argument
 * is marked __unused.
 * NOTE(review): the body is not visible here -- confirm it is a no-op.
 */
397 gif_qflush(struct ifnet
*ifp __unused
)
/*
 * if_output method (installed by gif_clone_create()): determine the inner
 * address family of the packet, stash it in m->m_pkthdr.csum_data, and pass
 * the packet to ifp->if_transmit (gif_transmit() reads the family back).
 *
 * ifp - the gif interface.
 * m   - the packet to send.
 * dst - destination address; its sa_family selects how the family is found.
 *
 * Returns whatever ifp->if_transmit returns.
 * NOTE(review): the remaining parameters (including the "ro" used below) are
 * not visible here.
 */
403 gif_output(struct ifnet
*ifp
, struct mbuf
*m
, const struct sockaddr
*dst
,
/* Not expected to be reached while a bridge is attached to the interface. */
408 KASSERT(ifp
->if_bridge
== NULL
,
409 ("%s: unexpectedly called with bridge attached", __func__
));
411 /* BPF writes need to be handled specially. */
412 if (dst
->sa_family
== AF_UNSPEC
|| dst
->sa_family
== pseudo_AF_HDRCMPLT
)
413 memcpy(&af
, dst
->sa_data
, sizeof(af
));
415 af
= RO_GET_FAMILY(ro
, dst
);
417 * Now save the af in the inbound pkt csum data, this is a cheat since
418 * we are using the inbound csum_data field to carry the af over to
419 * the gif_transmit() routine, avoiding using yet another mtag.
421 m
->m_pkthdr
.csum_data
= af
;
422 return (ifp
->if_transmit(ifp
, m
));
/*
 * Input path for decapsulated packets arriving on a gif interface.
 *
 * m     - the inner packet.
 * ifp   - the gif interface; becomes the packet's receive interface.
 * proto - inner protocol; IPPROTO_ETHERIP is the only case label visible here.
 * ecn   - ECN value from the outer header, merged into the inner header.
 *
 * NOTE(review): most case labels, the drop paths and the returns are not
 * visible here -- confirm them before relying on the comments below.
 */
426 gif_input(struct mbuf
*m
, struct ifnet
*ifp
, int proto
, uint8_t ecn
)
428 struct etherip_header
*eip
;
436 struct ether_header
*eh
;
437 struct ifnet
*oldifp
;
447 m
->m_pkthdr
.rcvif
= ifp
;
/* Inner IPv4: make the header contiguous and apply ECN egress processing. */
453 if (m
->m_len
< sizeof(struct ip
))
454 m
= m_pullup(m
, sizeof(struct ip
));
457 ip
= mtod(m
, struct ip
*);
458 if (ip_ecn_egress((ifp
->if_flags
& IFF_LINK1
) ? ECN_ALLOWED
:
459 ECN_NOCARE
, &ecn
, &ip
->ip_tos
) == 0) {
/* Inner IPv6: same, with ecn shifted left by 20 and converted with htonl(). */
468 if (m
->m_len
< sizeof(struct ip6_hdr
))
469 m
= m_pullup(m
, sizeof(struct ip6_hdr
));
472 t
= htonl((uint32_t)ecn
<< 20);
473 ip6
= mtod(m
, struct ip6_hdr
*);
474 if (ip6_ecn_egress((ifp
->if_flags
& IFF_LINK1
) ? ECN_ALLOWED
:
475 ECN_NOCARE
, &t
, &ip6
->ip6_flow
) == 0) {
481 case IPPROTO_ETHERIP
:
/* MAC framework mbuf setup, then tap BPF only when listeners are present. */
490 mac_ifnet_create_mbuf(ifp
, m
);
493 if (bpf_peers_present(ifp
->if_bpf
)) {
495 bpf_mtap2(ifp
->if_bpf
, &af1
, sizeof(af1
), m
);
/* Monitor mode: the packet is still counted as input. */
498 if ((ifp
->if_flags
& IFF_MONITOR
) != 0) {
499 if_inc_counter(ifp
, IFCOUNTER_IPACKETS
, 1);
500 if_inc_counter(ifp
, IFCOUNTER_IBYTES
, m
->m_pkthdr
.len
);
/* Offer the packet to the input hook, if one is set. */
505 if (ng_gif_input_p
!= NULL
) {
506 (*ng_gif_input_p
)(ifp
, &m
, af
);
512 * Put the packet to the network layer input queue according to the
513 * specified address family.
514 * Note: older versions of gif_input directly called network layer
515 * input functions, e.g. ip6_input, here. We changed the policy to
516 * prevent too many recursive calls of such input functions, which
517 * might cause kernel panic. But the change may introduce another
518 * problem; if the input queue is full, packets are discarded.
519 * The kernel stack overflow really happened, and we believed
520 * queue-full rarely occurs, so we changed the policy.
/* EtherIP: n is the combined size of the EtherIP and Ethernet headers. */
534 n
= sizeof(struct etherip_header
) +
535 sizeof(struct ether_header
);
540 eip
= mtod(m
, struct etherip_header
*);
541 if (eip
->eip_ver
!= ETHERIP_VERSION
) {
542 /* discard unknown versions */
/* Strip the EtherIP header and reset the per-packet link-level marks. */
547 m_adj_decap(m
, sizeof(struct etherip_header
));
549 m
->m_flags
&= ~(M_BCAST
|M_MCAST
);
550 m
->m_pkthdr
.rcvif
= ifp
;
/* With a bridge attached: re-derive the bcast/mcast marks, then give it the frame. */
552 if (ifp
->if_bridge
) {
554 eh
= mtod(m
, struct ether_header
*);
555 if (ETHER_IS_MULTICAST(eh
->ether_dhost
)) {
556 if (ETHER_IS_BROADCAST(eh
->ether_dhost
))
557 m
->m_flags
|= M_BCAST
;
559 m
->m_flags
|= M_MCAST
;
560 if_inc_counter(ifp
, IFCOUNTER_IMCASTS
, 1);
562 BRIDGE_INPUT(ifp
, m
);
564 if (m
!= NULL
&& ifp
!= oldifp
) {
566 * The bridge gave us back itself or one of the
567 * members for which the frame is addressed.
/* Offer the packet to the orphan-input hook, if one is set. */
578 if (ng_gif_input_orphan_p
!= NULL
)
579 (*ng_gif_input_orphan_p
)(ifp
, m
, af
);
/* Count the packet, tag it with the interface FIB, and queue it via netisr. */
585 if_inc_counter(ifp
, IFCOUNTER_IPACKETS
, 1);
586 if_inc_counter(ifp
, IFCOUNTER_IBYTES
, m
->m_pkthdr
.len
);
587 M_SETFIB(m
, ifp
->if_fib
);
588 netisr_dispatch(isr
, m
);
/* Input error accounting. */
591 if_inc_counter(ifp
, IFCOUNTER_IERRORS
, 1);
/*
 * if_ioctl method (installed by gif_clone_create()).
 *
 * ifp  - the gif interface.
 * cmd  - ioctl command code.
 * data - command argument; viewed as a struct ifreq for the requests below.
 *
 * Tunnel-related requests are handled with gif_ioctl_sx held exclusively.
 * NOTE(review): most case labels and the returns are not visible here --
 * confirm which command each fragment below belongs to.
 */
595 gif_ioctl(struct ifnet
*ifp
, u_long cmd
, caddr_t data
)
597 struct ifreq
*ifr
= (struct ifreq
*)data
;
598 struct gif_softc
*sc
;
604 ifp
->if_flags
|= IFF_UP
;
/* MTU change: checked against GIF_MTU_MIN and GIF_MTU_MAX before it is stored. */
611 if (ifr
->ifr_mtu
< GIF_MTU_MIN
||
612 ifr
->ifr_mtu
> GIF_MTU_MAX
)
615 ifp
->if_mtu
= ifr
->ifr_mtu
;
618 sx_xlock(&gif_ioctl_sx
);
/* Tunnel deletion; tests first whether an outer family is configured. */
627 if (sc
->gif_family
== 0)
629 gif_delete_tunnel(sc
);
/* IPv4 tunnel address requests are delegated to in_gif_ioctl(). */
633 case SIOCGIFPSRCADDR
:
634 case SIOCGIFPDSTADDR
:
635 error
= in_gif_ioctl(sc
, cmd
, data
);
/* IPv6 tunnel address requests are delegated to in6_gif_ioctl(). */
639 case SIOCSIFPHYADDR_IN6
:
640 case SIOCGIFPSRCADDR_IN6
:
641 case SIOCGIFPDSTADDR_IN6
:
642 error
= in6_gif_ioctl(sc
, cmd
, data
);
/* Report the tunnel FIB. */
646 ifr
->ifr_fib
= sc
->gif_fibnum
;
/* Set the tunnel FIB: needs PRIV_NET_GIF; the value is compared with rt_numfibs. */
649 if ((error
= priv_check(curthread
, PRIV_NET_GIF
)) != 0)
651 if (ifr
->ifr_fib
>= rt_numfibs
)
654 sc
->gif_fibnum
= ifr
->ifr_fib
;
/* Copy the current option bits out to the caller's buffer. */
657 options
= sc
->gif_options
;
658 error
= copyout(&options
, ifr_data_get_ptr(ifr
),
/* Set option bits: needs PRIV_NET_GIF; bits outside GIF_OPTMASK are checked for. */
662 if ((error
= priv_check(curthread
, PRIV_NET_GIF
)) != 0)
664 error
= copyin(ifr_data_get_ptr(ifr
), &options
,
668 if (options
& ~GIF_OPTMASK
) {
/* Only act when the options actually change; configured families use their handler. */
672 if (sc
->gif_options
!= options
) {
673 switch (sc
->gif_family
) {
676 error
= in_gif_setopts(sc
, options
);
681 error
= in6_gif_setopts(sc
, options
);
685 /* No need to invoke AF-handler */
686 sc
->gif_options
= options
;
/* After a successful set-address request on a configured tunnel, report link up. */
694 if (error
== 0 && sc
->gif_family
!= 0) {
697 cmd
== SIOCSIFPHYADDR
||
700 cmd
== SIOCSIFPHYADDR_IN6
||
703 if_link_state_change(ifp
, LINK_STATE_UP
);
707 sx_xunlock(&gif_ioctl_sx
);
/*
 * Remove the tunnel configuration of a gif softc.  The caller must hold
 * gif_ioctl_sx exclusively (asserted below).
 *
 * When an outer family is configured, the softc is unlinked from its
 * "srchash" and "chain" lists and gif_hdr is freed.  The interface is also
 * marked not running and its link state is reported as down.
 * NOTE(review): the closing brace of the if block is not visible here, so
 * whether the last two statements are conditional needs confirming.
 */
712 gif_delete_tunnel(struct gif_softc
*sc
)
715 sx_assert(&gif_ioctl_sx
, SA_XLOCKED
);
716 if (sc
->gif_family
!= 0) {
717 CK_LIST_REMOVE(sc
, srchash
);
718 CK_LIST_REMOVE(sc
, chain
);
719 /* Wait until it becomes safe to free gif_hdr */
721 free(sc
->gif_hdr
, M_GIF
);
724 GIF2IFP(sc
)->if_drv_flags
&= ~IFF_DRV_RUNNING
;
725 if_link_state_change(GIF2IFP(sc
), LINK_STATE_DOWN
);