1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Linux NET3: IP/IP protocol decoder modified to support
4 * virtual tunnel interface
7 * Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
11 This version of net/ipv4/ip_vti.c was cloned from net/ipv4/ipip.c
13 For comments look at net/ipv4/ip_gre.c --ANK
17 #include <linux/capability.h>
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/uaccess.h>
22 #include <linux/skbuff.h>
23 #include <linux/netdevice.h>
25 #include <linux/tcp.h>
26 #include <linux/udp.h>
27 #include <linux/if_arp.h>
28 #include <linux/init.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/if_ether.h>
31 #include <linux/icmpv6.h>
36 #include <net/ip_tunnels.h>
37 #include <net/inet_ecn.h>
39 #include <net/net_namespace.h>
40 #include <net/netns/generic.h>
/* Forward declaration of the rtnl link ops; the initialised definition appears later in this file. */
42 static struct rtnl_link_ops vti_link_ops __read_mostly
;
/* Id handed to net_generic() to obtain this driver's struct ip_tunnel_net for a namespace. */
44 static unsigned int vti_net_id __read_mostly
;
/* Prototype needed because vti_netdev_ops references this hook before its definition. */
45 static int vti_tunnel_init(struct net_device
*dev
);
/*
 * vti_input - common receive path: bind the packet to a VTI tunnel and
 * pass it to xfrm_input().
 * @skb:            received packet (outer IPv4 header read via ip_hdr())
 * @nexthdr:        forwarded unchanged to xfrm_input()
 * @spi:            forwarded unchanged to xfrm_input()
 * @encap_type:     forwarded unchanged to xfrm_input()
 * @update_skb_dev: NOTE(review): the statement testing this flag is not
 *                  present in this listing; presumably it guards the
 *                  skb->dev assignment below - confirm.
 *
 * NOTE(review): the branches taken when no tunnel matches or when the
 * policy check fails are not visible here.
 */
47 static int vti_input(struct sk_buff
*skb
, int nexthdr
, __be32 spi
,
48 int encap_type
, bool update_skb_dev
)
50 struct ip_tunnel
*tunnel
;
51 const struct iphdr
*iph
= ip_hdr(skb
);
52 struct net
*net
= dev_net(skb
->dev
);
53 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
/* Find the tunnel by ingress ifindex and outer source/destination address, with TUNNEL_NO_KEY and key 0. */
55 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
56 iph
->saddr
, iph
->daddr
, 0);
/* Inbound xfrm policy check on the still-encapsulated packet (no socket). */
58 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
/* Remember the tunnel in the skb control block; vti_rcv_cb() reads it back from there. */
61 XFRM_TUNNEL_SKB_CB(skb
)->tunnel
.ip4
= tunnel
;
/* Re-home the skb onto the tunnel device. */
64 skb
->dev
= tunnel
->dev
;
66 return xfrm_input(skb
, nexthdr
, spi
, encap_type
);
/*
 * vti_input_proto - .input_handler hook of the xfrm4 protocol tables below.
 * Forwards its arguments to vti_input() with update_skb_dev set to false.
 * NOTE(review): the line declaring the encap_type parameter is missing
 * from this listing.
 */
75 static int vti_input_proto(struct sk_buff
*skb
, int nexthdr
, __be32 spi
,
78 return vti_input(skb
, nexthdr
, spi
, encap_type
, false);
81 static int vti_rcv(struct sk_buff
*skb
, __be32 spi
, bool update_skb_dev
)
83 XFRM_SPI_SKB_CB(skb
)->family
= AF_INET
;
84 XFRM_SPI_SKB_CB(skb
)->daddroff
= offsetof(struct iphdr
, daddr
);
86 return vti_input(skb
, ip_hdr(skb
)->protocol
, spi
, 0, update_skb_dev
);
89 static int vti_rcv_proto(struct sk_buff
*skb
)
91 return vti_rcv(skb
, 0, false);
/*
 * vti_rcv_cb - .cb_handler hook; works on the tunnel that vti_input()
 * stored in XFRM_TUNNEL_SKB_CB(skb).
 * @skb: packet being processed
 * @err: status passed in by the caller
 *
 * NOTE(review): this listing omits several statements (declarations of
 * 'x' and 'ret', the assignment of 'dev', early exits and the final
 * return); the comments below describe only what is shown.
 */
94 static int vti_rcv_cb(struct sk_buff
*skb
, int err
)
96 unsigned short family
;
97 struct net_device
*dev
;
99 const struct xfrm_mode
*inner_mode
;
100 struct ip_tunnel
*tunnel
= XFRM_TUNNEL_SKB_CB(skb
)->tunnel
.ip4
;
/* Saved so the mark can be restored after the policy check below. */
101 u32 orig_mark
= skb
->mark
;
/* Error accounting on the device. NOTE(review): guarding condition not shown - presumably a non-zero err. */
110 dev
->stats
.rx_errors
++;
111 dev
->stats
.rx_dropped
++;
116 x
= xfrm_input_state(skb
);
/* Default to the state's own inner mode. */
118 inner_mode
= &x
->inner_mode
;
/* Selector family unspecified: pick the inner mode from the protocol recorded in the skb control block. */
120 if (x
->sel
.family
== AF_UNSPEC
) {
121 inner_mode
= xfrm_ip2inner_mode(x
, XFRM_MODE_SKB_CB(skb
)->protocol
);
122 if (inner_mode
== NULL
) {
123 XFRM_INC_STATS(dev_net(skb
->dev
),
124 LINUX_MIB_XFRMINSTATEMODEERROR
);
129 family
= inner_mode
->family
;
/* Run the inbound policy check with skb->mark temporarily set to the tunnel input key, then restore it. */
131 skb
->mark
= be32_to_cpu(tunnel
->parms
.i_key
);
132 ret
= xfrm_policy_check(NULL
, XFRM_POLICY_IN
, skb
, family
);
133 skb
->mark
= orig_mark
;
/* Second argument is true when the tunnel's netns differs from the skb device's netns. */
138 skb_scrub_packet(skb
, !net_eq(tunnel
->net
, dev_net(skb
->dev
)));
/* Account the received packet length on dev. */
140 dev_sw_netstats_rx_add(dev
, skb
->len
);
/*
 * vti_state_check - check an xfrm state against the tunnel endpoints.
 * @x:   xfrm state (tested for NULL below)
 * @dst: tunnel remote address
 * @src: tunnel local address
 *
 * The visible conditions test that @x exists, is in XFRM_MODE_TUNNEL and
 * has family AF_INET; the addresses are then compared either with
 * xfrm_addr_equal() against x->props.saddr or with
 * xfrm_state_addr_check() against both addresses.
 * NOTE(review): the return statements and the condition selecting between
 * the two address checks are not present in this listing.
 */
145 static bool vti_state_check(const struct xfrm_state
*x
, __be32 dst
, __be32 src
)
147 xfrm_address_t
*daddr
= (xfrm_address_t
*)&dst
;
148 xfrm_address_t
*saddr
= (xfrm_address_t
*)&src
;
150 /* if there is no transform then this tunnel is not functional.
151 * Or if the xfrm is not mode tunnel.
153 if (!x
|| x
->props
.mode
!= XFRM_MODE_TUNNEL
||
154 x
->props
.family
!= AF_INET
)
158 return xfrm_addr_equal(saddr
, &x
->props
.saddr
, AF_INET
);
160 if (!xfrm_state_addr_check(x
, daddr
, saddr
, AF_INET
))
/*
 * vti_xmit - transmit worker called by vti_tunnel_xmit().
 * @skb: packet to send
 * @dev: VTI device the packet was queued on
 * (third parameter 'fl', the flow key, is declared on a line missing from
 * this listing; it is used below as a pointer)
 *
 * Visible steps: route the packet per protocol with the VTI device as
 * output interface, run xfrm_lookup_route() in the tunnel's netns, verify
 * the resulting xfrm state with vti_state_check(), handle packets larger
 * than the MTU, then scrub the skb and send it with dst_output().
 * NOTE(review): many conditions, gotos, labels and the return statements
 * are absent from this listing; comments describe only what is shown.
 */
166 static netdev_tx_t
vti_xmit(struct sk_buff
*skb
, struct net_device
*dev
,
169 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
170 struct ip_tunnel_parm
*parms
= &tunnel
->parms
;
171 struct dst_entry
*dst
= skb_dst(skb
);
172 struct net_device
*tdev
; /* Device to other host */
/* Length captured before the skb is handed to dst_output(). */
173 int pkt_len
= skb
->len
;
178 switch (skb
->protocol
) {
179 case htons(ETH_P_IP
): {
/* IPv4: route lookup with the VTI device as oif and any source address allowed. */
182 fl
->u
.ip4
.flowi4_oif
= dev
->ifindex
;
183 fl
->u
.ip4
.flowi4_flags
|= FLOWI_FLAG_ANYSRC
;
184 rt
= __ip_route_output_key(dev_net(dev
), &fl
->u
.ip4
);
186 dev
->stats
.tx_carrier_errors
++;
190 skb_dst_set(skb
, dst
);
193 #if IS_ENABLED(CONFIG_IPV6)
194 case htons(ETH_P_IPV6
):
/* IPv6: same idea using ip6_route_output(). */
195 fl
->u
.ip6
.flowi6_oif
= dev
->ifindex
;
196 fl
->u
.ip6
.flowi6_flags
|= FLOWI_FLAG_ANYSRC
;
197 dst
= ip6_route_output(dev_net(dev
), NULL
, &fl
->u
.ip6
);
201 dev
->stats
.tx_carrier_errors
++;
204 skb_dst_set(skb
, dst
);
208 dev
->stats
.tx_carrier_errors
++;
/* Resolve the xfrm route for this flow in the tunnel's namespace. */
214 dst
= xfrm_lookup_route(tunnel
->net
, dst
, fl
, NULL
, 0);
216 dev
->stats
.tx_carrier_errors
++;
/* NOTE(review): the action taken for DST_XFRM_QUEUE entries is not shown. */
220 if (dst
->flags
& DST_XFRM_QUEUE
)
/* The xfrm state must match the tunnel's configured remote/local addresses. */
223 if (!vti_state_check(dst
->xfrm
, parms
->iph
.daddr
, parms
->iph
.saddr
)) {
224 dev
->stats
.tx_carrier_errors
++;
/* NOTE(review): condition not shown - presumably the route points back at this device. */
233 dev
->stats
.collisions
++;
/* Packet exceeds the MTU: update the path MTU and notify the sender via ICMP / ICMPv6. */
238 if (skb
->len
> mtu
) {
239 skb_dst_update_pmtu_no_confirm(skb
, mtu
);
240 if (skb
->protocol
== htons(ETH_P_IP
)) {
241 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_FRAG_NEEDED
,
244 if (mtu
< IPV6_MIN_MTU
)
247 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
);
/* Second argument is true when the tunnel's netns differs from the device's netns. */
255 skb_scrub_packet(skb
, !net_eq(tunnel
->net
, dev_net(dev
)));
256 skb_dst_set(skb
, dst
);
257 skb
->dev
= skb_dst(skb
)->dev
;
259 err
= dst_output(tunnel
->net
, skb
->sk
, skb
);
260 if (net_xmit_eval(err
) == 0)
262 iptunnel_xmit_stats(dev
, err
);
/* Error tail: report link failure and count a transmit error. */
266 dst_link_failure(skb
);
268 dev
->stats
.tx_errors
++;
273 /* This function assumes it is being called from dev_queue_xmit()
274 * and that skb is filled properly by that function.
276 static netdev_tx_t
vti_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
278 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
281 if (!pskb_inet_may_pull(skb
))
284 memset(&fl
, 0, sizeof(fl
));
286 switch (skb
->protocol
) {
287 case htons(ETH_P_IP
):
288 xfrm_decode_session(skb
, &fl
, AF_INET
);
289 memset(IPCB(skb
), 0, sizeof(*IPCB(skb
)));
291 case htons(ETH_P_IPV6
):
292 xfrm_decode_session(skb
, &fl
, AF_INET6
);
293 memset(IP6CB(skb
), 0, sizeof(*IP6CB(skb
)));
299 /* override mark with tunnel output key */
300 fl
.flowi_mark
= be32_to_cpu(tunnel
->parms
.o_key
);
302 return vti_xmit(skb
, dev
, &fl
);
305 dev
->stats
.tx_errors
++;
/*
 * vti4_err - .err_handler hook: react to an ICMP error that quotes a
 * packet sent through a VTI tunnel.
 * @skb:  ICMP error; skb->data is read as the quoted IPv4 header
 * @info: passed to ipv4_update_pmtu()
 *
 * Visible steps: find the tunnel for the quoted addresses, derive the SPI
 * from the quoted ESP/AH/IPcomp header, look up the xfrm state, then
 * update the path MTU or process a redirect.
 * NOTE(review): the switch on 'protocol', the declarations of 'mark' and
 * 'spi', early exits and return values are absent from this listing.
 */
310 static int vti4_err(struct sk_buff
*skb
, u32 info
)
314 struct xfrm_state
*x
;
315 struct ip_tunnel
*tunnel
;
316 struct ip_esp_hdr
*esph
;
317 struct ip_auth_hdr
*ah
;
318 struct ip_comp_hdr
*ipch
;
319 struct net
*net
= dev_net(skb
->dev
);
320 const struct iphdr
*iph
= (const struct iphdr
*)skb
->data
;
321 int protocol
= iph
->protocol
;
322 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
/* Note daddr/saddr are passed in the opposite order to vti_input(): the quoted packet was one we sent. */
324 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
325 iph
->daddr
, iph
->saddr
, 0);
/* State lookup below uses the tunnel output key as mark. */
329 mark
= be32_to_cpu(tunnel
->parms
.o_key
);
/* The transform header starts right after the quoted IPv4 header (ihl is in 32-bit words). */
333 esph
= (struct ip_esp_hdr
*)(skb
->data
+(iph
->ihl
<<2));
337 ah
= (struct ip_auth_hdr
*)(skb
->data
+(iph
->ihl
<<2));
341 ipch
= (struct ip_comp_hdr
*)(skb
->data
+(iph
->ihl
<<2));
/* IPcomp: the 16-bit CPI is widened into a 32-bit network-order SPI. */
342 spi
= htonl(ntohs(ipch
->cpi
));
348 switch (icmp_hdr(skb
)->type
) {
349 case ICMP_DEST_UNREACH
:
350 if (icmp_hdr(skb
)->code
!= ICMP_FRAG_NEEDED
)
358 x
= xfrm_state_lookup(net
, mark
, (const xfrm_address_t
*)&iph
->daddr
,
359 spi
, protocol
, AF_INET
);
/* Destination-unreachable updates the PMTU; the other visible call handles a redirect. */
363 if (icmp_hdr(skb
)->type
== ICMP_DEST_UNREACH
)
364 ipv4_update_pmtu(skb
, net
, info
, 0, protocol
);
366 ipv4_redirect(skb
, net
, 0, protocol
);
/*
 * vti_tunnel_ctl - .ndo_tunnel_ctl hook: validate and adapt ioctl tunnel
 * parameters, then delegate to ip_tunnel_ctl().
 * @dev: VTI device
 * @p:   tunnel parameters, modified in place
 * @cmd: SIOC*TUNNEL command
 *
 * NOTE(review): the return-type line, the declaration of 'err', the
 * statements controlled by the conditions below and the returns are
 * absent from this listing.
 */
373 vti_tunnel_ctl(struct net_device
*dev
, struct ip_tunnel_parm
*p
, int cmd
)
/* For add/change the outer header must be IPv4 with protocol IPPROTO_IPIP (rest of the condition not shown). */
377 if (cmd
== SIOCADDTUNNEL
|| cmd
== SIOCCHGTUNNEL
) {
378 if (p
->iph
.version
!= 4 || p
->iph
.protocol
!= IPPROTO_IPIP
||
/* NOTE(review): bodies not shown - presumably the key is cleared when GRE_KEY is not set. */
383 if (!(p
->i_flags
& GRE_KEY
))
385 if (!(p
->o_flags
& GRE_KEY
))
/* Replace the caller's input flags with VTI_ISVTI before the generic handler runs. */
388 p
->i_flags
= VTI_ISVTI
;
390 err
= ip_tunnel_ctl(dev
, p
, cmd
);
/* Except for delete, set GRE_KEY in both flag words of the parameters. */
394 if (cmd
!= SIOCDELTUNNEL
) {
395 p
->i_flags
|= GRE_KEY
;
396 p
->o_flags
|= GRE_KEY
;
401 static const struct net_device_ops vti_netdev_ops
= {
402 .ndo_init
= vti_tunnel_init
,
403 .ndo_uninit
= ip_tunnel_uninit
,
404 .ndo_start_xmit
= vti_tunnel_xmit
,
405 .ndo_do_ioctl
= ip_tunnel_ioctl
,
406 .ndo_change_mtu
= ip_tunnel_change_mtu
,
407 .ndo_get_stats64
= dev_get_tstats64
,
408 .ndo_get_iflink
= ip_tunnel_get_iflink
,
409 .ndo_tunnel_ctl
= vti_tunnel_ctl
,
412 static void vti_tunnel_setup(struct net_device
*dev
)
414 dev
->netdev_ops
= &vti_netdev_ops
;
415 dev
->header_ops
= &ip_tunnel_header_ops
;
416 dev
->type
= ARPHRD_TUNNEL
;
417 ip_tunnel_setup(dev
, vti_net_id
);
/*
 * vti_tunnel_init - .ndo_init hook.
 * @dev: VTI device being initialised
 *
 * Return: the result of ip_tunnel_init().
 * NOTE(review): the original line numbers show statements missing from
 * this listing (between 428/430 and 430/433).
 */
420 static int vti_tunnel_init(struct net_device
*dev
)
422 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
423 struct iphdr
*iph
= &tunnel
->parms
.iph
;
/* The 4-byte local and remote IPv4 addresses become the device and broadcast addresses. */
425 memcpy(dev
->dev_addr
, &iph
->saddr
, 4);
426 memcpy(dev
->broadcast
, &iph
->daddr
, 4);
/* Plain assignment: IFF_NOARP replaces any flags set earlier. */
428 dev
->flags
= IFF_NOARP
;
430 dev
->features
|= NETIF_F_LLTX
;
433 return ip_tunnel_init(dev
);
/*
 * vti_fb_tunnel_init - fill in the outer-header template of the fallback
 * tunnel device; called from vti_init_net() when fb_tunnel_dev exists.
 * @dev: fallback tunnel device
 *
 * NOTE(review): only the protocol assignment is visible; the original
 * line numbers indicate neighbouring statements were dropped.
 */
436 static void __net_init
vti_fb_tunnel_init(struct net_device
*dev
)
438 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
439 struct iphdr
*iph
= &tunnel
->parms
.iph
;
442 iph
->protocol
= IPPROTO_IPIP
;
/*
 * xfrm4 protocol hooks registered for IPPROTO_ESP in vti_init().
 * NOTE(review): the original line numbers show one more line before the
 * closing brace that is missing from this listing.
 */
446 static struct xfrm4_protocol vti_esp4_protocol __read_mostly
= {
447 .handler
= vti_rcv_proto
,
448 .input_handler
= vti_input_proto
,
449 .cb_handler
= vti_rcv_cb
,
450 .err_handler
= vti4_err
,
/*
 * xfrm4 protocol hooks registered for IPPROTO_AH in vti_init(); same
 * handlers as the ESP table.
 * NOTE(review): one line before the closing brace is missing from this
 * listing.
 */
454 static struct xfrm4_protocol vti_ah4_protocol __read_mostly
= {
455 .handler
= vti_rcv_proto
,
456 .input_handler
= vti_input_proto
,
457 .cb_handler
= vti_rcv_cb
,
458 .err_handler
= vti4_err
,
/*
 * xfrm4 protocol hooks registered for IPPROTO_COMP in vti_init(); same
 * handlers as the ESP table.
 * NOTE(review): one line before the closing brace is missing from this
 * listing.
 */
462 static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly
= {
463 .handler
= vti_rcv_proto
,
464 .input_handler
= vti_input_proto
,
465 .cb_handler
= vti_rcv_cb
,
466 .err_handler
= vti4_err
,
470 #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
471 static int vti_rcv_tunnel(struct sk_buff
*skb
)
473 XFRM_SPI_SKB_CB(skb
)->family
= AF_INET
;
474 XFRM_SPI_SKB_CB(skb
)->daddroff
= offsetof(struct iphdr
, daddr
);
476 return vti_input(skb
, IPPROTO_IPIP
, ip_hdr(skb
)->saddr
, 0, false);
/*
 * xfrm tunnel handler registered with xfrm4_tunnel_register(..., AF_INET)
 * in vti_init().
 * NOTE(review): one line before the closing brace is missing from this
 * listing.
 */
479 static struct xfrm_tunnel vti_ipip_handler __read_mostly
= {
480 .handler
= vti_rcv_tunnel
,
481 .cb_handler
= vti_rcv_cb
,
482 .err_handler
= vti4_err
,
486 #if IS_ENABLED(CONFIG_IPV6)
/*
 * xfrm tunnel handler registered with xfrm4_tunnel_register(..., AF_INET6)
 * in vti_init(); built only when CONFIG_IPV6 is enabled.  Uses the same
 * handlers as vti_ipip_handler.
 * NOTE(review): one line before the closing brace is missing from this
 * listing.
 */
487 static struct xfrm_tunnel vti_ipip6_handler __read_mostly
= {
488 .handler
= vti_rcv_tunnel
,
489 .cb_handler
= vti_rcv_cb
,
490 .err_handler
= vti4_err
,
/*
 * vti_init_net - per-namespace init hook (pernet_operations .init).
 * @net: namespace being set up
 *
 * NOTE(review): the declaration of 'err', the check of its value and the
 * return statements are absent from this listing.
 */
496 static int __net_init
vti_init_net(struct net
*net
)
499 struct ip_tunnel_net
*itn
;
/* Generic ip_tunnel per-net setup; "ip_vti0" is the device name passed for the fallback tunnel. */
501 err
= ip_tunnel_init_net(net
, vti_net_id
, &vti_link_ops
, "ip_vti0");
504 itn
= net_generic(net
, vti_net_id
);
/* A fallback device is optional; initialise its header template only when present. */
505 if (itn
->fb_tunnel_dev
)
506 vti_fb_tunnel_init(itn
->fb_tunnel_dev
);
510 static void __net_exit
vti_exit_batch_net(struct list_head
*list_net
)
512 ip_tunnel_delete_nets(list_net
, vti_net_id
, &vti_link_ops
);
/*
 * Per-namespace operations registered with register_pernet_device() in
 * vti_init().  .size requests a struct ip_tunnel_net per namespace.
 * NOTE(review): original line 518 (between .exit_batch and .size) is
 * missing from this listing.
 */
515 static struct pernet_operations vti_net_ops
= {
516 .init
= vti_init_net
,
517 .exit_batch
= vti_exit_batch_net
,
519 .size
= sizeof(struct ip_tunnel_net
),
/*
 * vti_tunnel_validate - rtnl_link_ops .validate hook.
 * NOTE(review): the function body is not present in this listing, so its
 * behaviour cannot be documented from here.
 */
522 static int vti_tunnel_validate(struct nlattr
*tb
[], struct nlattr
*data
[],
523 struct netlink_ext_ack
*extack
)
/*
 * vti_netlink_parms - translate IFLA_VTI_* netlink attributes into tunnel
 * parameters.
 * @data:  attribute table indexed by IFLA_VTI_*
 * @parms: output; zeroed first, then filled from the attributes present
 * (a third parameter 'fwmark', written through below, is declared on a
 * line missing from this listing)
 *
 * Each attribute is optional: a field is written only when its attribute
 * is present, so *fwmark keeps the caller's value when IFLA_VTI_FWMARK is
 * absent.
 * NOTE(review): original lines 536-537 are missing from this listing.
 */
528 static void vti_netlink_parms(struct nlattr
*data
[],
529 struct ip_tunnel_parm
*parms
,
532 memset(parms
, 0, sizeof(*parms
));
534 parms
->iph
.protocol
= IPPROTO_IPIP
;
539 parms
->i_flags
= VTI_ISVTI
;
541 if (data
[IFLA_VTI_LINK
])
542 parms
->link
= nla_get_u32(data
[IFLA_VTI_LINK
]);
/* Keys are read with nla_get_be32() and stored as-is. */
544 if (data
[IFLA_VTI_IKEY
])
545 parms
->i_key
= nla_get_be32(data
[IFLA_VTI_IKEY
]);
547 if (data
[IFLA_VTI_OKEY
])
548 parms
->o_key
= nla_get_be32(data
[IFLA_VTI_OKEY
]);
550 if (data
[IFLA_VTI_LOCAL
])
551 parms
->iph
.saddr
= nla_get_in_addr(data
[IFLA_VTI_LOCAL
]);
553 if (data
[IFLA_VTI_REMOTE
])
554 parms
->iph
.daddr
= nla_get_in_addr(data
[IFLA_VTI_REMOTE
]);
556 if (data
[IFLA_VTI_FWMARK
])
557 *fwmark
= nla_get_u32(data
[IFLA_VTI_FWMARK
]);
/*
 * vti_newlink - rtnl_link_ops .newlink hook.
 * @src_net: not referenced in the visible statements
 * @dev:     device being created
 * @tb:      passed through to ip_tunnel_newlink()
 * @data:    IFLA_VTI_* attributes, parsed by vti_netlink_parms()
 * @extack:  not referenced in the visible statements
 *
 * Return: the result of ip_tunnel_newlink().
 * NOTE(review): the declaration (and initial value) of 'fwmark' is on a
 * line missing from this listing.
 */
560 static int vti_newlink(struct net
*src_net
, struct net_device
*dev
,
561 struct nlattr
*tb
[], struct nlattr
*data
[],
562 struct netlink_ext_ack
*extack
)
564 struct ip_tunnel_parm parms
;
567 vti_netlink_parms(data
, &parms
, &fwmark
);
568 return ip_tunnel_newlink(dev
, tb
, &parms
, fwmark
);
571 static int vti_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
572 struct nlattr
*data
[],
573 struct netlink_ext_ack
*extack
)
575 struct ip_tunnel
*t
= netdev_priv(dev
);
576 __u32 fwmark
= t
->fwmark
;
577 struct ip_tunnel_parm p
;
579 vti_netlink_parms(data
, &p
, &fwmark
);
580 return ip_tunnel_changelink(dev
, tb
, &p
, fwmark
);
/*
 * vti_get_size - rtnl_link_ops .get_size hook.
 * NOTE(review): only two of the per-attribute comments survive in this
 * listing; the size expression itself is not visible.
 */
583 static size_t vti_get_size(const struct net_device
*dev
)
594 /* IFLA_VTI_REMOTE */
596 /* IFLA_VTI_FWMARK */
/*
 * vti_fill_info - rtnl_link_ops .fill_info hook: dump the tunnel
 * configuration as IFLA_VTI_* attributes.
 * @skb: netlink message being built
 * @dev: VTI device
 *
 * The puts are chained with ||, so the first failing put stops the chain.
 * NOTE(review): the statements returning the result are absent from this
 * listing.
 */
601 static int vti_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
603 struct ip_tunnel
*t
= netdev_priv(dev
);
604 struct ip_tunnel_parm
*p
= &t
->parms
;
606 if (nla_put_u32(skb
, IFLA_VTI_LINK
, p
->link
) ||
607 nla_put_be32(skb
, IFLA_VTI_IKEY
, p
->i_key
) ||
608 nla_put_be32(skb
, IFLA_VTI_OKEY
, p
->o_key
) ||
609 nla_put_in_addr(skb
, IFLA_VTI_LOCAL
, p
->iph
.saddr
) ||
610 nla_put_in_addr(skb
, IFLA_VTI_REMOTE
, p
->iph
.daddr
) ||
611 nla_put_u32(skb
, IFLA_VTI_FWMARK
, t
->fwmark
))
617 static const struct nla_policy vti_policy
[IFLA_VTI_MAX
+ 1] = {
618 [IFLA_VTI_LINK
] = { .type
= NLA_U32
},
619 [IFLA_VTI_IKEY
] = { .type
= NLA_U32
},
620 [IFLA_VTI_OKEY
] = { .type
= NLA_U32
},
621 [IFLA_VTI_LOCAL
] = { .len
= sizeof_field(struct iphdr
, saddr
) },
622 [IFLA_VTI_REMOTE
] = { .len
= sizeof_field(struct iphdr
, daddr
) },
623 [IFLA_VTI_FWMARK
] = { .type
= NLA_U32
},
/*
 * rtnetlink link operations for VTI devices, registered with
 * rtnl_link_register() in vti_init().
 * NOTE(review): original line 627 (the first initializer) is missing from
 * this listing.
 */
626 static struct rtnl_link_ops vti_link_ops __read_mostly
= {
628 .maxtype
= IFLA_VTI_MAX
,
629 .policy
= vti_policy
,
/* Private area of each device holds a struct ip_tunnel (see netdev_priv() users above). */
630 .priv_size
= sizeof(struct ip_tunnel
),
631 .setup
= vti_tunnel_setup
,
632 .validate
= vti_tunnel_validate
,
633 .newlink
= vti_newlink
,
634 .changelink
= vti_changelink
,
635 .dellink
= ip_tunnel_dellink
,
636 .get_size
= vti_get_size
,
637 .fill_info
= vti_fill_info
,
638 .get_link_net
= ip_tunnel_get_link_net
,
/*
 * vti_init - module init: register the pernet device, the ESP/AH/IPcomp
 * xfrm4 protocol hooks, the optional IPIP tunnel handlers and finally the
 * rtnetlink link ops.
 *
 * 'msg' names the stage being registered and is printed by the pr_err()
 * at the end of the error path.  The error labels unwind in reverse
 * registration order.
 * NOTE(review): the declarations, the "if (err < 0)"-style checks guarding
 * each goto, some labels, the #endif lines and the return statements are
 * absent from this listing.
 */
641 static int __init
vti_init(void)
646 pr_info("IPv4 over IPsec tunneling driver\n");
648 msg
= "tunnel device";
649 err
= register_pernet_device(&vti_net_ops
);
651 goto pernet_dev_failed
;
653 msg
= "tunnel protocols";
654 err
= xfrm4_protocol_register(&vti_esp4_protocol
, IPPROTO_ESP
);
656 goto xfrm_proto_esp_failed
;
657 err
= xfrm4_protocol_register(&vti_ah4_protocol
, IPPROTO_AH
);
659 goto xfrm_proto_ah_failed
;
660 err
= xfrm4_protocol_register(&vti_ipcomp4_protocol
, IPPROTO_COMP
);
662 goto xfrm_proto_comp_failed
;
/* IPIP handlers exist only when the xfrm tunnel support is built. */
664 #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
666 err
= xfrm4_tunnel_register(&vti_ipip_handler
, AF_INET
);
668 goto xfrm_tunnel_ipip_failed
;
669 #if IS_ENABLED(CONFIG_IPV6)
670 err
= xfrm4_tunnel_register(&vti_ipip6_handler
, AF_INET6
);
672 goto xfrm_tunnel_ipip6_failed
;
676 msg
= "netlink interface";
677 err
= rtnl_link_register(&vti_link_ops
);
679 goto rtnl_link_failed
;
/* Error unwinding: each label undoes everything registered before the failing step. */
684 #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
685 #if IS_ENABLED(CONFIG_IPV6)
686 xfrm4_tunnel_deregister(&vti_ipip6_handler
, AF_INET6
);
687 xfrm_tunnel_ipip6_failed
:
689 xfrm4_tunnel_deregister(&vti_ipip_handler
, AF_INET
);
690 xfrm_tunnel_ipip_failed
:
692 xfrm4_protocol_deregister(&vti_ipcomp4_protocol
, IPPROTO_COMP
);
693 xfrm_proto_comp_failed
:
694 xfrm4_protocol_deregister(&vti_ah4_protocol
, IPPROTO_AH
);
695 xfrm_proto_ah_failed
:
696 xfrm4_protocol_deregister(&vti_esp4_protocol
, IPPROTO_ESP
);
697 xfrm_proto_esp_failed
:
698 unregister_pernet_device(&vti_net_ops
);
700 pr_err("vti init: failed to register %s\n", msg
);
704 static void __exit
vti_fini(void)
706 rtnl_link_unregister(&vti_link_ops
);
707 #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
708 #if IS_ENABLED(CONFIG_IPV6)
709 xfrm4_tunnel_deregister(&vti_ipip6_handler
, AF_INET6
);
711 xfrm4_tunnel_deregister(&vti_ipip_handler
, AF_INET
);
713 xfrm4_protocol_deregister(&vti_ipcomp4_protocol
, IPPROTO_COMP
);
714 xfrm4_protocol_deregister(&vti_ah4_protocol
, IPPROTO_AH
);
715 xfrm4_protocol_deregister(&vti_esp4_protocol
, IPPROTO_ESP
);
716 unregister_pernet_device(&vti_net_ops
);
/* Module entry/exit points and metadata; the aliases name the "vti" link kind and the "ip_vti0" device. */
719 module_init(vti_init
);
720 module_exit(vti_fini
);
721 MODULE_LICENSE("GPL");
722 MODULE_ALIAS_RTNL_LINK("vti");
723 MODULE_ALIAS_NETDEV("ip_vti0");