2 * Linux NET3: IP/IP protocol decoder modified to support
3 * virtual tunnel interface
6 * Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 This version of net/ipv4/ip_vti.c is cloned from net/ipv4/ipip.c
18 For comments look at net/ipv4/ip_gre.c --ANK
22 #include <linux/capability.h>
23 #include <linux/module.h>
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/netfilter_ipv4.h>
36 #include <linux/if_ether.h>
37 #include <linux/icmpv6.h>
42 #include <net/ip_tunnels.h>
43 #include <net/inet_ecn.h>
45 #include <net/net_namespace.h>
46 #include <net/netns/generic.h>
/*
 * Forward declarations for objects referenced before their definitions:
 * the rtnl link ops table, the id handed to net_generic() to find this
 * driver's per-netns data, and the callback installed as .ndo_init.
 */
48 static struct rtnl_link_ops vti_link_ops __read_mostly
;
50 static int vti_net_id __read_mostly
;
51 static int vti_tunnel_init(struct net_device
*dev
);
/*
 * vti_input - common receive path; installed as .input_handler in the
 * xfrm4_protocol tables of this file and called directly by vti_rcv().
 *
 * Visible steps: look up the tunnel for the receiving interface and the
 * outer source/destination address (TUNNEL_NO_KEY), run the IPv4 inbound
 * xfrm policy check, remember the tunnel in the skb's xfrm tunnel control
 * block and pass the packet on to xfrm_input().
 *
 * NOTE(review): the embedded original line numbers skip (55, 63, 65-66,
 * 70-76), so the last parameter's declaration and the handling of a failed
 * lookup or failed policy check are not visible in this extract.
 */
53 static int vti_input(struct sk_buff
*skb
, int nexthdr
, __be32 spi
,
56 struct ip_tunnel
*tunnel
;
57 const struct iphdr
*iph
= ip_hdr(skb
);
58 struct net
*net
= dev_net(skb
->dev
);
59 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
/* Match on ingress ifindex plus outer saddr/daddr; no key is used. */
61 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
62 iph
->saddr
, iph
->daddr
, 0);
64 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
/* Stash the tunnel so vti_rcv_cb() can read it back from the skb. */
67 XFRM_TUNNEL_SKB_CB(skb
)->tunnel
.ip4
= tunnel
;
69 return xfrm_input(skb
, nexthdr
, spi
, encap_type
);
78 static int vti_rcv(struct sk_buff
*skb
)
80 XFRM_SPI_SKB_CB(skb
)->family
= AF_INET
;
81 XFRM_SPI_SKB_CB(skb
)->daddroff
= offsetof(struct iphdr
, daddr
);
83 return vti_input(skb
, ip_hdr(skb
)->protocol
, 0, 0);
/*
 * vti_rcv_cb - installed as .cb_handler in the xfrm4_protocol tables of
 * this file.
 *
 * Visible steps: read back the tunnel stored in the skb's xfrm tunnel
 * control block, bump rx_errors and rx_dropped on the device, fetch the
 * xfrm state of the skb to learn the inner address family, run
 * xfrm_policy_check() for that family, scrub the packet and add it to the
 * per-cpu rx_packets/rx_bytes counters.
 *
 * NOTE(review): many original lines are missing from this extract (the
 * embedded numbers skip), including the declarations of x and ret, the
 * assignment of dev, the conditions guarding the error counters and every
 * return statement.
 */
86 static int vti_rcv_cb(struct sk_buff
*skb
, int err
)
88 unsigned short family
;
89 struct net_device
*dev
;
90 struct pcpu_sw_netstats
*tstats
;
92 struct ip_tunnel
*tunnel
= XFRM_TUNNEL_SKB_CB(skb
)->tunnel
.ip4
;
93 u32 orig_mark
= skb
->mark
;
102 dev
->stats
.rx_errors
++;
103 dev
->stats
.rx_dropped
++;
108 x
= xfrm_input_state(skb
);
109 family
= x
->inner_mode
->afinfo
->family
;
/*
 * The policy check runs with skb->mark replaced by the tunnel's input key;
 * the saved mark is put back immediately afterwards.
 */
111 skb
->mark
= be32_to_cpu(tunnel
->parms
.i_key
);
112 ret
= xfrm_policy_check(NULL
, XFRM_POLICY_IN
, skb
, family
);
113 skb
->mark
= orig_mark
;
/* Second argument is true when the tunnel's netns differs from skb->dev's. */
118 skb_scrub_packet(skb
, !net_eq(tunnel
->net
, dev_net(skb
->dev
)));
121 tstats
= this_cpu_ptr(dev
->tstats
);
123 u64_stats_update_begin(&tstats
->syncp
);
124 tstats
->rx_packets
++;
125 tstats
->rx_bytes
+= skb
->len
;
126 u64_stats_update_end(&tstats
->syncp
);
/*
 * vti_state_check - check xfrm state @x against tunnel endpoints @dst and
 * @src; vti_xmit() calls it with the configured daddr/saddr of the tunnel.
 *
 * Visible tests: @x is non-NULL, its mode is XFRM_MODE_TUNNEL and its
 * family is AF_INET; addresses are compared either with xfrm_addr_equal()
 * (source only) or with xfrm_state_addr_check() (both).
 *
 * NOTE(review): the condition choosing between the two address comparisons
 * and the literal return values sit on lines missing from this extract.
 */
131 static bool vti_state_check(const struct xfrm_state
*x
, __be32 dst
, __be32 src
)
133 xfrm_address_t
*daddr
= (xfrm_address_t
*)&dst
;
134 xfrm_address_t
*saddr
= (xfrm_address_t
*)&src
;
136 /* if there is no transform then this tunnel is not functional.
137 * Or if the xfrm is not mode tunnel.
139 if (!x
|| x
->props
.mode
!= XFRM_MODE_TUNNEL
||
140 x
->props
.family
!= AF_INET
)
144 return xfrm_addr_equal(saddr
, &x
->props
.saddr
, AF_INET
);
146 if (!xfrm_state_addr_check(x
, daddr
, saddr
, AF_INET
))
/*
 * vti_xmit - send @skb out through tunnel device @dev; called by
 * vti_tunnel_xmit() with a flow descriptor as third argument (used below
 * as "fl"; its declaration is on a line missing from this extract).
 *
 * Visible steps: take the skb's current dst, resolve it with xfrm_lookup()
 * in the tunnel's netns, verify with vti_state_check() that the resulting
 * xfrm state matches the tunnel's configured endpoints, scrub the packet,
 * attach the new dst, point skb->dev at the dst's device and transmit with
 * dst_output(); the result is fed to iptunnel_xmit_stats().
 *
 * Failure paths bump tx_carrier_errors, collisions or tx_errors and may
 * call dst_link_failure().
 *
 * NOTE(review): the conditions selecting those paths, the error labels and
 * the return statements are partly on missing lines.
 */
152 static netdev_tx_t
vti_xmit(struct sk_buff
*skb
, struct net_device
*dev
,
155 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
156 struct ip_tunnel_parm
*parms
= &tunnel
->parms
;
157 struct dst_entry
*dst
= skb_dst(skb
);
158 struct net_device
*tdev
; /* Device to other host */
159 int pkt_len
= skb
->len
;
163 dev
->stats
.tx_carrier_errors
++;
168 dst
= xfrm_lookup(tunnel
->net
, dst
, fl
, NULL
, 0);
170 dev
->stats
.tx_carrier_errors
++;
174 if (!vti_state_check(dst
->xfrm
, parms
->iph
.daddr
, parms
->iph
.saddr
)) {
175 dev
->stats
.tx_carrier_errors
++;
184 dev
->stats
.collisions
++;
188 if (tunnel
->err_count
> 0) {
189 if (time_before(jiffies
,
190 tunnel
->err_time
+ IPTUNNEL_ERR_TIMEO
)) {
192 dst_link_failure(skb
);
194 tunnel
->err_count
= 0;
197 skb_scrub_packet(skb
, !net_eq(tunnel
->net
, dev_net(dev
)));
198 skb_dst_set(skb
, dst
);
199 skb
->dev
= skb_dst(skb
)->dev
;
201 err
= dst_output(tunnel
->net
, skb
->sk
, skb
);
202 if (net_xmit_eval(err
) == 0)
204 iptunnel_xmit_stats(err
, &dev
->stats
, dev
->tstats
);
208 dst_link_failure(skb
);
210 dev
->stats
.tx_errors
++;
215 /* This function assumes it is being called from dev_queue_xmit()
216 * and that skb is filled properly by that function.
 */
/*
 * vti_tunnel_xmit - installed as .ndo_start_xmit in vti_netdev_ops.
 *
 * Builds a zeroed flow descriptor, fills it from the packet with
 * xfrm_decode_session() according to skb->protocol (IPv4 or IPv6) and
 * clears the matching IP control block in the skb. The flow mark is then
 * replaced by the tunnel's output key and the packet is handed to
 * vti_xmit().
 *
 * NOTE(review): the declaration of fl, the break statements and the branch
 * that bumps tx_errors are on lines missing from this extract.
 */
218 static netdev_tx_t
vti_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
220 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
223 memset(&fl
, 0, sizeof(fl
));
225 switch (skb
->protocol
) {
226 case htons(ETH_P_IP
):
227 xfrm_decode_session(skb
, &fl
, AF_INET
);
228 memset(IPCB(skb
), 0, sizeof(*IPCB(skb
)));
230 case htons(ETH_P_IPV6
):
231 xfrm_decode_session(skb
, &fl
, AF_INET6
);
232 memset(IP6CB(skb
), 0, sizeof(*IP6CB(skb
)));
235 dev
->stats
.tx_errors
++;
240 /* override mark with tunnel output key */
241 fl
.flowi_mark
= be32_to_cpu(tunnel
->parms
.o_key
);
243 return vti_xmit(skb
, dev
, &fl
);
/*
 * vti4_err - installed as .err_handler in the xfrm4_protocol tables of
 * this file.
 *
 * Visible steps: treat skb->data as an IPv4 header, look up the tunnel for
 * it, take the tunnel's output key as mark, locate the ESP / AH / IPComp
 * header right behind the IPv4 header (ihl << 2 bytes in), look up the
 * matching xfrm state and then either update the path MTU (ICMP
 * destination unreachable) or process a redirect.
 *
 * NOTE(review): the declarations of spi and mark, the switch on the
 * protocol, the remaining ICMP cases, the NULL checks and all return
 * statements are on lines missing from this extract.
 */
246 static int vti4_err(struct sk_buff
*skb
, u32 info
)
250 struct xfrm_state
*x
;
251 struct ip_tunnel
*tunnel
;
252 struct ip_esp_hdr
*esph
;
253 struct ip_auth_hdr
*ah
;
254 struct ip_comp_hdr
*ipch
;
255 struct net
*net
= dev_net(skb
->dev
);
256 const struct iphdr
*iph
= (const struct iphdr
*)skb
->data
;
257 int protocol
= iph
->protocol
;
258 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
/*
 * daddr/saddr are passed in the opposite order to the lookup in
 * vti_input() - presumably because skb->data holds the header of a packet
 * this host sent; confirm against the caller.
 */
260 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
261 iph
->daddr
, iph
->saddr
, 0);
265 mark
= be32_to_cpu(tunnel
->parms
.o_key
);
269 esph
= (struct ip_esp_hdr
*)(skb
->data
+(iph
->ihl
<<2));
273 ah
= (struct ip_auth_hdr
*)(skb
->data
+(iph
->ihl
<<2));
277 ipch
= (struct ip_comp_hdr
*)(skb
->data
+(iph
->ihl
<<2));
/* The 16-bit CPI is widened into the 32-bit big-endian SPI slot. */
278 spi
= htonl(ntohs(ipch
->cpi
));
284 switch (icmp_hdr(skb
)->type
) {
285 case ICMP_DEST_UNREACH
:
286 if (icmp_hdr(skb
)->code
!= ICMP_FRAG_NEEDED
)
294 x
= xfrm_state_lookup(net
, mark
, (const xfrm_address_t
*)&iph
->daddr
,
295 spi
, protocol
, AF_INET
);
299 if (icmp_hdr(skb
)->type
== ICMP_DEST_UNREACH
)
300 ipv4_update_pmtu(skb
, net
, info
, 0, 0, protocol
, 0);
302 ipv4_redirect(skb
, net
, 0, 0, protocol
, 0);
/*
 * vti_tunnel_ioctl - installed as .ndo_do_ioctl in vti_netdev_ops.
 *
 * Copies a struct ip_tunnel_parm from user space. For SIOCADDTUNNEL and
 * SIOCCHGTUNNEL it validates the IP header template (version 4, protocol
 * IPPROTO_IPIP, further conditions on a missing line) and tests the
 * GRE_KEY bit of i_flags and o_flags. i_flags is then set to VTI_ISVTI and
 * the request is passed to ip_tunnel_ioctl(). For every command except
 * SIOCDELTUNNEL, GRE_KEY is set in both flag words before the parameters
 * are copied back to user space.
 *
 * NOTE(review): the return type line, the declaration of err, the actions
 * taken when a GRE_KEY test fails and all return statements are missing
 * from this extract.
 */
309 vti_tunnel_ioctl(struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
312 struct ip_tunnel_parm p
;
314 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
317 if (cmd
== SIOCADDTUNNEL
|| cmd
== SIOCCHGTUNNEL
) {
318 if (p
.iph
.version
!= 4 || p
.iph
.protocol
!= IPPROTO_IPIP
||
323 if (!(p
.i_flags
& GRE_KEY
))
325 if (!(p
.o_flags
& GRE_KEY
))
328 p
.i_flags
= VTI_ISVTI
;
330 err
= ip_tunnel_ioctl(dev
, &p
, cmd
);
334 if (cmd
!= SIOCDELTUNNEL
) {
335 p
.i_flags
|= GRE_KEY
;
336 p
.o_flags
|= GRE_KEY
;
339 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
/*
 * net_device_ops for VTI devices. vti_tunnel_setup() installs this table
 * and is_vti_tunnel() compares against its address to recognise VTI
 * devices. Init, xmit and ioctl are VTI-specific; the other callbacks come
 * from the shared ip_tunnel code.
 * NOTE(review): the closing of the initializer is on a line missing from
 * this extract.
 */
344 static const struct net_device_ops vti_netdev_ops
= {
345 .ndo_init
= vti_tunnel_init
,
346 .ndo_uninit
= ip_tunnel_uninit
,
347 .ndo_start_xmit
= vti_tunnel_xmit
,
348 .ndo_do_ioctl
= vti_tunnel_ioctl
,
349 .ndo_change_mtu
= ip_tunnel_change_mtu
,
350 .ndo_get_stats64
= ip_tunnel_get_stats64
,
351 .ndo_get_iflink
= ip_tunnel_get_iflink
,
354 static void vti_tunnel_setup(struct net_device
*dev
)
356 dev
->netdev_ops
= &vti_netdev_ops
;
357 dev
->type
= ARPHRD_TUNNEL
;
358 ip_tunnel_setup(dev
, vti_net_id
);
/*
 * vti_tunnel_init - installed as .ndo_init in vti_netdev_ops.
 *
 * Copies the tunnel's configured source and destination IPv4 address (4
 * bytes each) into the device's hardware and broadcast address, sets the
 * MTU to ETH_DATA_LEN, sets the flags to IFF_NOARP, adds NETIF_F_LLTX to
 * the features and finishes with ip_tunnel_init().
 *
 * NOTE(review): original lines 371 and 373 are missing from this extract,
 * so further device settings may exist.
 */
361 static int vti_tunnel_init(struct net_device
*dev
)
363 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
364 struct iphdr
*iph
= &tunnel
->parms
.iph
;
366 memcpy(dev
->dev_addr
, &iph
->saddr
, 4);
367 memcpy(dev
->broadcast
, &iph
->daddr
, 4);
369 dev
->mtu
= ETH_DATA_LEN
;
370 dev
->flags
= IFF_NOARP
;
372 dev
->features
|= NETIF_F_LLTX
;
375 return ip_tunnel_init(dev
);
/*
 * vti_fb_tunnel_init - prepare the header template of the fallback tunnel
 * device; vti_init_net() calls it on itn->fb_tunnel_dev.
 *
 * The visible statement sets the template's protocol to IPPROTO_IPIP.
 * NOTE(review): original lines 383 and 385 are missing from this extract,
 * so other header fields are probably initialised here too - confirm
 * against the full file.
 */
378 static void __net_init
vti_fb_tunnel_init(struct net_device
*dev
)
380 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
381 struct iphdr
*iph
= &tunnel
->parms
.iph
;
384 iph
->protocol
= IPPROTO_IPIP
;
/*
 * xfrm4 protocol hooks registered for IPPROTO_ESP in vti_init().
 * NOTE(review): original lines 389 and 393 are missing from this extract,
 * so the table may carry further fields.
 */
388 static struct xfrm4_protocol vti_esp4_protocol __read_mostly
= {
390 .input_handler
= vti_input
,
391 .cb_handler
= vti_rcv_cb
,
392 .err_handler
= vti4_err
,
/*
 * xfrm4 protocol hooks registered for IPPROTO_AH in vti_init().
 * NOTE(review): original lines 397 and 401 are missing from this extract,
 * so the table may carry further fields.
 */
396 static struct xfrm4_protocol vti_ah4_protocol __read_mostly
= {
398 .input_handler
= vti_input
,
399 .cb_handler
= vti_rcv_cb
,
400 .err_handler
= vti4_err
,
/*
 * xfrm4 protocol hooks registered for IPPROTO_COMP in vti_init().
 * NOTE(review): original lines 405 and 409 are missing from this extract,
 * so the table may carry further fields.
 */
404 static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly
= {
406 .input_handler
= vti_input
,
407 .cb_handler
= vti_rcv_cb
,
408 .err_handler
= vti4_err
,
/*
 * vti_init_net - pernet .init callback (see vti_net_ops).
 *
 * Sets up the generic ip_tunnel per-netns state under the fallback device
 * name "ip_vti0", then initialises that fallback device with
 * vti_fb_tunnel_init().
 *
 * NOTE(review): the declaration of err, the check of the
 * ip_tunnel_init_net() result and the return statements are on lines
 * missing from this extract.
 */
412 static int __net_init
vti_init_net(struct net
*net
)
415 struct ip_tunnel_net
*itn
;
417 err
= ip_tunnel_init_net(net
, vti_net_id
, &vti_link_ops
, "ip_vti0");
420 itn
= net_generic(net
, vti_net_id
);
421 vti_fb_tunnel_init(itn
->fb_tunnel_dev
);
425 static void __net_exit
vti_exit_net(struct net
*net
)
427 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
428 ip_tunnel_delete_net(itn
, &vti_link_ops
);
/*
 * Per-network-namespace hooks, registered in vti_init() with
 * register_pernet_device(). .size is the size of struct ip_tunnel_net.
 * NOTE(review): original line 434 is missing from this extract, so the
 * table may carry a further field.
 */
431 static struct pernet_operations vti_net_ops
= {
432 .init
= vti_init_net
,
433 .exit
= vti_exit_net
,
435 .size
= sizeof(struct ip_tunnel_net
),
/*
 * vti_tunnel_validate - installed as .validate in vti_link_ops.
 * NOTE(review): only the signature survives in this extract; the body is
 * on missing lines, so nothing can be said here about what it checks.
 */
438 static int vti_tunnel_validate(struct nlattr
*tb
[], struct nlattr
*data
[])
/*
 * vti_netlink_parms - translate IFLA_VTI_* netlink attributes into a
 * struct ip_tunnel_parm; used by vti_newlink() and vti_changelink().
 *
 * @parms is zeroed first, the header template's protocol is set to
 * IPPROTO_IPIP and i_flags to VTI_ISVTI. Each of link, input key, output
 * key, local address and remote address is then copied only when its
 * attribute is present in @data.
 *
 * NOTE(review): original lines 449-452 are missing from this extract; a
 * guard for a NULL @data may live there - confirm against the full file.
 */
443 static void vti_netlink_parms(struct nlattr
*data
[],
444 struct ip_tunnel_parm
*parms
)
446 memset(parms
, 0, sizeof(*parms
));
448 parms
->iph
.protocol
= IPPROTO_IPIP
;
453 parms
->i_flags
= VTI_ISVTI
;
455 if (data
[IFLA_VTI_LINK
])
456 parms
->link
= nla_get_u32(data
[IFLA_VTI_LINK
]);
458 if (data
[IFLA_VTI_IKEY
])
459 parms
->i_key
= nla_get_be32(data
[IFLA_VTI_IKEY
]);
461 if (data
[IFLA_VTI_OKEY
])
462 parms
->o_key
= nla_get_be32(data
[IFLA_VTI_OKEY
]);
464 if (data
[IFLA_VTI_LOCAL
])
465 parms
->iph
.saddr
= nla_get_in_addr(data
[IFLA_VTI_LOCAL
]);
467 if (data
[IFLA_VTI_REMOTE
])
468 parms
->iph
.daddr
= nla_get_in_addr(data
[IFLA_VTI_REMOTE
]);
472 static int vti_newlink(struct net
*src_net
, struct net_device
*dev
,
473 struct nlattr
*tb
[], struct nlattr
*data
[])
475 struct ip_tunnel_parm parms
;
477 vti_netlink_parms(data
, &parms
);
478 return ip_tunnel_newlink(dev
, tb
, &parms
);
481 static int vti_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
482 struct nlattr
*data
[])
484 struct ip_tunnel_parm p
;
486 vti_netlink_parms(data
, &p
);
487 return ip_tunnel_changelink(dev
, tb
, &p
);
/*
 * vti_get_size - installed as .get_size in vti_link_ops.
 * NOTE(review): only the signature and one attribute comment survive in
 * this extract; the size computation itself is on missing lines.
 */
490 static size_t vti_get_size(const struct net_device
*dev
)
501 /* IFLA_VTI_REMOTE */
506 static int vti_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
508 struct ip_tunnel
*t
= netdev_priv(dev
);
509 struct ip_tunnel_parm
*p
= &t
->parms
;
511 nla_put_u32(skb
, IFLA_VTI_LINK
, p
->link
);
512 nla_put_be32(skb
, IFLA_VTI_IKEY
, p
->i_key
);
513 nla_put_be32(skb
, IFLA_VTI_OKEY
, p
->o_key
);
514 nla_put_in_addr(skb
, IFLA_VTI_LOCAL
, p
->iph
.saddr
);
515 nla_put_in_addr(skb
, IFLA_VTI_REMOTE
, p
->iph
.daddr
);
/*
 * Netlink attribute policy for IFLA_VTI_*; referenced from vti_link_ops.
 * Link and both keys are declared as NLA_U32; local and remote address are
 * constrained by length to the size of the iphdr saddr/daddr fields.
 * The keys are read with nla_get_be32() in vti_netlink_parms().
 */
520 static const struct nla_policy vti_policy
[IFLA_VTI_MAX
+ 1] = {
521 [IFLA_VTI_LINK
] = { .type
= NLA_U32
},
522 [IFLA_VTI_IKEY
] = { .type
= NLA_U32
},
523 [IFLA_VTI_OKEY
] = { .type
= NLA_U32
},
524 [IFLA_VTI_LOCAL
] = { .len
= FIELD_SIZEOF(struct iphdr
, saddr
) },
525 [IFLA_VTI_REMOTE
] = { .len
= FIELD_SIZEOF(struct iphdr
, daddr
) },
/*
 * rtnetlink link operations for VTI devices; registered in vti_init() with
 * rtnl_link_register(). The private area of each device is a struct
 * ip_tunnel. Setup, validate, newlink, changelink, get_size and fill_info
 * are VTI-specific; dellink and get_link_net come from the shared
 * ip_tunnel code.
 * NOTE(review): original line 529 is missing from this extract, so the
 * table has at least one field that is not visible here.
 */
528 static struct rtnl_link_ops vti_link_ops __read_mostly
= {
530 .maxtype
= IFLA_VTI_MAX
,
531 .policy
= vti_policy
,
532 .priv_size
= sizeof(struct ip_tunnel
),
533 .setup
= vti_tunnel_setup
,
534 .validate
= vti_tunnel_validate
,
535 .newlink
= vti_newlink
,
536 .changelink
= vti_changelink
,
537 .dellink
= ip_tunnel_dellink
,
538 .get_size
= vti_get_size
,
539 .fill_info
= vti_fill_info
,
540 .get_link_net
= ip_tunnel_get_link_net
,
543 static bool is_vti_tunnel(const struct net_device
*dev
)
545 return dev
->netdev_ops
== &vti_netdev_ops
;
/*
 * vti_device_event - netdevice notifier callback (see vti_notifier_block).
 *
 * Devices that are not VTI tunnels are filtered out by is_vti_tunnel().
 * For a VTI tunnel whose own netns differs from the netns the device
 * currently lives in, xfrm_garbage_collect() is run on the tunnel's netns.
 *
 * NOTE(review): the event filter and the return values are on lines
 * missing from this extract, so which events trigger the garbage
 * collection cannot be seen here.
 */
548 static int vti_device_event(struct notifier_block
*unused
,
549 unsigned long event
, void *ptr
)
551 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
/* Computed before the type check below; only used after it. */
552 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
554 if (!is_vti_tunnel(dev
))
559 if (!net_eq(tunnel
->net
, dev_net(dev
)))
560 xfrm_garbage_collect(tunnel
->net
);
/*
 * Notifier block carrying vti_device_event(); registered in vti_init() and
 * unregistered in vti_fini() and on the init error path.
 */
566 static struct notifier_block vti_notifier_block __read_mostly
= {
567 .notifier_call
= vti_device_event
,
/*
 * vti_init - module initialisation.
 *
 * Registration order: netdevice notifier, pernet device ops, the xfrm4
 * protocol handlers for ESP, AH and IPComp, and finally the rtnl link ops.
 * The error path deregisters in reverse order; msg names the stage that
 * failed and is printed by the final pr_err().
 *
 * NOTE(review): the declarations of msg and err, the error tests before
 * each goto, two of the labels and the return statements are on lines
 * missing from this extract.
 * NOTE(review): the result of register_netdevice_notifier() is not checked
 * in the visible code.
 */
570 static int __init
vti_init(void)
575 pr_info("IPv4 over IPsec tunneling driver\n");
577 register_netdevice_notifier(&vti_notifier_block
);
579 msg
= "tunnel device";
580 err
= register_pernet_device(&vti_net_ops
);
582 goto pernet_dev_failed
;
584 msg
= "tunnel protocols";
585 err
= xfrm4_protocol_register(&vti_esp4_protocol
, IPPROTO_ESP
);
587 goto xfrm_proto_esp_failed
;
588 err
= xfrm4_protocol_register(&vti_ah4_protocol
, IPPROTO_AH
);
590 goto xfrm_proto_ah_failed
;
591 err
= xfrm4_protocol_register(&vti_ipcomp4_protocol
, IPPROTO_COMP
);
593 goto xfrm_proto_comp_failed
;
595 msg
= "netlink interface";
596 err
= rtnl_link_register(&vti_link_ops
);
598 goto rtnl_link_failed
;
/* Error unwinding: each step undoes one successful registration above. */
603 xfrm4_protocol_deregister(&vti_ipcomp4_protocol
, IPPROTO_COMP
);
604 xfrm_proto_comp_failed
:
605 xfrm4_protocol_deregister(&vti_ah4_protocol
, IPPROTO_AH
);
606 xfrm_proto_ah_failed
:
607 xfrm4_protocol_deregister(&vti_esp4_protocol
, IPPROTO_ESP
);
608 xfrm_proto_esp_failed
:
609 unregister_pernet_device(&vti_net_ops
);
611 unregister_netdevice_notifier(&vti_notifier_block
);
612 pr_err("vti init: failed to register %s\n", msg
);
616 static void __exit
vti_fini(void)
618 rtnl_link_unregister(&vti_link_ops
);
619 xfrm4_protocol_deregister(&vti_ipcomp4_protocol
, IPPROTO_COMP
);
620 xfrm4_protocol_deregister(&vti_ah4_protocol
, IPPROTO_AH
);
621 xfrm4_protocol_deregister(&vti_esp4_protocol
, IPPROTO_ESP
);
622 unregister_pernet_device(&vti_net_ops
);
623 unregister_netdevice_notifier(&vti_notifier_block
);
/*
 * Module entry and exit points, license, and aliases for the rtnl link
 * kind "vti" and the fallback device name "ip_vti0" (the same name that
 * vti_init_net() passes to ip_tunnel_init_net()).
 */
626 module_init(vti_init
);
627 module_exit(vti_fini
);
628 MODULE_LICENSE("GPL");
629 MODULE_ALIAS_RTNL_LINK("vti");
630 MODULE_ALIAS_NETDEV("ip_vti0");