2 * Copyright (c) 2013 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/init.h>
34 #include <linux/in6.h>
35 #include <linux/inetdevice.h>
36 #include <linux/igmp.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/etherdevice.h>
39 #include <linux/if_ether.h>
40 #include <linux/if_vlan.h>
41 #include <linux/rculist.h>
42 #include <linux/err.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
59 #if IS_ENABLED(CONFIG_IPV6)
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
65 static unsigned int ip_tunnel_hash(__be32 key
, __be32 remote
)
67 return hash_32((__force u32
)key
^ (__force u32
)remote
,
71 static bool ip_tunnel_key_match(const struct ip_tunnel_parm
*p
,
72 __be16 flags
, __be32 key
)
74 if (p
->i_flags
& TUNNEL_KEY
) {
75 if (flags
& TUNNEL_KEY
)
76 return key
== p
->i_key
;
78 /* key expected, none present */
81 return !(flags
& TUNNEL_KEY
);
84 /* Fallback tunnel: no source, no destination, no key, no options
87 We require exact key match i.e. if a key is present in packet
88 it will match only tunnel with the same key; if it is not present,
89 it will match only keyless tunnel.
91 All keysless packets, if not matched configured keyless tunnels
92 will match fallback tunnel.
93 Given src, dst and key, find appropriate for input tunnel.
95 struct ip_tunnel
*ip_tunnel_lookup(struct ip_tunnel_net
*itn
,
96 int link
, __be16 flags
,
97 __be32 remote
, __be32 local
,
101 struct ip_tunnel
*t
, *cand
= NULL
;
102 struct hlist_head
*head
;
104 hash
= ip_tunnel_hash(key
, remote
);
105 head
= &itn
->tunnels
[hash
];
107 hlist_for_each_entry_rcu(t
, head
, hash_node
) {
108 if (local
!= t
->parms
.iph
.saddr
||
109 remote
!= t
->parms
.iph
.daddr
||
110 !(t
->dev
->flags
& IFF_UP
))
113 if (!ip_tunnel_key_match(&t
->parms
, flags
, key
))
116 if (t
->parms
.link
== link
)
122 hlist_for_each_entry_rcu(t
, head
, hash_node
) {
123 if (remote
!= t
->parms
.iph
.daddr
||
124 t
->parms
.iph
.saddr
!= 0 ||
125 !(t
->dev
->flags
& IFF_UP
))
128 if (!ip_tunnel_key_match(&t
->parms
, flags
, key
))
131 if (t
->parms
.link
== link
)
137 hash
= ip_tunnel_hash(key
, 0);
138 head
= &itn
->tunnels
[hash
];
140 hlist_for_each_entry_rcu(t
, head
, hash_node
) {
141 if ((local
!= t
->parms
.iph
.saddr
|| t
->parms
.iph
.daddr
!= 0) &&
142 (local
!= t
->parms
.iph
.daddr
|| !ipv4_is_multicast(local
)))
145 if (!(t
->dev
->flags
& IFF_UP
))
148 if (!ip_tunnel_key_match(&t
->parms
, flags
, key
))
151 if (t
->parms
.link
== link
)
157 if (flags
& TUNNEL_NO_KEY
)
158 goto skip_key_lookup
;
160 hlist_for_each_entry_rcu(t
, head
, hash_node
) {
161 if (t
->parms
.i_key
!= key
||
162 t
->parms
.iph
.saddr
!= 0 ||
163 t
->parms
.iph
.daddr
!= 0 ||
164 !(t
->dev
->flags
& IFF_UP
))
167 if (t
->parms
.link
== link
)
177 t
= rcu_dereference(itn
->collect_md_tun
);
181 if (itn
->fb_tunnel_dev
&& itn
->fb_tunnel_dev
->flags
& IFF_UP
)
182 return netdev_priv(itn
->fb_tunnel_dev
);
186 EXPORT_SYMBOL_GPL(ip_tunnel_lookup
);
188 static struct hlist_head
*ip_bucket(struct ip_tunnel_net
*itn
,
189 struct ip_tunnel_parm
*parms
)
193 __be32 i_key
= parms
->i_key
;
195 if (parms
->iph
.daddr
&& !ipv4_is_multicast(parms
->iph
.daddr
))
196 remote
= parms
->iph
.daddr
;
200 if (!(parms
->i_flags
& TUNNEL_KEY
) && (parms
->i_flags
& VTI_ISVTI
))
203 h
= ip_tunnel_hash(i_key
, remote
);
204 return &itn
->tunnels
[h
];
207 static void ip_tunnel_add(struct ip_tunnel_net
*itn
, struct ip_tunnel
*t
)
209 struct hlist_head
*head
= ip_bucket(itn
, &t
->parms
);
212 rcu_assign_pointer(itn
->collect_md_tun
, t
);
213 hlist_add_head_rcu(&t
->hash_node
, head
);
216 static void ip_tunnel_del(struct ip_tunnel_net
*itn
, struct ip_tunnel
*t
)
219 rcu_assign_pointer(itn
->collect_md_tun
, NULL
);
220 hlist_del_init_rcu(&t
->hash_node
);
223 static struct ip_tunnel
*ip_tunnel_find(struct ip_tunnel_net
*itn
,
224 struct ip_tunnel_parm
*parms
,
227 __be32 remote
= parms
->iph
.daddr
;
228 __be32 local
= parms
->iph
.saddr
;
229 __be32 key
= parms
->i_key
;
230 __be16 flags
= parms
->i_flags
;
231 int link
= parms
->link
;
232 struct ip_tunnel
*t
= NULL
;
233 struct hlist_head
*head
= ip_bucket(itn
, parms
);
235 hlist_for_each_entry_rcu(t
, head
, hash_node
) {
236 if (local
== t
->parms
.iph
.saddr
&&
237 remote
== t
->parms
.iph
.daddr
&&
238 link
== t
->parms
.link
&&
239 type
== t
->dev
->type
&&
240 ip_tunnel_key_match(&t
->parms
, flags
, key
))
246 static struct net_device
*__ip_tunnel_create(struct net
*net
,
247 const struct rtnl_link_ops
*ops
,
248 struct ip_tunnel_parm
*parms
)
251 struct ip_tunnel
*tunnel
;
252 struct net_device
*dev
;
256 strlcpy(name
, parms
->name
, IFNAMSIZ
);
258 if (strlen(ops
->kind
) > (IFNAMSIZ
- 3)) {
262 strlcpy(name
, ops
->kind
, IFNAMSIZ
);
263 strncat(name
, "%d", 2);
267 dev
= alloc_netdev(ops
->priv_size
, name
, NET_NAME_UNKNOWN
, ops
->setup
);
272 dev_net_set(dev
, net
);
274 dev
->rtnl_link_ops
= ops
;
276 tunnel
= netdev_priv(dev
);
277 tunnel
->parms
= *parms
;
280 err
= register_netdevice(dev
);
292 static inline void init_tunnel_flow(struct flowi4
*fl4
,
294 __be32 daddr
, __be32 saddr
,
295 __be32 key
, __u8 tos
, int oif
)
297 memset(fl4
, 0, sizeof(*fl4
));
298 fl4
->flowi4_oif
= oif
;
301 fl4
->flowi4_tos
= tos
;
302 fl4
->flowi4_proto
= proto
;
303 fl4
->fl4_gre_key
= key
;
306 static int ip_tunnel_bind_dev(struct net_device
*dev
)
308 struct net_device
*tdev
= NULL
;
309 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
310 const struct iphdr
*iph
;
311 int hlen
= LL_MAX_HEADER
;
312 int mtu
= ETH_DATA_LEN
;
313 int t_hlen
= tunnel
->hlen
+ sizeof(struct iphdr
);
315 iph
= &tunnel
->parms
.iph
;
317 /* Guess output device to choose reasonable mtu and needed_headroom */
322 init_tunnel_flow(&fl4
, iph
->protocol
, iph
->daddr
,
323 iph
->saddr
, tunnel
->parms
.o_key
,
324 RT_TOS(iph
->tos
), tunnel
->parms
.link
);
325 rt
= ip_route_output_key(tunnel
->net
, &fl4
);
331 if (dev
->type
!= ARPHRD_ETHER
)
332 dev
->flags
|= IFF_POINTOPOINT
;
334 dst_cache_reset(&tunnel
->dst_cache
);
337 if (!tdev
&& tunnel
->parms
.link
)
338 tdev
= __dev_get_by_index(tunnel
->net
, tunnel
->parms
.link
);
341 hlen
= tdev
->hard_header_len
+ tdev
->needed_headroom
;
345 dev
->needed_headroom
= t_hlen
+ hlen
;
346 mtu
-= (dev
->hard_header_len
+ t_hlen
);
354 static struct ip_tunnel
*ip_tunnel_create(struct net
*net
,
355 struct ip_tunnel_net
*itn
,
356 struct ip_tunnel_parm
*parms
)
358 struct ip_tunnel
*nt
;
359 struct net_device
*dev
;
361 BUG_ON(!itn
->fb_tunnel_dev
);
362 dev
= __ip_tunnel_create(net
, itn
->fb_tunnel_dev
->rtnl_link_ops
, parms
);
364 return ERR_CAST(dev
);
366 dev
->mtu
= ip_tunnel_bind_dev(dev
);
368 nt
= netdev_priv(dev
);
369 ip_tunnel_add(itn
, nt
);
373 int ip_tunnel_rcv(struct ip_tunnel
*tunnel
, struct sk_buff
*skb
,
374 const struct tnl_ptk_info
*tpi
, struct metadata_dst
*tun_dst
,
377 struct pcpu_sw_netstats
*tstats
;
378 const struct iphdr
*iph
= ip_hdr(skb
);
381 #ifdef CONFIG_NET_IPGRE_BROADCAST
382 if (ipv4_is_multicast(iph
->daddr
)) {
383 tunnel
->dev
->stats
.multicast
++;
384 skb
->pkt_type
= PACKET_BROADCAST
;
388 if ((!(tpi
->flags
&TUNNEL_CSUM
) && (tunnel
->parms
.i_flags
&TUNNEL_CSUM
)) ||
389 ((tpi
->flags
&TUNNEL_CSUM
) && !(tunnel
->parms
.i_flags
&TUNNEL_CSUM
))) {
390 tunnel
->dev
->stats
.rx_crc_errors
++;
391 tunnel
->dev
->stats
.rx_errors
++;
395 if (tunnel
->parms
.i_flags
&TUNNEL_SEQ
) {
396 if (!(tpi
->flags
&TUNNEL_SEQ
) ||
397 (tunnel
->i_seqno
&& (s32
)(ntohl(tpi
->seq
) - tunnel
->i_seqno
) < 0)) {
398 tunnel
->dev
->stats
.rx_fifo_errors
++;
399 tunnel
->dev
->stats
.rx_errors
++;
402 tunnel
->i_seqno
= ntohl(tpi
->seq
) + 1;
405 skb_reset_network_header(skb
);
407 err
= IP_ECN_decapsulate(iph
, skb
);
410 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
411 &iph
->saddr
, iph
->tos
);
413 ++tunnel
->dev
->stats
.rx_frame_errors
;
414 ++tunnel
->dev
->stats
.rx_errors
;
419 tstats
= this_cpu_ptr(tunnel
->dev
->tstats
);
420 u64_stats_update_begin(&tstats
->syncp
);
421 tstats
->rx_packets
++;
422 tstats
->rx_bytes
+= skb
->len
;
423 u64_stats_update_end(&tstats
->syncp
);
425 skb_scrub_packet(skb
, !net_eq(tunnel
->net
, dev_net(tunnel
->dev
)));
427 if (tunnel
->dev
->type
== ARPHRD_ETHER
) {
428 skb
->protocol
= eth_type_trans(skb
, tunnel
->dev
);
429 skb_postpull_rcsum(skb
, eth_hdr(skb
), ETH_HLEN
);
431 skb
->dev
= tunnel
->dev
;
435 skb_dst_set(skb
, (struct dst_entry
*)tun_dst
);
437 gro_cells_receive(&tunnel
->gro_cells
, skb
);
444 EXPORT_SYMBOL_GPL(ip_tunnel_rcv
);
446 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops
*ops
,
449 if (num
>= MAX_IPTUN_ENCAP_OPS
)
452 return !cmpxchg((const struct ip_tunnel_encap_ops
**)
456 EXPORT_SYMBOL(ip_tunnel_encap_add_ops
);
458 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops
*ops
,
463 if (num
>= MAX_IPTUN_ENCAP_OPS
)
466 ret
= (cmpxchg((const struct ip_tunnel_encap_ops
**)
468 ops
, NULL
) == ops
) ? 0 : -1;
474 EXPORT_SYMBOL(ip_tunnel_encap_del_ops
);
476 int ip_tunnel_encap_setup(struct ip_tunnel
*t
,
477 struct ip_tunnel_encap
*ipencap
)
481 memset(&t
->encap
, 0, sizeof(t
->encap
));
483 hlen
= ip_encap_hlen(ipencap
);
487 t
->encap
.type
= ipencap
->type
;
488 t
->encap
.sport
= ipencap
->sport
;
489 t
->encap
.dport
= ipencap
->dport
;
490 t
->encap
.flags
= ipencap
->flags
;
492 t
->encap_hlen
= hlen
;
493 t
->hlen
= t
->encap_hlen
+ t
->tun_hlen
;
497 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup
);
499 static int tnl_update_pmtu(struct net_device
*dev
, struct sk_buff
*skb
,
500 struct rtable
*rt
, __be16 df
,
501 const struct iphdr
*inner_iph
)
503 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
504 int pkt_size
= skb
->len
- tunnel
->hlen
- dev
->hard_header_len
;
508 mtu
= dst_mtu(&rt
->dst
) - dev
->hard_header_len
509 - sizeof(struct iphdr
) - tunnel
->hlen
;
511 mtu
= skb_dst(skb
) ? dst_mtu(skb_dst(skb
)) : dev
->mtu
;
514 skb_dst(skb
)->ops
->update_pmtu(skb_dst(skb
), NULL
, skb
, mtu
);
516 if (skb
->protocol
== htons(ETH_P_IP
)) {
517 if (!skb_is_gso(skb
) &&
518 (inner_iph
->frag_off
& htons(IP_DF
)) &&
520 memset(IPCB(skb
), 0, sizeof(*IPCB(skb
)));
521 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_FRAG_NEEDED
, htonl(mtu
));
525 #if IS_ENABLED(CONFIG_IPV6)
526 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
527 struct rt6_info
*rt6
= (struct rt6_info
*)skb_dst(skb
);
529 if (rt6
&& mtu
< dst_mtu(skb_dst(skb
)) &&
530 mtu
>= IPV6_MIN_MTU
) {
531 if ((tunnel
->parms
.iph
.daddr
&&
532 !ipv4_is_multicast(tunnel
->parms
.iph
.daddr
)) ||
533 rt6
->rt6i_dst
.plen
== 128) {
534 rt6
->rt6i_flags
|= RTF_MODIFIED
;
535 dst_metric_set(skb_dst(skb
), RTAX_MTU
, mtu
);
539 if (!skb_is_gso(skb
) && mtu
>= IPV6_MIN_MTU
&&
541 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
);
549 void ip_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
,
550 const struct iphdr
*tnl_params
, u8 protocol
)
552 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
553 const struct iphdr
*inner_iph
;
557 struct rtable
*rt
; /* Route to the other host */
558 unsigned int max_headroom
; /* The extra header space needed */
562 inner_iph
= (const struct iphdr
*)skb_inner_network_header(skb
);
563 connected
= (tunnel
->parms
.iph
.daddr
!= 0);
565 memset(&(IPCB(skb
)->opt
), 0, sizeof(IPCB(skb
)->opt
));
567 dst
= tnl_params
->daddr
;
572 dev
->stats
.tx_fifo_errors
++;
576 if (skb
->protocol
== htons(ETH_P_IP
)) {
577 rt
= skb_rtable(skb
);
578 dst
= rt_nexthop(rt
, inner_iph
->daddr
);
580 #if IS_ENABLED(CONFIG_IPV6)
581 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
582 const struct in6_addr
*addr6
;
583 struct neighbour
*neigh
;
584 bool do_tx_error_icmp
;
587 neigh
= dst_neigh_lookup(skb_dst(skb
),
588 &ipv6_hdr(skb
)->daddr
);
592 addr6
= (const struct in6_addr
*)&neigh
->primary_key
;
593 addr_type
= ipv6_addr_type(addr6
);
595 if (addr_type
== IPV6_ADDR_ANY
) {
596 addr6
= &ipv6_hdr(skb
)->daddr
;
597 addr_type
= ipv6_addr_type(addr6
);
600 if ((addr_type
& IPV6_ADDR_COMPATv4
) == 0)
601 do_tx_error_icmp
= true;
603 do_tx_error_icmp
= false;
604 dst
= addr6
->s6_addr32
[3];
606 neigh_release(neigh
);
607 if (do_tx_error_icmp
)
617 tos
= tnl_params
->tos
;
620 if (skb
->protocol
== htons(ETH_P_IP
)) {
621 tos
= inner_iph
->tos
;
623 } else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
624 tos
= ipv6_get_dsfield((const struct ipv6hdr
*)inner_iph
);
629 init_tunnel_flow(&fl4
, protocol
, dst
, tnl_params
->saddr
,
630 tunnel
->parms
.o_key
, RT_TOS(tos
), tunnel
->parms
.link
);
632 if (ip_tunnel_encap(skb
, tunnel
, &protocol
, &fl4
) < 0)
635 rt
= connected
? dst_cache_get_ip4(&tunnel
->dst_cache
, &fl4
.saddr
) :
639 rt
= ip_route_output_key(tunnel
->net
, &fl4
);
642 dev
->stats
.tx_carrier_errors
++;
646 dst_cache_set_ip4(&tunnel
->dst_cache
, &rt
->dst
,
650 if (rt
->dst
.dev
== dev
) {
652 dev
->stats
.collisions
++;
656 if (tnl_update_pmtu(dev
, skb
, rt
, tnl_params
->frag_off
, inner_iph
)) {
661 if (tunnel
->err_count
> 0) {
662 if (time_before(jiffies
,
663 tunnel
->err_time
+ IPTUNNEL_ERR_TIMEO
)) {
666 dst_link_failure(skb
);
668 tunnel
->err_count
= 0;
671 tos
= ip_tunnel_ecn_encap(tos
, inner_iph
, skb
);
672 ttl
= tnl_params
->ttl
;
674 if (skb
->protocol
== htons(ETH_P_IP
))
675 ttl
= inner_iph
->ttl
;
676 #if IS_ENABLED(CONFIG_IPV6)
677 else if (skb
->protocol
== htons(ETH_P_IPV6
))
678 ttl
= ((const struct ipv6hdr
*)inner_iph
)->hop_limit
;
681 ttl
= ip4_dst_hoplimit(&rt
->dst
);
684 df
= tnl_params
->frag_off
;
685 if (skb
->protocol
== htons(ETH_P_IP
) && !tunnel
->ignore_df
)
686 df
|= (inner_iph
->frag_off
&htons(IP_DF
));
688 max_headroom
= LL_RESERVED_SPACE(rt
->dst
.dev
) + sizeof(struct iphdr
)
689 + rt
->dst
.header_len
+ ip_encap_hlen(&tunnel
->encap
);
690 if (max_headroom
> dev
->needed_headroom
)
691 dev
->needed_headroom
= max_headroom
;
693 if (skb_cow_head(skb
, dev
->needed_headroom
)) {
695 dev
->stats
.tx_dropped
++;
700 iptunnel_xmit(NULL
, rt
, skb
, fl4
.saddr
, fl4
.daddr
, protocol
, tos
, ttl
,
701 df
, !net_eq(tunnel
->net
, dev_net(dev
)));
704 #if IS_ENABLED(CONFIG_IPV6)
706 dst_link_failure(skb
);
709 dev
->stats
.tx_errors
++;
712 EXPORT_SYMBOL_GPL(ip_tunnel_xmit
);
714 static void ip_tunnel_update(struct ip_tunnel_net
*itn
,
716 struct net_device
*dev
,
717 struct ip_tunnel_parm
*p
,
720 ip_tunnel_del(itn
, t
);
721 t
->parms
.iph
.saddr
= p
->iph
.saddr
;
722 t
->parms
.iph
.daddr
= p
->iph
.daddr
;
723 t
->parms
.i_key
= p
->i_key
;
724 t
->parms
.o_key
= p
->o_key
;
725 if (dev
->type
!= ARPHRD_ETHER
) {
726 memcpy(dev
->dev_addr
, &p
->iph
.saddr
, 4);
727 memcpy(dev
->broadcast
, &p
->iph
.daddr
, 4);
729 ip_tunnel_add(itn
, t
);
731 t
->parms
.iph
.ttl
= p
->iph
.ttl
;
732 t
->parms
.iph
.tos
= p
->iph
.tos
;
733 t
->parms
.iph
.frag_off
= p
->iph
.frag_off
;
735 if (t
->parms
.link
!= p
->link
) {
738 t
->parms
.link
= p
->link
;
739 mtu
= ip_tunnel_bind_dev(dev
);
743 dst_cache_reset(&t
->dst_cache
);
744 netdev_state_change(dev
);
747 int ip_tunnel_ioctl(struct net_device
*dev
, struct ip_tunnel_parm
*p
, int cmd
)
750 struct ip_tunnel
*t
= netdev_priv(dev
);
751 struct net
*net
= t
->net
;
752 struct ip_tunnel_net
*itn
= net_generic(net
, t
->ip_tnl_net_id
);
754 BUG_ON(!itn
->fb_tunnel_dev
);
757 if (dev
== itn
->fb_tunnel_dev
) {
758 t
= ip_tunnel_find(itn
, p
, itn
->fb_tunnel_dev
->type
);
760 t
= netdev_priv(dev
);
762 memcpy(p
, &t
->parms
, sizeof(*p
));
768 if (!ns_capable(net
->user_ns
, CAP_NET_ADMIN
))
771 p
->iph
.frag_off
|= htons(IP_DF
);
772 if (!(p
->i_flags
& VTI_ISVTI
)) {
773 if (!(p
->i_flags
& TUNNEL_KEY
))
775 if (!(p
->o_flags
& TUNNEL_KEY
))
779 t
= ip_tunnel_find(itn
, p
, itn
->fb_tunnel_dev
->type
);
781 if (cmd
== SIOCADDTUNNEL
) {
783 t
= ip_tunnel_create(net
, itn
, p
);
784 err
= PTR_ERR_OR_ZERO(t
);
791 if (dev
!= itn
->fb_tunnel_dev
&& cmd
== SIOCCHGTUNNEL
) {
798 unsigned int nflags
= 0;
800 if (ipv4_is_multicast(p
->iph
.daddr
))
801 nflags
= IFF_BROADCAST
;
802 else if (p
->iph
.daddr
)
803 nflags
= IFF_POINTOPOINT
;
805 if ((dev
->flags
^nflags
)&(IFF_POINTOPOINT
|IFF_BROADCAST
)) {
810 t
= netdev_priv(dev
);
816 ip_tunnel_update(itn
, t
, dev
, p
, true);
824 if (!ns_capable(net
->user_ns
, CAP_NET_ADMIN
))
827 if (dev
== itn
->fb_tunnel_dev
) {
829 t
= ip_tunnel_find(itn
, p
, itn
->fb_tunnel_dev
->type
);
833 if (t
== netdev_priv(itn
->fb_tunnel_dev
))
837 unregister_netdevice(dev
);
848 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl
);
850 int __ip_tunnel_change_mtu(struct net_device
*dev
, int new_mtu
, bool strict
)
852 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
853 int t_hlen
= tunnel
->hlen
+ sizeof(struct iphdr
);
854 int max_mtu
= 0xFFF8 - dev
->hard_header_len
- t_hlen
;
859 if (new_mtu
> max_mtu
) {
869 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu
);
871 int ip_tunnel_change_mtu(struct net_device
*dev
, int new_mtu
)
873 return __ip_tunnel_change_mtu(dev
, new_mtu
, true);
875 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu
);
877 static void ip_tunnel_dev_free(struct net_device
*dev
)
879 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
881 gro_cells_destroy(&tunnel
->gro_cells
);
882 dst_cache_destroy(&tunnel
->dst_cache
);
883 free_percpu(dev
->tstats
);
887 void ip_tunnel_dellink(struct net_device
*dev
, struct list_head
*head
)
889 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
890 struct ip_tunnel_net
*itn
;
892 itn
= net_generic(tunnel
->net
, tunnel
->ip_tnl_net_id
);
894 if (itn
->fb_tunnel_dev
!= dev
) {
895 ip_tunnel_del(itn
, netdev_priv(dev
));
896 unregister_netdevice_queue(dev
, head
);
899 EXPORT_SYMBOL_GPL(ip_tunnel_dellink
);
901 struct net
*ip_tunnel_get_link_net(const struct net_device
*dev
)
903 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
907 EXPORT_SYMBOL(ip_tunnel_get_link_net
);
909 int ip_tunnel_get_iflink(const struct net_device
*dev
)
911 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
913 return tunnel
->parms
.link
;
915 EXPORT_SYMBOL(ip_tunnel_get_iflink
);
917 int ip_tunnel_init_net(struct net
*net
, int ip_tnl_net_id
,
918 struct rtnl_link_ops
*ops
, char *devname
)
920 struct ip_tunnel_net
*itn
= net_generic(net
, ip_tnl_net_id
);
921 struct ip_tunnel_parm parms
;
924 for (i
= 0; i
< IP_TNL_HASH_SIZE
; i
++)
925 INIT_HLIST_HEAD(&itn
->tunnels
[i
]);
928 itn
->fb_tunnel_dev
= NULL
;
932 memset(&parms
, 0, sizeof(parms
));
934 strlcpy(parms
.name
, devname
, IFNAMSIZ
);
937 itn
->fb_tunnel_dev
= __ip_tunnel_create(net
, ops
, &parms
);
938 /* FB netdevice is special: we have one, and only one per netns.
939 * Allowing to move it to another netns is clearly unsafe.
941 if (!IS_ERR(itn
->fb_tunnel_dev
)) {
942 itn
->fb_tunnel_dev
->features
|= NETIF_F_NETNS_LOCAL
;
943 itn
->fb_tunnel_dev
->mtu
= ip_tunnel_bind_dev(itn
->fb_tunnel_dev
);
944 ip_tunnel_add(itn
, netdev_priv(itn
->fb_tunnel_dev
));
948 return PTR_ERR_OR_ZERO(itn
->fb_tunnel_dev
);
950 EXPORT_SYMBOL_GPL(ip_tunnel_init_net
);
952 static void ip_tunnel_destroy(struct ip_tunnel_net
*itn
, struct list_head
*head
,
953 struct rtnl_link_ops
*ops
)
955 struct net
*net
= dev_net(itn
->fb_tunnel_dev
);
956 struct net_device
*dev
, *aux
;
959 for_each_netdev_safe(net
, dev
, aux
)
960 if (dev
->rtnl_link_ops
== ops
)
961 unregister_netdevice_queue(dev
, head
);
963 for (h
= 0; h
< IP_TNL_HASH_SIZE
; h
++) {
965 struct hlist_node
*n
;
966 struct hlist_head
*thead
= &itn
->tunnels
[h
];
968 hlist_for_each_entry_safe(t
, n
, thead
, hash_node
)
969 /* If dev is in the same netns, it has already
970 * been added to the list by the previous loop.
972 if (!net_eq(dev_net(t
->dev
), net
))
973 unregister_netdevice_queue(t
->dev
, head
);
977 void ip_tunnel_delete_net(struct ip_tunnel_net
*itn
, struct rtnl_link_ops
*ops
)
982 ip_tunnel_destroy(itn
, &list
, ops
);
983 unregister_netdevice_many(&list
);
986 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net
);
988 int ip_tunnel_newlink(struct net_device
*dev
, struct nlattr
*tb
[],
989 struct ip_tunnel_parm
*p
)
991 struct ip_tunnel
*nt
;
992 struct net
*net
= dev_net(dev
);
993 struct ip_tunnel_net
*itn
;
997 nt
= netdev_priv(dev
);
998 itn
= net_generic(net
, nt
->ip_tnl_net_id
);
1000 if (nt
->collect_md
) {
1001 if (rtnl_dereference(itn
->collect_md_tun
))
1004 if (ip_tunnel_find(itn
, p
, dev
->type
))
1010 err
= register_netdevice(dev
);
1014 if (dev
->type
== ARPHRD_ETHER
&& !tb
[IFLA_ADDRESS
])
1015 eth_hw_addr_random(dev
);
1017 mtu
= ip_tunnel_bind_dev(dev
);
1021 ip_tunnel_add(itn
, nt
);
1025 EXPORT_SYMBOL_GPL(ip_tunnel_newlink
);
1027 int ip_tunnel_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
1028 struct ip_tunnel_parm
*p
)
1030 struct ip_tunnel
*t
;
1031 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
1032 struct net
*net
= tunnel
->net
;
1033 struct ip_tunnel_net
*itn
= net_generic(net
, tunnel
->ip_tnl_net_id
);
1035 if (dev
== itn
->fb_tunnel_dev
)
1038 t
= ip_tunnel_find(itn
, p
, dev
->type
);
1046 if (dev
->type
!= ARPHRD_ETHER
) {
1047 unsigned int nflags
= 0;
1049 if (ipv4_is_multicast(p
->iph
.daddr
))
1050 nflags
= IFF_BROADCAST
;
1051 else if (p
->iph
.daddr
)
1052 nflags
= IFF_POINTOPOINT
;
1054 if ((dev
->flags
^ nflags
) &
1055 (IFF_POINTOPOINT
| IFF_BROADCAST
))
1060 ip_tunnel_update(itn
, t
, dev
, p
, !tb
[IFLA_MTU
]);
1063 EXPORT_SYMBOL_GPL(ip_tunnel_changelink
);
1065 int ip_tunnel_init(struct net_device
*dev
)
1067 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
1068 struct iphdr
*iph
= &tunnel
->parms
.iph
;
1071 dev
->destructor
= ip_tunnel_dev_free
;
1072 dev
->tstats
= netdev_alloc_pcpu_stats(struct pcpu_sw_netstats
);
1076 err
= dst_cache_init(&tunnel
->dst_cache
, GFP_KERNEL
);
1078 free_percpu(dev
->tstats
);
1082 err
= gro_cells_init(&tunnel
->gro_cells
, dev
);
1084 dst_cache_destroy(&tunnel
->dst_cache
);
1085 free_percpu(dev
->tstats
);
1090 tunnel
->net
= dev_net(dev
);
1091 strcpy(tunnel
->parms
.name
, dev
->name
);
1095 if (tunnel
->collect_md
) {
1096 dev
->features
|= NETIF_F_NETNS_LOCAL
;
1097 netif_keep_dst(dev
);
1101 EXPORT_SYMBOL_GPL(ip_tunnel_init
);
1103 void ip_tunnel_uninit(struct net_device
*dev
)
1105 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
1106 struct net
*net
= tunnel
->net
;
1107 struct ip_tunnel_net
*itn
;
1109 itn
= net_generic(net
, tunnel
->ip_tnl_net_id
);
1110 /* fb_tunnel_dev will be unregisted in net-exit call. */
1111 if (itn
->fb_tunnel_dev
!= dev
)
1112 ip_tunnel_del(itn
, netdev_priv(dev
));
1114 dst_cache_reset(&tunnel
->dst_cache
);
1116 EXPORT_SYMBOL_GPL(ip_tunnel_uninit
);
1118 /* Do least required initialization, rest of init is done in tunnel_init call */
1119 void ip_tunnel_setup(struct net_device
*dev
, int net_id
)
1121 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
1122 tunnel
->ip_tnl_net_id
= net_id
;
1124 EXPORT_SYMBOL_GPL(ip_tunnel_setup
);
1126 MODULE_LICENSE("GPL");