net/ipv4/ip_tunnel.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
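/* Tunnels are hashed into IP_TNL_HASH_SIZE buckets keyed on the tunnel
 * key XORed with the remote endpoint address, so receive lookups only
 * scan entries that could plausibly match the packet.
 */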
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}
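/* A tunnel configured with TUNNEL_KEY only accepts packets carrying
 * that exact key; a keyless tunnel only accepts keyless packets.
 */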
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the input.
*/
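/* Lookup precedence: fully matching (saddr, daddr) tunnels first, then
 * daddr-only, then saddr-only or local multicast, then key-only; within
 * each pass an exact link match wins immediately, otherwise a candidate
 * is remembered.  If nothing matches at all, fall back to the
 * collect_md device and finally to the per-netns fallback device.
 */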
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
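/* Select the hash bucket a tunnel with the given parameters lives in;
 * multicast destinations hash as remote 0, and VTI tunnels without
 * TUNNEL_KEY ignore i_key, matching the lookup rules above.
 */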
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}
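/* Exact-match configuration lookup used by the ioctl and netlink paths:
 * unlike ip_tunnel_lookup() there is no wildcarding, all of saddr,
 * daddr, link, device type and key must match.
 */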
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}
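/* Allocate and register a tunnel netdevice.  If no name was supplied,
 * "%d" is appended to the rtnl ops kind so the core picks the next free
 * index (e.g. "gre0", "gre1"); hence the IFNAMSIZ - 3 length check.
 */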
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
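/* Route towards the tunnel destination to discover the underlay device,
 * then derive needed_headroom and a usable MTU from it.  Returns the
 * suggested MTU; the caller decides whether to apply it.
 */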
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link,
				    tunnel->fwmark, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}
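/* Common receive path: validate the TUNNEL_CSUM and TUNNEL_SEQ flags
 * against the tunnel configuration, decapsulate ECN, update per-cpu
 * stats and hand the inner packet to GRO.  Always consumes the skb and
 * returns 0, so callers must not free it again.
 */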
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
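/* Encapsulation modules (e.g. FOU) register themselves in the global
 * iptun_encaps[] table.  cmpxchg() makes registration atomic, and the
 * synchronize_net() on removal ensures no RCU reader still sees the
 * ops being torn down.
 */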
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
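/* Check whether the inner packet still fits once the tunnel and outer
 * IP headers are added.  If it does not, propagate the reduced MTU to
 * the inner dst and signal the sender with ICMP_FRAG_NEEDED (IPv4 with
 * DF set) or ICMPV6_PKT_TOOBIG, returning -E2BIG so the caller drops
 * the packet.
 */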
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph,
			   int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel_hlen;
	else
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
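/* Transmit path for metadata-mode (collect_md) tunnels: every outer
 * header field (addresses, key, tos, ttl, DF) comes from the per-skb
 * struct ip_tunnel_info rather than from the device configuration.
 * A tos of 1 means "inherit from the inner header".
 */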
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	if (!df && skb->protocol == htons(ETH_P_IP))
		df = inner_iph->frag_off & htons(IP_DF);

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
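/* Transmit path for classically configured tunnels.  A zero daddr in
 * tnl_params marks an NBMA tunnel, where the outer destination is
 * recovered per packet from tunnel metadata, the inner IPv4 next hop,
 * or an IPv4-compatible IPv6 destination.  The low bit of
 * tnl_params->tos requests inheriting the inner TOS/dsfield.
 */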
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;	/* Route to the other host */
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		}
		else if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb));

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						   &fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
			    0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
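/* Apply new parameters to an existing tunnel.  The tunnel is unhashed
 * and rehashed because saddr/daddr/key determine its bucket.
 */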
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}
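/* Legacy configuration interface behind the SIOC{GET,ADD,CHG,DEL}TUNNEL
 * ioctls on the tunnel (or fallback) device.  This is what, for example,
 * "ip tunnel add tnl1 mode gre remote 10.0.0.2 local 10.0.0.1 ttl 64"
 * ends up calling when iproute2 takes its ioctl path; newer tooling
 * typically configures tunnels over rtnetlink instead (see
 * ip_tunnel_newlink() below).
 */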
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
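/* Per-netns setup: initialize the hash table and, unless fallback
 * tunnels are disabled for this namespace, create the fallback device
 * (e.g. "gre0") that catches otherwise unmatched packets.
 */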
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
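/* rtnetlink creation path.  Only one collect_md (external-mode) tunnel
 * is allowed per netns, and classical tunnels must not duplicate an
 * existing (saddr, daddr, link, key) configuration.
 */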
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
			    (unsigned int)(max - sizeof(struct iphdr)));
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
/* Do the least required initialization here; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");