/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. It is a percpu
   counter, since cpu migration is forbidden once we enter the first
   ndo_xmit(). We force an exit if this counter reaches RECURSION_LIMIT
   (an illustrative sketch follows this comment).

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output would
     even be more informative. This idea appeared to be wrong: only
     Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short, ttl is
   not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the packets being encapsulated
   have DF set. But it is not our problem! Nobody could accuse us:
   we did all that we could. Even if it was your gated that injected
   the fatal route into the network, even if it was you who configured
   the fatal static route: you are innocent. :-)
*/
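
/* Illustrative sketch only (not part of this file): the xmit_recursion
 * counter mentioned above lives in the core networking code, and the names
 * below are simplified assumptions, not the real implementation. The idea:
 *
 *	static DEFINE_PER_CPU(unsigned int, xmit_recursion);
 *	#define RECURSION_LIMIT 8
 *
 *	static int example_dev_xmit(struct sk_buff *skb)
 *	{
 *		int ret;
 *
 *		if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) {
 *			kfree_skb(skb);		// break the local dead loop
 *			return -ELOOP;
 *		}
 *		__this_cpu_inc(xmit_recursion);
 *		ret = hand_skb_to_lower_device(skb);	// hypothetical helper
 *		__this_cpu_dec(xmit_recursion);
 *		return ret;
 *	}
 *
 * A percpu counter suffices because preemption (and thus cpu migration)
 * is disabled while we are inside ndo_start_xmit().
 */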

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				u32 id, u32 index,
				bool truncate, bool is_ipv4);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static int ipgre_err(struct sk_buff *skb, u32 info,
		     const struct tnl_ptk_info *tpi)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key into the third word
	 * of the GRE header. It makes it impossible to maintain even soft
	 * state for keyed GRE tunnels with enabled checksum. Tell
	 * them "thank you".
	 *
	 * Well, I wonder: rfc1812 was written by a Cisco employee,
	 * so why the hell do these idiots break standards established
	 * by themselves???
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
		 tpi->proto == htons(ETH_P_ERSPAN2))
		itn = net_generic(net, erspan_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);
	if (!t)
		return -ENOENT;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			 * rfc2003 contains "deep thoughts" about NET_UNREACH;
			 * I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}
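
	/* Worked example (informative): RFC 4884 expresses the original
	 * datagram length in 32-bit words, so a value of 32 in
	 * icmp_hdr(skb)->un.reserved[1] yields data_len = 32 * 4 = 128
	 * bytes of inner payload following the ICMP header.
	 */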

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return 0;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return 0;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return 0;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;

	return 0;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* See the comment in ipgre_err() about why only 8 bytes of ICMP
	 * payload can be relied upon, and about keyed tunnels with
	 * checksums.
	 */
	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
			     iph->ihl * 4) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, IPPROTO_GRE);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
			      IPPROTO_GRE);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	struct erspan_metadata *pkt_md;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	itn = net_generic(net, erspan_net_id);

	iph = ip_hdr(skb);
	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
	ver = ershdr->ver;

	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);
	if (tunnel) {
		len = gre_hdr_len + erspan_hdr_len(ver);
		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
		pkt_md = (struct erspan_metadata *)(ershdr + 1);

		if (__iptunnel_pull_header(skb,
					   len,
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
					    ERSPAN_V2_MDSIZE);

			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}
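
/* Example (informative): an ERSPAN v1 device that the lookup above would
 * match can be created with iproute2 along these lines:
 *
 *	ip link add dev erspan1 type erspan local 10.0.0.1 remote 10.0.0.2 \
 *		seq key 100 erspan_ver 1 erspan 123
 *
 * The key carries the 10-bit session ID, which is why the lookup always
 * ORs TUNNEL_KEY into tpi->flags.
 */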

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb,
					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	int tunnel_hlen;
	__be16 flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_skb;

	flags = tun_info->key.tun_flags &
		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}
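
/* Example (informative): the collect_md path above serves flow-based
 * ("external") devices, where each skb carries its own tunnel key and
 * endpoints in metadata instead of using fixed device parameters:
 *
 *	ip link add dev gretap1 type gretap external
 *
 * Such a device is typically driven by OVS or tc, which attach the
 * per-packet ip_tunnel_info before the skb reaches this function.
 */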

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	bool truncate = false;
	__be16 proto;
	int tunnel_hlen;
	int version;
	int nhoff;
	int thoff;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
		goto err_free_skb;
	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_skb;

	/* ERSPAN has fixed 8 byte GRE header */
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	if (gre_handle_offloads(skb, false))
		goto err_free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IP) &&
	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
		truncate = true;

	thoff = skb_transport_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IPV6) &&
	    (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
		truncate = true;

	if (version == 1) {
		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
				    ntohl(md->u.index), truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (version == 2) {
		erspan_build_header_v2(skb,
				       ntohl(tunnel_id_to_key32(key->tun_id)),
				       md->u.md2.dir,
				       get_hwid(&md->u.md2),
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto err_free_skb;
	}

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 proto, 0, htonl(tunnel->o_seqno++));

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	const struct ip_tunnel_key *key;
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	key = &info->key;
	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), key->tos, 0,
			    skb->mark, skb_get_hash(skb));
	rt = ip_route_output_key(dev_net(dev), &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to the GRE header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;
	__be16 proto;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	if (tunnel->erspan_ver == 1) {
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (tunnel->erspan_ver == 2) {
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto free_skb;
	}

	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int len;

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	dev->needed_headroom = dev->needed_headroom + len;
	if (set_mtu)
		dev->mtu = max_t(int, dev->mtu - len, 68);

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		} else {
			dev->features &= ~NETIF_F_GSO_SOFTWARE;
			dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		}
		dev->features |= NETIF_F_LLTX;
	} else {
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
	}
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->parms.i_flags = p.i_flags;
		t->parms.o_flags = p.o_flags;

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;

	return 0;
}
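
/* Example (informative): this ioctl path is what the legacy "ip tunnel"
 * command uses, as opposed to the netlink ops further below:
 *
 *	ip tunnel add gre1 mode gre local 192.0.2.1 remote 198.51.100.1 ttl 64
 *
 * The GRE_VERSION | GRE_ROUTING check above rejects header options this
 * decoder does not implement (source routing, and GRE version 1 as used
 * by PPTP).
 */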

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */

static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops	= &ipgre_netdev_ops;
	dev->type	= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor can we
		 * support 2 levels of outer headers requiring an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features    |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences.
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* ERSPAN should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits wide. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}
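
/* Worked example (informative): with a 10-bit ID_MASK (0x3ff), session ID
 * 1023 is the largest value that passes the checks above; "key 1024"
 * would be rejected with -EINVAL.
 */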

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		if (t->erspan_ver != 1 && t->erspan_ver != 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}
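
/* Example (informative): the attributes parsed above map directly onto
 * iproute2 options, e.g.
 *
 *	ip link add dev gre1 type gre local 192.0.2.1 remote 198.51.100.1 \
 *		ttl 64 key 42 csum
 *
 * fills IFLA_GRE_LOCAL/REMOTE/TTL, sets IFLA_GRE_IKEY and IFLA_GRE_OKEY
 * to 42, and sets GRE_KEY | GRE_CSUM in both IFLA_GRE_IFLAGS and
 * IFLA_GRE_OFLAGS.
 */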

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = 0;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
		ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		nla_total_size(4) +	/* IFLA_GRE_LINK */
		nla_total_size(2) +	/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +	/* IFLA_GRE_OFLAGS */
		nla_total_size(4) +	/* IFLA_GRE_IKEY */
		nla_total_size(4) +	/* IFLA_GRE_OKEY */
		nla_total_size(4) +	/* IFLA_GRE_LOCAL */
		nla_total_size(4) +	/* IFLA_GRE_REMOTE */
		nla_total_size(1) +	/* IFLA_GRE_TTL */
		nla_total_size(1) +	/* IFLA_GRE_TOS */
		nla_total_size(1) +	/* IFLA_GRE_PMTUDISC */
		nla_total_size(2) +	/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +	/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +	/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +	/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(0) +	/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(1) +	/* IFLA_GRE_IGNORE_DF */
		nla_total_size(4) +	/* IFLA_GRE_FWMARK */
		nla_total_size(4) +	/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(1) +	/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +	/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(2) +	/* IFLA_GRE_ERSPAN_HWID */
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;
	__be16 o_flags = p->o_flags;

	if (t->erspan_ver == 1 || t->erspan_ver == 2) {
		if (!t->collect_md)
			o_flags |= TUNNEL_KEY;

		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
			goto nla_put_failure;

		if (t->erspan_ver == 1) {
			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
				goto nla_put_failure;
		} else {
			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
				goto nla_put_failure;
			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
				goto nla_put_failure;
		}
	}

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
	t->erspan_ver = 1;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
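
/* Usage sketch (informative): gretap_fb_dev_create() is exported for other
 * kernel code (openvswitch is the classic user). A hypothetical caller,
 * which must hold rtnl_lock():
 *
 *	struct net_device *dev;
 *
 *	dev = gretap_fb_dev_create(net, "gretap%d", NET_NAME_ENUM);
 *	if (IS_ERR(dev))
 *		return PTR_ERR(dev);
 */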

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");
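
/* Usage note (informative): the MODULE_ALIAS_RTNL_LINK() entries above let
 * the kernel autoload this module on demand, so an explicit modprobe is
 * normally unnecessary:
 *
 *	ip link add dev gre1 type gre remote 198.51.100.1	# loads ip_gre
 */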