1 // SPDX-License-Identifier: GPL-2.0-only
3 * GENEVE: Generic Network Virtualization Encapsulation
5 * Copyright (c) 2015 Red Hat, Inc.
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/ethtool.h>
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/etherdevice.h>
14 #include <linux/hash.h>
15 #include <net/ipv6_stubs.h>
16 #include <net/dst_metadata.h>
17 #include <net/gro_cells.h>
18 #include <net/rtnetlink.h>
19 #include <net/geneve.h>
21 #include <net/protocol.h>
23 #define GENEVE_NETDEV_VER "0.6"
25 #define GENEVE_N_VID (1u << 24)
26 #define GENEVE_VID_MASK (GENEVE_N_VID - 1)
28 #define VNI_HASH_BITS 10
29 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
31 static bool log_ecn_error
= true;
32 module_param(log_ecn_error
, bool, 0644);
33 MODULE_PARM_DESC(log_ecn_error
, "Log packets received with corrupted ECN");
36 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
37 #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
38 #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
40 /* per-network namespace private data for this module */
42 struct list_head geneve_list
;
43 struct list_head sock_list
;
46 static unsigned int geneve_net_id
;
48 struct geneve_dev_node
{
49 struct hlist_node hlist
;
50 struct geneve_dev
*geneve
;
53 struct geneve_config
{
54 struct ip_tunnel_info info
;
56 bool use_udp6_rx_checksums
;
58 enum ifla_geneve_df df
;
59 bool inner_proto_inherit
;
62 /* Pseudo network device */
64 struct geneve_dev_node hlist4
; /* vni hash table for IPv4 socket */
65 #if IS_ENABLED(CONFIG_IPV6)
66 struct geneve_dev_node hlist6
; /* vni hash table for IPv6 socket */
68 struct net
*net
; /* netns for packet i/o */
69 struct net_device
*dev
; /* netdev for geneve tunnel */
70 struct geneve_sock __rcu
*sock4
; /* IPv4 socket used for geneve tunnel */
71 #if IS_ENABLED(CONFIG_IPV6)
72 struct geneve_sock __rcu
*sock6
; /* IPv6 socket used for geneve tunnel */
74 struct list_head next
; /* geneve's per namespace list */
75 struct gro_cells gro_cells
;
76 struct geneve_config cfg
;
81 struct list_head list
;
85 struct hlist_head vni_list
[VNI_HASH_SIZE
];
88 static inline __u32
geneve_net_vni_hash(u8 vni
[3])
92 vnid
= (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
93 return hash_32(vnid
, VNI_HASH_BITS
);
96 static __be64
vni_to_tunnel_id(const __u8
*vni
)
99 return (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
101 return (__force __be64
)(((__force u64
)vni
[0] << 40) |
102 ((__force u64
)vni
[1] << 48) |
103 ((__force u64
)vni
[2] << 56));
107 /* Convert 64 bit tunnel ID to 24 bit VNI. */
108 static void tunnel_id_to_vni(__be64 tun_id
, __u8
*vni
)
111 vni
[0] = (__force __u8
)(tun_id
>> 16);
112 vni
[1] = (__force __u8
)(tun_id
>> 8);
113 vni
[2] = (__force __u8
)tun_id
;
115 vni
[0] = (__force __u8
)((__force u64
)tun_id
>> 40);
116 vni
[1] = (__force __u8
)((__force u64
)tun_id
>> 48);
117 vni
[2] = (__force __u8
)((__force u64
)tun_id
>> 56);
121 static bool eq_tun_id_and_vni(u8
*tun_id
, u8
*vni
)
123 return !memcmp(vni
, &tun_id
[5], 3);
126 static sa_family_t
geneve_get_sk_family(struct geneve_sock
*gs
)
128 return gs
->sock
->sk
->sk_family
;
131 static struct geneve_dev
*geneve_lookup(struct geneve_sock
*gs
,
132 __be32 addr
, u8 vni
[])
134 struct hlist_head
*vni_list_head
;
135 struct geneve_dev_node
*node
;
138 /* Find the device for this VNI */
139 hash
= geneve_net_vni_hash(vni
);
140 vni_list_head
= &gs
->vni_list
[hash
];
141 hlist_for_each_entry_rcu(node
, vni_list_head
, hlist
) {
142 if (eq_tun_id_and_vni((u8
*)&node
->geneve
->cfg
.info
.key
.tun_id
, vni
) &&
143 addr
== node
->geneve
->cfg
.info
.key
.u
.ipv4
.dst
)
149 #if IS_ENABLED(CONFIG_IPV6)
150 static struct geneve_dev
*geneve6_lookup(struct geneve_sock
*gs
,
151 struct in6_addr addr6
, u8 vni
[])
153 struct hlist_head
*vni_list_head
;
154 struct geneve_dev_node
*node
;
157 /* Find the device for this VNI */
158 hash
= geneve_net_vni_hash(vni
);
159 vni_list_head
= &gs
->vni_list
[hash
];
160 hlist_for_each_entry_rcu(node
, vni_list_head
, hlist
) {
161 if (eq_tun_id_and_vni((u8
*)&node
->geneve
->cfg
.info
.key
.tun_id
, vni
) &&
162 ipv6_addr_equal(&addr6
, &node
->geneve
->cfg
.info
.key
.u
.ipv6
.dst
))
/* Return a pointer to the Geneve header, which is taken to start
 * immediately after the UDP header of @skb.
 */
static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
	return (struct genevehdr *)(udp_hdr(skb) + 1);
}
174 static struct geneve_dev
*geneve_lookup_skb(struct geneve_sock
*gs
,
177 static u8 zero_vni
[3];
180 if (geneve_get_sk_family(gs
) == AF_INET
) {
184 iph
= ip_hdr(skb
); /* outer IP header... */
186 if (gs
->collect_md
) {
190 vni
= geneve_hdr(skb
)->vni
;
194 return geneve_lookup(gs
, addr
, vni
);
195 #if IS_ENABLED(CONFIG_IPV6)
196 } else if (geneve_get_sk_family(gs
) == AF_INET6
) {
197 static struct in6_addr zero_addr6
;
198 struct ipv6hdr
*ip6h
;
199 struct in6_addr addr6
;
201 ip6h
= ipv6_hdr(skb
); /* outer IPv6 header... */
203 if (gs
->collect_md
) {
207 vni
= geneve_hdr(skb
)->vni
;
211 return geneve6_lookup(gs
, addr6
, vni
);
217 /* geneve receive/decap routine */
218 static void geneve_rx(struct geneve_dev
*geneve
, struct geneve_sock
*gs
,
221 struct genevehdr
*gnvh
= geneve_hdr(skb
);
222 struct metadata_dst
*tun_dst
= NULL
;
227 if (ip_tunnel_collect_metadata() || gs
->collect_md
) {
228 IP_TUNNEL_DECLARE_FLAGS(flags
) = { };
230 __set_bit(IP_TUNNEL_KEY_BIT
, flags
);
231 __assign_bit(IP_TUNNEL_OAM_BIT
, flags
, gnvh
->oam
);
232 __assign_bit(IP_TUNNEL_CRIT_OPT_BIT
, flags
, gnvh
->critical
);
234 tun_dst
= udp_tun_rx_dst(skb
, geneve_get_sk_family(gs
), flags
,
235 vni_to_tunnel_id(gnvh
->vni
),
238 DEV_STATS_INC(geneve
->dev
, rx_dropped
);
241 /* Update tunnel dst according to Geneve options. */
242 ip_tunnel_flags_zero(flags
);
243 __set_bit(IP_TUNNEL_GENEVE_OPT_BIT
, flags
);
244 ip_tunnel_info_opts_set(&tun_dst
->u
.tun_info
,
245 gnvh
->options
, gnvh
->opt_len
* 4,
248 /* Drop packets w/ critical options,
249 * since we don't support any...
251 if (gnvh
->critical
) {
252 DEV_STATS_INC(geneve
->dev
, rx_frame_errors
);
253 DEV_STATS_INC(geneve
->dev
, rx_errors
);
259 skb_dst_set(skb
, &tun_dst
->dst
);
261 if (gnvh
->proto_type
== htons(ETH_P_TEB
)) {
262 skb_reset_mac_header(skb
);
263 skb
->protocol
= eth_type_trans(skb
, geneve
->dev
);
264 skb_postpull_rcsum(skb
, eth_hdr(skb
), ETH_HLEN
);
266 /* Ignore packet loops (and multicast echo) */
267 if (ether_addr_equal(eth_hdr(skb
)->h_source
,
268 geneve
->dev
->dev_addr
)) {
269 DEV_STATS_INC(geneve
->dev
, rx_errors
);
273 skb_reset_mac_header(skb
);
274 skb
->dev
= geneve
->dev
;
275 skb
->pkt_type
= PACKET_HOST
;
278 /* Save offset of outer header relative to skb->head,
279 * because we are going to reset the network header to the inner header
280 * and might change skb->head.
282 nh
= skb_network_header(skb
) - skb
->head
;
284 skb_reset_network_header(skb
);
286 if (!pskb_inet_may_pull(skb
)) {
287 DEV_STATS_INC(geneve
->dev
, rx_length_errors
);
288 DEV_STATS_INC(geneve
->dev
, rx_errors
);
292 /* Get the outer header. */
293 oiph
= skb
->head
+ nh
;
295 if (geneve_get_sk_family(gs
) == AF_INET
)
296 err
= IP_ECN_decapsulate(oiph
, skb
);
297 #if IS_ENABLED(CONFIG_IPV6)
299 err
= IP6_ECN_decapsulate(oiph
, skb
);
304 if (geneve_get_sk_family(gs
) == AF_INET
)
305 net_info_ratelimited("non-ECT from %pI4 "
307 &((struct iphdr
*)oiph
)->saddr
,
308 ((struct iphdr
*)oiph
)->tos
);
309 #if IS_ENABLED(CONFIG_IPV6)
311 net_info_ratelimited("non-ECT from %pI6\n",
312 &((struct ipv6hdr
*)oiph
)->saddr
);
316 DEV_STATS_INC(geneve
->dev
, rx_frame_errors
);
317 DEV_STATS_INC(geneve
->dev
, rx_errors
);
323 err
= gro_cells_receive(&geneve
->gro_cells
, skb
);
324 if (likely(err
== NET_RX_SUCCESS
))
325 dev_sw_netstats_rx_add(geneve
->dev
, len
);
329 /* Consume bad packet */
333 /* Setup stats when device is created */
334 static int geneve_init(struct net_device
*dev
)
336 struct geneve_dev
*geneve
= netdev_priv(dev
);
339 err
= gro_cells_init(&geneve
->gro_cells
, dev
);
343 err
= dst_cache_init(&geneve
->cfg
.info
.dst_cache
, GFP_KERNEL
);
345 gro_cells_destroy(&geneve
->gro_cells
);
348 netdev_lockdep_set_classes(dev
);
352 static void geneve_uninit(struct net_device
*dev
)
354 struct geneve_dev
*geneve
= netdev_priv(dev
);
356 dst_cache_destroy(&geneve
->cfg
.info
.dst_cache
);
357 gro_cells_destroy(&geneve
->gro_cells
);
360 /* Callback from net/ipv4/udp.c to receive packets */
361 static int geneve_udp_encap_recv(struct sock
*sk
, struct sk_buff
*skb
)
363 struct genevehdr
*geneveh
;
364 struct geneve_dev
*geneve
;
365 struct geneve_sock
*gs
;
369 /* Need UDP and Geneve header to be present */
370 if (unlikely(!pskb_may_pull(skb
, GENEVE_BASE_HLEN
)))
373 /* Return packets with reserved bits set */
374 geneveh
= geneve_hdr(skb
);
375 if (unlikely(geneveh
->ver
!= GENEVE_VER
))
378 gs
= rcu_dereference_sk_user_data(sk
);
382 geneve
= geneve_lookup_skb(gs
, skb
);
386 inner_proto
= geneveh
->proto_type
;
388 if (unlikely((!geneve
->cfg
.inner_proto_inherit
&&
389 inner_proto
!= htons(ETH_P_TEB
)))) {
390 DEV_STATS_INC(geneve
->dev
, rx_dropped
);
394 opts_len
= geneveh
->opt_len
* 4;
395 if (iptunnel_pull_header(skb
, GENEVE_BASE_HLEN
+ opts_len
, inner_proto
,
396 !net_eq(geneve
->net
, dev_net(geneve
->dev
)))) {
397 DEV_STATS_INC(geneve
->dev
, rx_dropped
);
401 geneve_rx(geneve
, gs
, skb
);
405 /* Consume bad packet */
410 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
411 static int geneve_udp_encap_err_lookup(struct sock
*sk
, struct sk_buff
*skb
)
413 struct genevehdr
*geneveh
;
414 struct geneve_sock
*gs
;
415 u8 zero_vni
[3] = { 0 };
418 if (!pskb_may_pull(skb
, skb_transport_offset(skb
) + GENEVE_BASE_HLEN
))
421 geneveh
= geneve_hdr(skb
);
422 if (geneveh
->ver
!= GENEVE_VER
)
425 if (geneveh
->proto_type
!= htons(ETH_P_TEB
))
428 gs
= rcu_dereference_sk_user_data(sk
);
432 if (geneve_get_sk_family(gs
) == AF_INET
) {
433 struct iphdr
*iph
= ip_hdr(skb
);
436 if (!gs
->collect_md
) {
437 vni
= geneve_hdr(skb
)->vni
;
441 return geneve_lookup(gs
, addr4
, vni
) ? 0 : -ENOENT
;
444 #if IS_ENABLED(CONFIG_IPV6)
445 if (geneve_get_sk_family(gs
) == AF_INET6
) {
446 struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
447 struct in6_addr addr6
;
449 memset(&addr6
, 0, sizeof(struct in6_addr
));
451 if (!gs
->collect_md
) {
452 vni
= geneve_hdr(skb
)->vni
;
456 return geneve6_lookup(gs
, addr6
, vni
) ? 0 : -ENOENT
;
460 return -EPFNOSUPPORT
;
463 static struct socket
*geneve_create_sock(struct net
*net
, bool ipv6
,
464 __be16 port
, bool ipv6_rx_csum
)
467 struct udp_port_cfg udp_conf
;
470 memset(&udp_conf
, 0, sizeof(udp_conf
));
473 udp_conf
.family
= AF_INET6
;
474 udp_conf
.ipv6_v6only
= 1;
475 udp_conf
.use_udp6_rx_checksums
= ipv6_rx_csum
;
477 udp_conf
.family
= AF_INET
;
478 udp_conf
.local_ip
.s_addr
= htonl(INADDR_ANY
);
481 udp_conf
.local_udp_port
= port
;
483 /* Open UDP socket */
484 err
= udp_sock_create(net
, &udp_conf
, &sock
);
488 udp_allow_gso(sock
->sk
);
492 static int geneve_hlen(struct genevehdr
*gh
)
494 return sizeof(*gh
) + gh
->opt_len
* 4;
497 static struct sk_buff
*geneve_gro_receive(struct sock
*sk
,
498 struct list_head
*head
,
501 struct sk_buff
*pp
= NULL
;
503 struct genevehdr
*gh
, *gh2
;
504 unsigned int hlen
, gh_len
, off_gnv
;
505 const struct packet_offload
*ptype
;
509 off_gnv
= skb_gro_offset(skb
);
510 hlen
= off_gnv
+ sizeof(*gh
);
511 gh
= skb_gro_header(skb
, hlen
, off_gnv
);
515 if (gh
->ver
!= GENEVE_VER
|| gh
->oam
)
517 gh_len
= geneve_hlen(gh
);
519 hlen
= off_gnv
+ gh_len
;
520 if (!skb_gro_may_pull(skb
, hlen
)) {
521 gh
= skb_gro_header_slow(skb
, hlen
, off_gnv
);
526 list_for_each_entry(p
, head
, list
) {
527 if (!NAPI_GRO_CB(p
)->same_flow
)
530 gh2
= (struct genevehdr
*)(p
->data
+ off_gnv
);
531 if (gh
->opt_len
!= gh2
->opt_len
||
532 memcmp(gh
, gh2
, gh_len
)) {
533 NAPI_GRO_CB(p
)->same_flow
= 0;
538 skb_gro_pull(skb
, gh_len
);
539 skb_gro_postpull_rcsum(skb
, gh
, gh_len
);
540 type
= gh
->proto_type
;
541 if (likely(type
== htons(ETH_P_TEB
)))
542 return call_gro_receive(eth_gro_receive
, head
, skb
);
544 ptype
= gro_find_receive_by_type(type
);
548 pp
= call_gro_receive(ptype
->callbacks
.gro_receive
, head
, skb
);
552 skb_gro_flush_final(skb
, pp
, flush
);
557 static int geneve_gro_complete(struct sock
*sk
, struct sk_buff
*skb
,
560 struct genevehdr
*gh
;
561 struct packet_offload
*ptype
;
566 gh
= (struct genevehdr
*)(skb
->data
+ nhoff
);
567 gh_len
= geneve_hlen(gh
);
568 type
= gh
->proto_type
;
570 /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */
571 if (likely(type
== htons(ETH_P_TEB
)))
572 return eth_gro_complete(skb
, nhoff
+ gh_len
);
574 ptype
= gro_find_complete_by_type(type
);
576 err
= ptype
->callbacks
.gro_complete(skb
, nhoff
+ gh_len
);
578 skb_set_inner_mac_header(skb
, nhoff
+ gh_len
);
583 /* Create new listen socket if needed */
584 static struct geneve_sock
*geneve_socket_create(struct net
*net
, __be16 port
,
585 bool ipv6
, bool ipv6_rx_csum
)
587 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
588 struct geneve_sock
*gs
;
590 struct udp_tunnel_sock_cfg tunnel_cfg
;
593 gs
= kzalloc(sizeof(*gs
), GFP_KERNEL
);
595 return ERR_PTR(-ENOMEM
);
597 sock
= geneve_create_sock(net
, ipv6
, port
, ipv6_rx_csum
);
600 return ERR_CAST(sock
);
605 for (h
= 0; h
< VNI_HASH_SIZE
; ++h
)
606 INIT_HLIST_HEAD(&gs
->vni_list
[h
]);
608 /* Initialize the geneve udp offloads structure */
609 udp_tunnel_notify_add_rx_port(gs
->sock
, UDP_TUNNEL_TYPE_GENEVE
);
611 /* Mark socket as an encapsulation socket */
612 memset(&tunnel_cfg
, 0, sizeof(tunnel_cfg
));
613 tunnel_cfg
.sk_user_data
= gs
;
614 tunnel_cfg
.encap_type
= 1;
615 tunnel_cfg
.gro_receive
= geneve_gro_receive
;
616 tunnel_cfg
.gro_complete
= geneve_gro_complete
;
617 tunnel_cfg
.encap_rcv
= geneve_udp_encap_recv
;
618 tunnel_cfg
.encap_err_lookup
= geneve_udp_encap_err_lookup
;
619 tunnel_cfg
.encap_destroy
= NULL
;
620 setup_udp_tunnel_sock(net
, sock
, &tunnel_cfg
);
621 list_add(&gs
->list
, &gn
->sock_list
);
625 static void __geneve_sock_release(struct geneve_sock
*gs
)
627 if (!gs
|| --gs
->refcnt
)
631 udp_tunnel_notify_del_rx_port(gs
->sock
, UDP_TUNNEL_TYPE_GENEVE
);
632 udp_tunnel_sock_release(gs
->sock
);
636 static void geneve_sock_release(struct geneve_dev
*geneve
)
638 struct geneve_sock
*gs4
= rtnl_dereference(geneve
->sock4
);
639 #if IS_ENABLED(CONFIG_IPV6)
640 struct geneve_sock
*gs6
= rtnl_dereference(geneve
->sock6
);
642 rcu_assign_pointer(geneve
->sock6
, NULL
);
645 rcu_assign_pointer(geneve
->sock4
, NULL
);
648 __geneve_sock_release(gs4
);
649 #if IS_ENABLED(CONFIG_IPV6)
650 __geneve_sock_release(gs6
);
654 static struct geneve_sock
*geneve_find_sock(struct geneve_net
*gn
,
658 struct geneve_sock
*gs
;
660 list_for_each_entry(gs
, &gn
->sock_list
, list
) {
661 if (inet_sk(gs
->sock
->sk
)->inet_sport
== dst_port
&&
662 geneve_get_sk_family(gs
) == family
) {
669 static int geneve_sock_add(struct geneve_dev
*geneve
, bool ipv6
)
671 struct net
*net
= geneve
->net
;
672 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
673 struct geneve_dev_node
*node
;
674 struct geneve_sock
*gs
;
678 gs
= geneve_find_sock(gn
, ipv6
? AF_INET6
: AF_INET
, geneve
->cfg
.info
.key
.tp_dst
);
684 gs
= geneve_socket_create(net
, geneve
->cfg
.info
.key
.tp_dst
, ipv6
,
685 geneve
->cfg
.use_udp6_rx_checksums
);
690 gs
->collect_md
= geneve
->cfg
.collect_md
;
691 #if IS_ENABLED(CONFIG_IPV6)
693 rcu_assign_pointer(geneve
->sock6
, gs
);
694 node
= &geneve
->hlist6
;
698 rcu_assign_pointer(geneve
->sock4
, gs
);
699 node
= &geneve
->hlist4
;
701 node
->geneve
= geneve
;
703 tunnel_id_to_vni(geneve
->cfg
.info
.key
.tun_id
, vni
);
704 hash
= geneve_net_vni_hash(vni
);
705 hlist_add_head_rcu(&node
->hlist
, &gs
->vni_list
[hash
]);
709 static int geneve_open(struct net_device
*dev
)
711 struct geneve_dev
*geneve
= netdev_priv(dev
);
712 bool metadata
= geneve
->cfg
.collect_md
;
716 ipv6
= geneve
->cfg
.info
.mode
& IP_TUNNEL_INFO_IPV6
|| metadata
;
717 ipv4
= !ipv6
|| metadata
;
718 #if IS_ENABLED(CONFIG_IPV6)
720 ret
= geneve_sock_add(geneve
, true);
721 if (ret
< 0 && ret
!= -EAFNOSUPPORT
)
726 ret
= geneve_sock_add(geneve
, false);
728 geneve_sock_release(geneve
);
733 static int geneve_stop(struct net_device
*dev
)
735 struct geneve_dev
*geneve
= netdev_priv(dev
);
737 hlist_del_init_rcu(&geneve
->hlist4
.hlist
);
738 #if IS_ENABLED(CONFIG_IPV6)
739 hlist_del_init_rcu(&geneve
->hlist6
.hlist
);
741 geneve_sock_release(geneve
);
745 static void geneve_build_header(struct genevehdr
*geneveh
,
746 const struct ip_tunnel_info
*info
,
749 geneveh
->ver
= GENEVE_VER
;
750 geneveh
->opt_len
= info
->options_len
/ 4;
751 geneveh
->oam
= test_bit(IP_TUNNEL_OAM_BIT
, info
->key
.tun_flags
);
752 geneveh
->critical
= test_bit(IP_TUNNEL_CRIT_OPT_BIT
,
753 info
->key
.tun_flags
);
755 tunnel_id_to_vni(info
->key
.tun_id
, geneveh
->vni
);
756 geneveh
->proto_type
= inner_proto
;
759 if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT
, info
->key
.tun_flags
))
760 ip_tunnel_info_opts_get(geneveh
->options
, info
);
763 static int geneve_build_skb(struct dst_entry
*dst
, struct sk_buff
*skb
,
764 const struct ip_tunnel_info
*info
,
765 bool xnet
, int ip_hdr_len
,
766 bool inner_proto_inherit
)
768 bool udp_sum
= test_bit(IP_TUNNEL_CSUM_BIT
, info
->key
.tun_flags
);
769 struct genevehdr
*gnvh
;
774 skb_reset_mac_header(skb
);
775 skb_scrub_packet(skb
, xnet
);
777 min_headroom
= LL_RESERVED_SPACE(dst
->dev
) + dst
->header_len
+
778 GENEVE_BASE_HLEN
+ info
->options_len
+ ip_hdr_len
;
779 err
= skb_cow_head(skb
, min_headroom
);
783 err
= udp_tunnel_handle_offloads(skb
, udp_sum
);
787 gnvh
= __skb_push(skb
, sizeof(*gnvh
) + info
->options_len
);
788 inner_proto
= inner_proto_inherit
? skb
->protocol
: htons(ETH_P_TEB
);
789 geneve_build_header(gnvh
, info
, inner_proto
);
790 skb_set_inner_protocol(skb
, inner_proto
);
798 static u8
geneve_get_dsfield(struct sk_buff
*skb
, struct net_device
*dev
,
799 const struct ip_tunnel_info
*info
,
802 struct geneve_dev
*geneve
= netdev_priv(dev
);
805 dsfield
= info
->key
.tos
;
806 if (dsfield
== 1 && !geneve
->cfg
.collect_md
) {
807 dsfield
= ip_tunnel_get_dsfield(ip_hdr(skb
), skb
);
814 static int geneve_xmit_skb(struct sk_buff
*skb
, struct net_device
*dev
,
815 struct geneve_dev
*geneve
,
816 const struct ip_tunnel_info
*info
)
818 bool inner_proto_inherit
= geneve
->cfg
.inner_proto_inherit
;
819 bool xnet
= !net_eq(geneve
->net
, dev_net(geneve
->dev
));
820 struct geneve_sock
*gs4
= rcu_dereference(geneve
->sock4
);
821 const struct ip_tunnel_key
*key
= &info
->key
;
830 if (skb_vlan_inet_prepare(skb
, inner_proto_inherit
))
836 use_cache
= ip_tunnel_dst_cache_usable(skb
, info
);
837 tos
= geneve_get_dsfield(skb
, dev
, info
, &use_cache
);
838 sport
= udp_flow_src_port(geneve
->net
, skb
, 1, USHRT_MAX
, true);
840 rt
= udp_tunnel_dst_lookup(skb
, dev
, geneve
->net
, 0, &saddr
,
842 sport
, geneve
->cfg
.info
.key
.tp_dst
, tos
,
844 (struct dst_cache
*)&info
->dst_cache
: NULL
);
848 err
= skb_tunnel_check_pmtu(skb
, &rt
->dst
,
849 GENEVE_IPV4_HLEN
+ info
->options_len
,
850 netif_is_any_bridge_port(dev
));
852 dst_release(&rt
->dst
);
855 struct ip_tunnel_info
*info
;
857 info
= skb_tunnel_info(skb
);
859 struct ip_tunnel_info
*unclone
;
861 unclone
= skb_tunnel_info_unclone(skb
);
862 if (unlikely(!unclone
)) {
863 dst_release(&rt
->dst
);
867 unclone
->key
.u
.ipv4
.dst
= saddr
;
868 unclone
->key
.u
.ipv4
.src
= info
->key
.u
.ipv4
.dst
;
871 if (!pskb_may_pull(skb
, ETH_HLEN
)) {
872 dst_release(&rt
->dst
);
876 skb
->protocol
= eth_type_trans(skb
, geneve
->dev
);
878 dst_release(&rt
->dst
);
882 tos
= ip_tunnel_ecn_encap(tos
, ip_hdr(skb
), skb
);
883 if (geneve
->cfg
.collect_md
) {
886 df
= test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT
, key
->tun_flags
) ?
889 if (geneve
->cfg
.ttl_inherit
)
890 ttl
= ip_tunnel_get_ttl(ip_hdr(skb
), skb
);
893 ttl
= ttl
? : ip4_dst_hoplimit(&rt
->dst
);
895 if (geneve
->cfg
.df
== GENEVE_DF_SET
) {
897 } else if (geneve
->cfg
.df
== GENEVE_DF_INHERIT
) {
898 struct ethhdr
*eth
= eth_hdr(skb
);
900 if (ntohs(eth
->h_proto
) == ETH_P_IPV6
) {
902 } else if (ntohs(eth
->h_proto
) == ETH_P_IP
) {
903 struct iphdr
*iph
= ip_hdr(skb
);
905 if (iph
->frag_off
& htons(IP_DF
))
911 err
= geneve_build_skb(&rt
->dst
, skb
, info
, xnet
, sizeof(struct iphdr
),
912 inner_proto_inherit
);
916 udp_tunnel_xmit_skb(rt
, gs4
->sock
->sk
, skb
, saddr
, info
->key
.u
.ipv4
.dst
,
917 tos
, ttl
, df
, sport
, geneve
->cfg
.info
.key
.tp_dst
,
918 !net_eq(geneve
->net
, dev_net(geneve
->dev
)),
919 !test_bit(IP_TUNNEL_CSUM_BIT
,
920 info
->key
.tun_flags
));
924 #if IS_ENABLED(CONFIG_IPV6)
925 static int geneve6_xmit_skb(struct sk_buff
*skb
, struct net_device
*dev
,
926 struct geneve_dev
*geneve
,
927 const struct ip_tunnel_info
*info
)
929 bool inner_proto_inherit
= geneve
->cfg
.inner_proto_inherit
;
930 bool xnet
= !net_eq(geneve
->net
, dev_net(geneve
->dev
));
931 struct geneve_sock
*gs6
= rcu_dereference(geneve
->sock6
);
932 const struct ip_tunnel_key
*key
= &info
->key
;
933 struct dst_entry
*dst
= NULL
;
934 struct in6_addr saddr
;
940 if (skb_vlan_inet_prepare(skb
, inner_proto_inherit
))
946 use_cache
= ip_tunnel_dst_cache_usable(skb
, info
);
947 prio
= geneve_get_dsfield(skb
, dev
, info
, &use_cache
);
948 sport
= udp_flow_src_port(geneve
->net
, skb
, 1, USHRT_MAX
, true);
950 dst
= udp_tunnel6_dst_lookup(skb
, dev
, geneve
->net
, gs6
->sock
, 0,
952 geneve
->cfg
.info
.key
.tp_dst
, prio
,
954 (struct dst_cache
*)&info
->dst_cache
: NULL
);
958 err
= skb_tunnel_check_pmtu(skb
, dst
,
959 GENEVE_IPV6_HLEN
+ info
->options_len
,
960 netif_is_any_bridge_port(dev
));
965 struct ip_tunnel_info
*info
= skb_tunnel_info(skb
);
968 struct ip_tunnel_info
*unclone
;
970 unclone
= skb_tunnel_info_unclone(skb
);
971 if (unlikely(!unclone
)) {
976 unclone
->key
.u
.ipv6
.dst
= saddr
;
977 unclone
->key
.u
.ipv6
.src
= info
->key
.u
.ipv6
.dst
;
980 if (!pskb_may_pull(skb
, ETH_HLEN
)) {
985 skb
->protocol
= eth_type_trans(skb
, geneve
->dev
);
991 prio
= ip_tunnel_ecn_encap(prio
, ip_hdr(skb
), skb
);
992 if (geneve
->cfg
.collect_md
) {
995 if (geneve
->cfg
.ttl_inherit
)
996 ttl
= ip_tunnel_get_ttl(ip_hdr(skb
), skb
);
999 ttl
= ttl
? : ip6_dst_hoplimit(dst
);
1001 err
= geneve_build_skb(dst
, skb
, info
, xnet
, sizeof(struct ipv6hdr
),
1002 inner_proto_inherit
);
1006 udp_tunnel6_xmit_skb(dst
, gs6
->sock
->sk
, skb
, dev
,
1007 &saddr
, &key
->u
.ipv6
.dst
, prio
, ttl
,
1008 info
->key
.label
, sport
, geneve
->cfg
.info
.key
.tp_dst
,
1009 !test_bit(IP_TUNNEL_CSUM_BIT
,
1010 info
->key
.tun_flags
));
1015 static netdev_tx_t
geneve_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
1017 struct geneve_dev
*geneve
= netdev_priv(dev
);
1018 struct ip_tunnel_info
*info
= NULL
;
1021 if (geneve
->cfg
.collect_md
) {
1022 info
= skb_tunnel_info(skb
);
1023 if (unlikely(!info
|| !(info
->mode
& IP_TUNNEL_INFO_TX
))) {
1024 netdev_dbg(dev
, "no tunnel metadata\n");
1026 DEV_STATS_INC(dev
, tx_dropped
);
1027 return NETDEV_TX_OK
;
1030 info
= &geneve
->cfg
.info
;
1034 #if IS_ENABLED(CONFIG_IPV6)
1035 if (info
->mode
& IP_TUNNEL_INFO_IPV6
)
1036 err
= geneve6_xmit_skb(skb
, dev
, geneve
, info
);
1039 err
= geneve_xmit_skb(skb
, dev
, geneve
, info
);
1043 return NETDEV_TX_OK
;
1045 if (err
!= -EMSGSIZE
)
1049 DEV_STATS_INC(dev
, collisions
);
1050 else if (err
== -ENETUNREACH
)
1051 DEV_STATS_INC(dev
, tx_carrier_errors
);
1053 DEV_STATS_INC(dev
, tx_errors
);
1054 return NETDEV_TX_OK
;
1057 static int geneve_change_mtu(struct net_device
*dev
, int new_mtu
)
1059 if (new_mtu
> dev
->max_mtu
)
1060 new_mtu
= dev
->max_mtu
;
1061 else if (new_mtu
< dev
->min_mtu
)
1062 new_mtu
= dev
->min_mtu
;
1064 WRITE_ONCE(dev
->mtu
, new_mtu
);
1068 static int geneve_fill_metadata_dst(struct net_device
*dev
, struct sk_buff
*skb
)
1070 struct ip_tunnel_info
*info
= skb_tunnel_info(skb
);
1071 struct geneve_dev
*geneve
= netdev_priv(dev
);
1074 if (ip_tunnel_info_af(info
) == AF_INET
) {
1076 struct geneve_sock
*gs4
= rcu_dereference(geneve
->sock4
);
1084 use_cache
= ip_tunnel_dst_cache_usable(skb
, info
);
1085 tos
= geneve_get_dsfield(skb
, dev
, info
, &use_cache
);
1086 sport
= udp_flow_src_port(geneve
->net
, skb
,
1087 1, USHRT_MAX
, true);
1089 rt
= udp_tunnel_dst_lookup(skb
, dev
, geneve
->net
, 0, &saddr
,
1091 sport
, geneve
->cfg
.info
.key
.tp_dst
,
1093 use_cache
? &info
->dst_cache
: NULL
);
1098 info
->key
.u
.ipv4
.src
= saddr
;
1099 #if IS_ENABLED(CONFIG_IPV6)
1100 } else if (ip_tunnel_info_af(info
) == AF_INET6
) {
1101 struct dst_entry
*dst
;
1102 struct geneve_sock
*gs6
= rcu_dereference(geneve
->sock6
);
1103 struct in6_addr saddr
;
1110 use_cache
= ip_tunnel_dst_cache_usable(skb
, info
);
1111 prio
= geneve_get_dsfield(skb
, dev
, info
, &use_cache
);
1112 sport
= udp_flow_src_port(geneve
->net
, skb
,
1113 1, USHRT_MAX
, true);
1115 dst
= udp_tunnel6_dst_lookup(skb
, dev
, geneve
->net
, gs6
->sock
, 0,
1116 &saddr
, &info
->key
, sport
,
1117 geneve
->cfg
.info
.key
.tp_dst
, prio
,
1118 use_cache
? &info
->dst_cache
: NULL
);
1120 return PTR_ERR(dst
);
1123 info
->key
.u
.ipv6
.src
= saddr
;
1129 info
->key
.tp_src
= sport
;
1130 info
->key
.tp_dst
= geneve
->cfg
.info
.key
.tp_dst
;
1134 static const struct net_device_ops geneve_netdev_ops
= {
1135 .ndo_init
= geneve_init
,
1136 .ndo_uninit
= geneve_uninit
,
1137 .ndo_open
= geneve_open
,
1138 .ndo_stop
= geneve_stop
,
1139 .ndo_start_xmit
= geneve_xmit
,
1140 .ndo_change_mtu
= geneve_change_mtu
,
1141 .ndo_validate_addr
= eth_validate_addr
,
1142 .ndo_set_mac_address
= eth_mac_addr
,
1143 .ndo_fill_metadata_dst
= geneve_fill_metadata_dst
,
1146 static void geneve_get_drvinfo(struct net_device
*dev
,
1147 struct ethtool_drvinfo
*drvinfo
)
1149 strscpy(drvinfo
->version
, GENEVE_NETDEV_VER
, sizeof(drvinfo
->version
));
1150 strscpy(drvinfo
->driver
, "geneve", sizeof(drvinfo
->driver
));
1153 static const struct ethtool_ops geneve_ethtool_ops
= {
1154 .get_drvinfo
= geneve_get_drvinfo
,
1155 .get_link
= ethtool_op_get_link
,
1158 /* Info for udev, that this is a virtual tunnel endpoint */
1159 static const struct device_type geneve_type
= {
1163 /* Calls the ndo_udp_tunnel_add of the caller in order to
1164 * supply the listening GENEVE udp ports. Callers are expected
1165 * to implement the ndo_udp_tunnel_add.
1167 static void geneve_offload_rx_ports(struct net_device
*dev
, bool push
)
1169 struct net
*net
= dev_net(dev
);
1170 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1171 struct geneve_sock
*gs
;
1174 list_for_each_entry_rcu(gs
, &gn
->sock_list
, list
) {
1176 udp_tunnel_push_rx_port(dev
, gs
->sock
,
1177 UDP_TUNNEL_TYPE_GENEVE
);
1179 udp_tunnel_drop_rx_port(dev
, gs
->sock
,
1180 UDP_TUNNEL_TYPE_GENEVE
);
1186 /* Initialize the device structure. */
1187 static void geneve_setup(struct net_device
*dev
)
1191 dev
->netdev_ops
= &geneve_netdev_ops
;
1192 dev
->ethtool_ops
= &geneve_ethtool_ops
;
1193 dev
->needs_free_netdev
= true;
1195 SET_NETDEV_DEVTYPE(dev
, &geneve_type
);
1197 dev
->features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
| NETIF_F_FRAGLIST
;
1198 dev
->features
|= NETIF_F_RXCSUM
;
1199 dev
->features
|= NETIF_F_GSO_SOFTWARE
;
1201 dev
->hw_features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
| NETIF_F_FRAGLIST
;
1202 dev
->hw_features
|= NETIF_F_RXCSUM
;
1203 dev
->hw_features
|= NETIF_F_GSO_SOFTWARE
;
1205 dev
->pcpu_stat_type
= NETDEV_PCPU_STAT_TSTATS
;
1206 /* MTU range: 68 - (something less than 65535) */
1207 dev
->min_mtu
= ETH_MIN_MTU
;
1208 /* The max_mtu calculation does not take account of GENEVE
1209 * options, to avoid excluding potentially valid
1210 * configurations. This will be further reduced by IPvX hdr size.
1212 dev
->max_mtu
= IP_MAX_MTU
- GENEVE_BASE_HLEN
- dev
->hard_header_len
;
1214 netif_keep_dst(dev
);
1215 dev
->priv_flags
&= ~IFF_TX_SKB_SHARING
;
1216 dev
->priv_flags
|= IFF_LIVE_ADDR_CHANGE
| IFF_NO_QUEUE
;
1218 eth_hw_addr_random(dev
);
1221 static const struct nla_policy geneve_policy
[IFLA_GENEVE_MAX
+ 1] = {
1222 [IFLA_GENEVE_UNSPEC
] = { .strict_start_type
= IFLA_GENEVE_INNER_PROTO_INHERIT
},
1223 [IFLA_GENEVE_ID
] = { .type
= NLA_U32
},
1224 [IFLA_GENEVE_REMOTE
] = { .len
= sizeof_field(struct iphdr
, daddr
) },
1225 [IFLA_GENEVE_REMOTE6
] = { .len
= sizeof(struct in6_addr
) },
1226 [IFLA_GENEVE_TTL
] = { .type
= NLA_U8
},
1227 [IFLA_GENEVE_TOS
] = { .type
= NLA_U8
},
1228 [IFLA_GENEVE_LABEL
] = { .type
= NLA_U32
},
1229 [IFLA_GENEVE_PORT
] = { .type
= NLA_U16
},
1230 [IFLA_GENEVE_COLLECT_METADATA
] = { .type
= NLA_FLAG
},
1231 [IFLA_GENEVE_UDP_CSUM
] = { .type
= NLA_U8
},
1232 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX
] = { .type
= NLA_U8
},
1233 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX
] = { .type
= NLA_U8
},
1234 [IFLA_GENEVE_TTL_INHERIT
] = { .type
= NLA_U8
},
1235 [IFLA_GENEVE_DF
] = { .type
= NLA_U8
},
1236 [IFLA_GENEVE_INNER_PROTO_INHERIT
] = { .type
= NLA_FLAG
},
1239 static int geneve_validate(struct nlattr
*tb
[], struct nlattr
*data
[],
1240 struct netlink_ext_ack
*extack
)
1242 if (tb
[IFLA_ADDRESS
]) {
1243 if (nla_len(tb
[IFLA_ADDRESS
]) != ETH_ALEN
) {
1244 NL_SET_ERR_MSG_ATTR(extack
, tb
[IFLA_ADDRESS
],
1245 "Provided link layer address is not Ethernet");
1249 if (!is_valid_ether_addr(nla_data(tb
[IFLA_ADDRESS
]))) {
1250 NL_SET_ERR_MSG_ATTR(extack
, tb
[IFLA_ADDRESS
],
1251 "Provided Ethernet address is not unicast");
1252 return -EADDRNOTAVAIL
;
1257 NL_SET_ERR_MSG(extack
,
1258 "Not enough attributes provided to perform the operation");
1262 if (data
[IFLA_GENEVE_ID
]) {
1263 __u32 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
1265 if (vni
>= GENEVE_N_VID
) {
1266 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_ID
],
1267 "Geneve ID must be lower than 16777216");
1272 if (data
[IFLA_GENEVE_DF
]) {
1273 enum ifla_geneve_df df
= nla_get_u8(data
[IFLA_GENEVE_DF
]);
1275 if (df
< 0 || df
> GENEVE_DF_MAX
) {
1276 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_DF
],
1277 "Invalid DF attribute");
1285 static struct geneve_dev
*geneve_find_dev(struct geneve_net
*gn
,
1286 const struct ip_tunnel_info
*info
,
1287 bool *tun_on_same_port
,
1288 bool *tun_collect_md
)
1290 struct geneve_dev
*geneve
, *t
= NULL
;
1292 *tun_on_same_port
= false;
1293 *tun_collect_md
= false;
1294 list_for_each_entry(geneve
, &gn
->geneve_list
, next
) {
1295 if (info
->key
.tp_dst
== geneve
->cfg
.info
.key
.tp_dst
) {
1296 *tun_collect_md
= geneve
->cfg
.collect_md
;
1297 *tun_on_same_port
= true;
1299 if (info
->key
.tun_id
== geneve
->cfg
.info
.key
.tun_id
&&
1300 info
->key
.tp_dst
== geneve
->cfg
.info
.key
.tp_dst
&&
1301 !memcmp(&info
->key
.u
, &geneve
->cfg
.info
.key
.u
, sizeof(info
->key
.u
)))
1307 static bool is_tnl_info_zero(const struct ip_tunnel_info
*info
)
1309 return !(info
->key
.tun_id
|| info
->key
.tos
||
1310 !ip_tunnel_flags_empty(info
->key
.tun_flags
) ||
1311 info
->key
.ttl
|| info
->key
.label
|| info
->key
.tp_src
||
1312 memchr_inv(&info
->key
.u
, 0, sizeof(info
->key
.u
)));
1315 static bool geneve_dst_addr_equal(struct ip_tunnel_info
*a
,
1316 struct ip_tunnel_info
*b
)
1318 if (ip_tunnel_info_af(a
) == AF_INET
)
1319 return a
->key
.u
.ipv4
.dst
== b
->key
.u
.ipv4
.dst
;
1321 return ipv6_addr_equal(&a
->key
.u
.ipv6
.dst
, &b
->key
.u
.ipv6
.dst
);
1324 static int geneve_configure(struct net
*net
, struct net_device
*dev
,
1325 struct netlink_ext_ack
*extack
,
1326 const struct geneve_config
*cfg
)
1328 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1329 struct geneve_dev
*t
, *geneve
= netdev_priv(dev
);
1330 const struct ip_tunnel_info
*info
= &cfg
->info
;
1331 bool tun_collect_md
, tun_on_same_port
;
1334 if (cfg
->collect_md
&& !is_tnl_info_zero(info
)) {
1335 NL_SET_ERR_MSG(extack
,
1336 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
1343 t
= geneve_find_dev(gn
, info
, &tun_on_same_port
, &tun_collect_md
);
1347 /* make enough headroom for basic scenario */
1348 encap_len
= GENEVE_BASE_HLEN
+ ETH_HLEN
;
1349 if (!cfg
->collect_md
&& ip_tunnel_info_af(info
) == AF_INET
) {
1350 encap_len
+= sizeof(struct iphdr
);
1351 dev
->max_mtu
-= sizeof(struct iphdr
);
1353 encap_len
+= sizeof(struct ipv6hdr
);
1354 dev
->max_mtu
-= sizeof(struct ipv6hdr
);
1356 dev
->needed_headroom
= encap_len
+ ETH_HLEN
;
1358 if (cfg
->collect_md
) {
1359 if (tun_on_same_port
) {
1360 NL_SET_ERR_MSG(extack
,
1361 "There can be only one externally controlled device on a destination port");
1365 if (tun_collect_md
) {
1366 NL_SET_ERR_MSG(extack
,
1367 "There already exists an externally controlled device on this destination port");
1372 dst_cache_reset(&geneve
->cfg
.info
.dst_cache
);
1373 memcpy(&geneve
->cfg
, cfg
, sizeof(*cfg
));
1375 if (geneve
->cfg
.inner_proto_inherit
) {
1376 dev
->header_ops
= NULL
;
1377 dev
->type
= ARPHRD_NONE
;
1378 dev
->hard_header_len
= 0;
1380 dev
->flags
= IFF_POINTOPOINT
| IFF_NOARP
;
1383 err
= register_netdevice(dev
);
1387 list_add(&geneve
->next
, &gn
->geneve_list
);
1391 static void init_tnl_info(struct ip_tunnel_info
*info
, __u16 dst_port
)
1393 memset(info
, 0, sizeof(*info
));
1394 info
->key
.tp_dst
= htons(dst_port
);
1397 static int geneve_nl2info(struct nlattr
*tb
[], struct nlattr
*data
[],
1398 struct netlink_ext_ack
*extack
,
1399 struct geneve_config
*cfg
, bool changelink
)
1401 struct ip_tunnel_info
*info
= &cfg
->info
;
1404 if (data
[IFLA_GENEVE_REMOTE
] && data
[IFLA_GENEVE_REMOTE6
]) {
1405 NL_SET_ERR_MSG(extack
,
1406 "Cannot specify both IPv4 and IPv6 Remote addresses");
1410 if (data
[IFLA_GENEVE_REMOTE
]) {
1411 if (changelink
&& (ip_tunnel_info_af(info
) == AF_INET6
)) {
1412 attrtype
= IFLA_GENEVE_REMOTE
;
1416 info
->key
.u
.ipv4
.dst
=
1417 nla_get_in_addr(data
[IFLA_GENEVE_REMOTE
]);
1419 if (ipv4_is_multicast(info
->key
.u
.ipv4
.dst
)) {
1420 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_REMOTE
],
1421 "Remote IPv4 address cannot be Multicast");
1426 if (data
[IFLA_GENEVE_REMOTE6
]) {
1427 #if IS_ENABLED(CONFIG_IPV6)
1428 if (changelink
&& (ip_tunnel_info_af(info
) == AF_INET
)) {
1429 attrtype
= IFLA_GENEVE_REMOTE6
;
1433 info
->mode
= IP_TUNNEL_INFO_IPV6
;
1434 info
->key
.u
.ipv6
.dst
=
1435 nla_get_in6_addr(data
[IFLA_GENEVE_REMOTE6
]);
1437 if (ipv6_addr_type(&info
->key
.u
.ipv6
.dst
) &
1438 IPV6_ADDR_LINKLOCAL
) {
1439 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_REMOTE6
],
1440 "Remote IPv6 address cannot be link-local");
1443 if (ipv6_addr_is_multicast(&info
->key
.u
.ipv6
.dst
)) {
1444 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_REMOTE6
],
1445 "Remote IPv6 address cannot be Multicast");
1448 __set_bit(IP_TUNNEL_CSUM_BIT
, info
->key
.tun_flags
);
1449 cfg
->use_udp6_rx_checksums
= true;
1451 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_REMOTE6
],
1452 "IPv6 support not enabled in the kernel");
1453 return -EPFNOSUPPORT
;
1457 if (data
[IFLA_GENEVE_ID
]) {
1462 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
1463 tvni
[0] = (vni
& 0x00ff0000) >> 16;
1464 tvni
[1] = (vni
& 0x0000ff00) >> 8;
1465 tvni
[2] = vni
& 0x000000ff;
1467 tunid
= vni_to_tunnel_id(tvni
);
1468 if (changelink
&& (tunid
!= info
->key
.tun_id
)) {
1469 attrtype
= IFLA_GENEVE_ID
;
1472 info
->key
.tun_id
= tunid
;
1475 if (data
[IFLA_GENEVE_TTL_INHERIT
]) {
1476 if (nla_get_u8(data
[IFLA_GENEVE_TTL_INHERIT
]))
1477 cfg
->ttl_inherit
= true;
1479 cfg
->ttl_inherit
= false;
1480 } else if (data
[IFLA_GENEVE_TTL
]) {
1481 info
->key
.ttl
= nla_get_u8(data
[IFLA_GENEVE_TTL
]);
1482 cfg
->ttl_inherit
= false;
1485 if (data
[IFLA_GENEVE_TOS
])
1486 info
->key
.tos
= nla_get_u8(data
[IFLA_GENEVE_TOS
]);
1488 if (data
[IFLA_GENEVE_DF
])
1489 cfg
->df
= nla_get_u8(data
[IFLA_GENEVE_DF
]);
1491 if (data
[IFLA_GENEVE_LABEL
]) {
1492 info
->key
.label
= nla_get_be32(data
[IFLA_GENEVE_LABEL
]) &
1493 IPV6_FLOWLABEL_MASK
;
1494 if (info
->key
.label
&& (!(info
->mode
& IP_TUNNEL_INFO_IPV6
))) {
1495 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_LABEL
],
1496 "Label attribute only applies for IPv6 Geneve devices");
1501 if (data
[IFLA_GENEVE_PORT
]) {
1503 attrtype
= IFLA_GENEVE_PORT
;
1506 info
->key
.tp_dst
= nla_get_be16(data
[IFLA_GENEVE_PORT
]);
1509 if (data
[IFLA_GENEVE_COLLECT_METADATA
]) {
1511 attrtype
= IFLA_GENEVE_COLLECT_METADATA
;
1514 cfg
->collect_md
= true;
1517 if (data
[IFLA_GENEVE_UDP_CSUM
]) {
1519 attrtype
= IFLA_GENEVE_UDP_CSUM
;
1522 if (nla_get_u8(data
[IFLA_GENEVE_UDP_CSUM
]))
1523 __set_bit(IP_TUNNEL_CSUM_BIT
, info
->key
.tun_flags
);
1526 if (data
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX
]) {
1527 #if IS_ENABLED(CONFIG_IPV6)
1529 attrtype
= IFLA_GENEVE_UDP_ZERO_CSUM6_TX
;
1532 if (nla_get_u8(data
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX
]))
1533 __clear_bit(IP_TUNNEL_CSUM_BIT
, info
->key
.tun_flags
);
1535 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX
],
1536 "IPv6 support not enabled in the kernel");
1537 return -EPFNOSUPPORT
;
1541 if (data
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
]) {
1542 #if IS_ENABLED(CONFIG_IPV6)
1544 attrtype
= IFLA_GENEVE_UDP_ZERO_CSUM6_RX
;
1547 if (nla_get_u8(data
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
]))
1548 cfg
->use_udp6_rx_checksums
= false;
1550 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
],
1551 "IPv6 support not enabled in the kernel");
1552 return -EPFNOSUPPORT
;
1556 if (data
[IFLA_GENEVE_INNER_PROTO_INHERIT
]) {
1558 attrtype
= IFLA_GENEVE_INNER_PROTO_INHERIT
;
1561 cfg
->inner_proto_inherit
= true;
1566 NL_SET_ERR_MSG_ATTR(extack
, data
[attrtype
],
1567 "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported");
1571 static void geneve_link_config(struct net_device
*dev
,
1572 struct ip_tunnel_info
*info
, struct nlattr
*tb
[])
1574 struct geneve_dev
*geneve
= netdev_priv(dev
);
1578 geneve_change_mtu(dev
, nla_get_u32(tb
[IFLA_MTU
]));
1582 switch (ip_tunnel_info_af(info
)) {
1584 struct flowi4 fl4
= { .daddr
= info
->key
.u
.ipv4
.dst
};
1585 struct rtable
*rt
= ip_route_output_key(geneve
->net
, &fl4
);
1587 if (!IS_ERR(rt
) && rt
->dst
.dev
) {
1588 ldev_mtu
= rt
->dst
.dev
->mtu
- GENEVE_IPV4_HLEN
;
1593 #if IS_ENABLED(CONFIG_IPV6)
1595 struct rt6_info
*rt
;
1597 if (!__in6_dev_get(dev
))
1600 rt
= rt6_lookup(geneve
->net
, &info
->key
.u
.ipv6
.dst
, NULL
, 0,
1603 if (rt
&& rt
->dst
.dev
)
1604 ldev_mtu
= rt
->dst
.dev
->mtu
- GENEVE_IPV6_HLEN
;
1614 geneve_change_mtu(dev
, ldev_mtu
- info
->options_len
);
1617 static int geneve_newlink(struct net
*net
, struct net_device
*dev
,
1618 struct nlattr
*tb
[], struct nlattr
*data
[],
1619 struct netlink_ext_ack
*extack
)
1621 struct geneve_config cfg
= {
1622 .df
= GENEVE_DF_UNSET
,
1623 .use_udp6_rx_checksums
= false,
1624 .ttl_inherit
= false,
1625 .collect_md
= false,
1629 init_tnl_info(&cfg
.info
, GENEVE_UDP_PORT
);
1630 err
= geneve_nl2info(tb
, data
, extack
, &cfg
, false);
1634 err
= geneve_configure(net
, dev
, extack
, &cfg
);
1638 geneve_link_config(dev
, &cfg
.info
, tb
);
1643 /* Quiesces the geneve device data path for both TX and RX.
1645 * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
1646 * So, if we set that socket to NULL under RCU and wait for synchronize_net()
1647 * to complete for the existing set of in-flight packets to be transmitted,
1648 * then we would have quiesced the transmit data path. All the future packets
1649 * will get dropped until we unquiesce the data path.
1651 * On receive, geneve dereferences the geneve_sock stashed in the socket. So,
1652 * if we set that to NULL under RCU and wait for synchronize_net() to
1653 * complete, then we would have quiesced the receive data path.
1655 static void geneve_quiesce(struct geneve_dev
*geneve
, struct geneve_sock
**gs4
,
1656 struct geneve_sock
**gs6
)
1658 *gs4
= rtnl_dereference(geneve
->sock4
);
1659 rcu_assign_pointer(geneve
->sock4
, NULL
);
1661 rcu_assign_sk_user_data((*gs4
)->sock
->sk
, NULL
);
1662 #if IS_ENABLED(CONFIG_IPV6)
1663 *gs6
= rtnl_dereference(geneve
->sock6
);
1664 rcu_assign_pointer(geneve
->sock6
, NULL
);
1666 rcu_assign_sk_user_data((*gs6
)->sock
->sk
, NULL
);
1673 /* Resumes the geneve device data path for both TX and RX. */
1674 static void geneve_unquiesce(struct geneve_dev
*geneve
, struct geneve_sock
*gs4
,
1675 struct geneve_sock __maybe_unused
*gs6
)
1677 rcu_assign_pointer(geneve
->sock4
, gs4
);
1679 rcu_assign_sk_user_data(gs4
->sock
->sk
, gs4
);
1680 #if IS_ENABLED(CONFIG_IPV6)
1681 rcu_assign_pointer(geneve
->sock6
, gs6
);
1683 rcu_assign_sk_user_data(gs6
->sock
->sk
, gs6
);
1688 static int geneve_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
1689 struct nlattr
*data
[],
1690 struct netlink_ext_ack
*extack
)
1692 struct geneve_dev
*geneve
= netdev_priv(dev
);
1693 struct geneve_sock
*gs4
, *gs6
;
1694 struct geneve_config cfg
;
1697 /* If the geneve device is configured for metadata (or externally
1698 * controlled, for example, OVS), then nothing can be changed.
1700 if (geneve
->cfg
.collect_md
)
1703 /* Start with the existing info. */
1704 memcpy(&cfg
, &geneve
->cfg
, sizeof(cfg
));
1705 err
= geneve_nl2info(tb
, data
, extack
, &cfg
, true);
1709 if (!geneve_dst_addr_equal(&geneve
->cfg
.info
, &cfg
.info
)) {
1710 dst_cache_reset(&cfg
.info
.dst_cache
);
1711 geneve_link_config(dev
, &cfg
.info
, tb
);
1714 geneve_quiesce(geneve
, &gs4
, &gs6
);
1715 memcpy(&geneve
->cfg
, &cfg
, sizeof(cfg
));
1716 geneve_unquiesce(geneve
, gs4
, gs6
);
1721 static void geneve_dellink(struct net_device
*dev
, struct list_head
*head
)
1723 struct geneve_dev
*geneve
= netdev_priv(dev
);
1725 list_del(&geneve
->next
);
1726 unregister_netdevice_queue(dev
, head
);
1729 static size_t geneve_get_size(const struct net_device
*dev
)
1731 return nla_total_size(sizeof(__u32
)) + /* IFLA_GENEVE_ID */
1732 nla_total_size(sizeof(struct in6_addr
)) + /* IFLA_GENEVE_REMOTE{6} */
1733 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TTL */
1734 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TOS */
1735 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_DF */
1736 nla_total_size(sizeof(__be32
)) + /* IFLA_GENEVE_LABEL */
1737 nla_total_size(sizeof(__be16
)) + /* IFLA_GENEVE_PORT */
1738 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
1739 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_UDP_CSUM */
1740 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
1741 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
1742 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TTL_INHERIT */
1743 nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */
1747 static int geneve_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
1749 struct geneve_dev
*geneve
= netdev_priv(dev
);
1750 struct ip_tunnel_info
*info
= &geneve
->cfg
.info
;
1751 bool ttl_inherit
= geneve
->cfg
.ttl_inherit
;
1752 bool metadata
= geneve
->cfg
.collect_md
;
1756 tunnel_id_to_vni(info
->key
.tun_id
, tmp_vni
);
1757 vni
= (tmp_vni
[0] << 16) | (tmp_vni
[1] << 8) | tmp_vni
[2];
1758 if (nla_put_u32(skb
, IFLA_GENEVE_ID
, vni
))
1759 goto nla_put_failure
;
1761 if (!metadata
&& ip_tunnel_info_af(info
) == AF_INET
) {
1762 if (nla_put_in_addr(skb
, IFLA_GENEVE_REMOTE
,
1763 info
->key
.u
.ipv4
.dst
))
1764 goto nla_put_failure
;
1765 if (nla_put_u8(skb
, IFLA_GENEVE_UDP_CSUM
,
1766 test_bit(IP_TUNNEL_CSUM_BIT
,
1767 info
->key
.tun_flags
)))
1768 goto nla_put_failure
;
1770 #if IS_ENABLED(CONFIG_IPV6)
1771 } else if (!metadata
) {
1772 if (nla_put_in6_addr(skb
, IFLA_GENEVE_REMOTE6
,
1773 &info
->key
.u
.ipv6
.dst
))
1774 goto nla_put_failure
;
1775 if (nla_put_u8(skb
, IFLA_GENEVE_UDP_ZERO_CSUM6_TX
,
1776 !test_bit(IP_TUNNEL_CSUM_BIT
,
1777 info
->key
.tun_flags
)))
1778 goto nla_put_failure
;
1782 if (nla_put_u8(skb
, IFLA_GENEVE_TTL
, info
->key
.ttl
) ||
1783 nla_put_u8(skb
, IFLA_GENEVE_TOS
, info
->key
.tos
) ||
1784 nla_put_be32(skb
, IFLA_GENEVE_LABEL
, info
->key
.label
))
1785 goto nla_put_failure
;
1787 if (nla_put_u8(skb
, IFLA_GENEVE_DF
, geneve
->cfg
.df
))
1788 goto nla_put_failure
;
1790 if (nla_put_be16(skb
, IFLA_GENEVE_PORT
, info
->key
.tp_dst
))
1791 goto nla_put_failure
;
1793 if (metadata
&& nla_put_flag(skb
, IFLA_GENEVE_COLLECT_METADATA
))
1794 goto nla_put_failure
;
1796 #if IS_ENABLED(CONFIG_IPV6)
1797 if (nla_put_u8(skb
, IFLA_GENEVE_UDP_ZERO_CSUM6_RX
,
1798 !geneve
->cfg
.use_udp6_rx_checksums
))
1799 goto nla_put_failure
;
1802 if (nla_put_u8(skb
, IFLA_GENEVE_TTL_INHERIT
, ttl_inherit
))
1803 goto nla_put_failure
;
1805 if (geneve
->cfg
.inner_proto_inherit
&&
1806 nla_put_flag(skb
, IFLA_GENEVE_INNER_PROTO_INHERIT
))
1807 goto nla_put_failure
;
1815 static struct rtnl_link_ops geneve_link_ops __read_mostly
= {
1817 .maxtype
= IFLA_GENEVE_MAX
,
1818 .policy
= geneve_policy
,
1819 .priv_size
= sizeof(struct geneve_dev
),
1820 .setup
= geneve_setup
,
1821 .validate
= geneve_validate
,
1822 .newlink
= geneve_newlink
,
1823 .changelink
= geneve_changelink
,
1824 .dellink
= geneve_dellink
,
1825 .get_size
= geneve_get_size
,
1826 .fill_info
= geneve_fill_info
,
1829 struct net_device
*geneve_dev_create_fb(struct net
*net
, const char *name
,
1830 u8 name_assign_type
, u16 dst_port
)
1832 struct nlattr
*tb
[IFLA_MAX
+ 1];
1833 struct net_device
*dev
;
1834 LIST_HEAD(list_kill
);
1836 struct geneve_config cfg
= {
1837 .df
= GENEVE_DF_UNSET
,
1838 .use_udp6_rx_checksums
= true,
1839 .ttl_inherit
= false,
1843 memset(tb
, 0, sizeof(tb
));
1844 dev
= rtnl_create_link(net
, name
, name_assign_type
,
1845 &geneve_link_ops
, tb
, NULL
);
1849 init_tnl_info(&cfg
.info
, dst_port
);
1850 err
= geneve_configure(net
, dev
, NULL
, &cfg
);
1853 return ERR_PTR(err
);
1856 /* openvswitch users expect packet sizes to be unrestricted,
1857 * so set the largest MTU we can.
1859 err
= geneve_change_mtu(dev
, IP_MAX_MTU
);
1863 err
= rtnl_configure_link(dev
, NULL
, 0, NULL
);
1869 geneve_dellink(dev
, &list_kill
);
1870 unregister_netdevice_many(&list_kill
);
1871 return ERR_PTR(err
);
1873 EXPORT_SYMBOL_GPL(geneve_dev_create_fb
);
1875 static int geneve_netdevice_event(struct notifier_block
*unused
,
1876 unsigned long event
, void *ptr
)
1878 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
1880 if (event
== NETDEV_UDP_TUNNEL_PUSH_INFO
)
1881 geneve_offload_rx_ports(dev
, true);
1882 else if (event
== NETDEV_UDP_TUNNEL_DROP_INFO
)
1883 geneve_offload_rx_ports(dev
, false);
1888 static struct notifier_block geneve_notifier_block __read_mostly
= {
1889 .notifier_call
= geneve_netdevice_event
,
1892 static __net_init
int geneve_init_net(struct net
*net
)
1894 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1896 INIT_LIST_HEAD(&gn
->geneve_list
);
1897 INIT_LIST_HEAD(&gn
->sock_list
);
1901 static void geneve_destroy_tunnels(struct net
*net
, struct list_head
*head
)
1903 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1904 struct geneve_dev
*geneve
, *next
;
1905 struct net_device
*dev
, *aux
;
1907 /* gather any geneve devices that were moved into this ns */
1908 for_each_netdev_safe(net
, dev
, aux
)
1909 if (dev
->rtnl_link_ops
== &geneve_link_ops
)
1910 unregister_netdevice_queue(dev
, head
);
1912 /* now gather any other geneve devices that were created in this ns */
1913 list_for_each_entry_safe(geneve
, next
, &gn
->geneve_list
, next
) {
1914 /* If geneve->dev is in the same netns, it was already added
1915 * to the list by the previous loop.
1917 if (!net_eq(dev_net(geneve
->dev
), net
))
1918 unregister_netdevice_queue(geneve
->dev
, head
);
1922 static void __net_exit
geneve_exit_batch_rtnl(struct list_head
*net_list
,
1923 struct list_head
*dev_to_kill
)
1927 list_for_each_entry(net
, net_list
, exit_list
)
1928 geneve_destroy_tunnels(net
, dev_to_kill
);
1931 static void __net_exit
geneve_exit_net(struct net
*net
)
1933 const struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1935 WARN_ON_ONCE(!list_empty(&gn
->sock_list
));
1938 static struct pernet_operations geneve_net_ops
= {
1939 .init
= geneve_init_net
,
1940 .exit_batch_rtnl
= geneve_exit_batch_rtnl
,
1941 .exit
= geneve_exit_net
,
1942 .id
= &geneve_net_id
,
1943 .size
= sizeof(struct geneve_net
),
1946 static int __init
geneve_init_module(void)
1950 rc
= register_pernet_subsys(&geneve_net_ops
);
1954 rc
= register_netdevice_notifier(&geneve_notifier_block
);
1958 rc
= rtnl_link_register(&geneve_link_ops
);
1964 unregister_netdevice_notifier(&geneve_notifier_block
);
1966 unregister_pernet_subsys(&geneve_net_ops
);
1970 late_initcall(geneve_init_module
);
1972 static void __exit
geneve_cleanup_module(void)
1974 rtnl_link_unregister(&geneve_link_ops
);
1975 unregister_netdevice_notifier(&geneve_notifier_block
);
1976 unregister_pernet_subsys(&geneve_net_ops
);
1978 module_exit(geneve_cleanup_module
);
1980 MODULE_LICENSE("GPL");
1981 MODULE_VERSION(GENEVE_NETDEV_VER
);
1982 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
1983 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
1984 MODULE_ALIAS_RTNL_LINK("geneve");