/*
 * GENEVE: Generic Network Virtualization Encapsulation
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/netdevice.h>
16 #include <linux/etherdevice.h>
17 #include <linux/hash.h>
18 #include <net/rtnetlink.h>
19 #include <net/geneve.h>
/* Version string reported through ethtool drvinfo and MODULE_VERSION */
#define GENEVE_NETDEV_VER	"0.6"

/* UDP port used when opening the tunnel socket and as destination on xmit */
#define GENEVE_UDP_PORT		6081

/* The VNI is stored as three bytes, i.e. a 24-bit identifier */
#define GENEVE_N_VID		(1u << 24)
#define GENEVE_VID_MASK		(GENEVE_N_VID - 1)

/* Size of the per-namespace VNI hash table (see struct geneve_net) */
#define VNI_HASH_BITS		10
#define VNI_HASH_SIZE		(1<<VNI_HASH_BITS)
31 static bool log_ecn_error
= true;
32 module_param(log_ecn_error
, bool, 0644);
33 MODULE_PARM_DESC(log_ecn_error
, "Log packets received with corrupted ECN");
35 /* per-network namespace private data for this module */
37 struct list_head geneve_list
;
38 struct hlist_head vni_list
[VNI_HASH_SIZE
];
41 /* Pseudo network device */
43 struct hlist_node hlist
; /* vni hash table */
44 struct net
*net
; /* netns for packet i/o */
45 struct net_device
*dev
; /* netdev for geneve tunnel */
46 struct geneve_sock
*sock
; /* socket used for geneve tunnel */
47 u8 vni
[3]; /* virtual network ID for tunnel */
48 u8 ttl
; /* TTL override */
49 u8 tos
; /* TOS override */
50 struct sockaddr_in remote
; /* IPv4 address for link partner */
51 struct list_head next
; /* geneve's per namespace list */
/* Key for net_generic(); filled in through geneve_net_ops.id */
static int geneve_net_id;
56 static inline __u32
geneve_net_vni_hash(u8 vni
[3])
60 vnid
= (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
61 return hash_32(vnid
, VNI_HASH_BITS
);
64 /* geneve receive/decap routine */
/*
 * geneve_rx - receive callback that geneve_open() hands to geneve_sock_add().
 *
 * Steps visible below: hash the VNI taken from the Geneve header, walk that
 * hash bucket for a device whose VNI matches and whose configured remote
 * IPv4 address equals the outer source address, reset the MAC header, scrub
 * the skb, run eth_type_trans() against the matched device, test for a
 * looped-back source MAC, reset the network header, run ECN decapsulation
 * and add skb->len to the per-CPU rx_bytes counter.
 *
 * NOTE(review): the embedded original line numbers have gaps (72->76->80,
 * 85->93, 94->99, 105->108, 111->115, 119->124, 128->134), so statements are
 * missing here - at least the declarations/assignments of gn, hash and err,
 * the bodies of the if statements and the drop path. Restore them from the
 * original file before building.
 * NOTE(review): iph is re-pointed at the inner header just before it is
 * passed to IP_ECN_decapsulate(); that helper presumably expects the OUTER
 * header as its first argument - confirm against include/net/inet_ecn.h.
 */
65 static void geneve_rx(struct geneve_sock
*gs
, struct sk_buff
*skb
)
67 struct genevehdr
*gnvh
= geneve_hdr(skb
);
68 struct geneve_dev
*dummy
, *geneve
= NULL
;
69 struct geneve_net
*gn
;
70 struct iphdr
*iph
= NULL
;
71 struct pcpu_sw_netstats
*stats
;
72 struct hlist_head
*vni_list_head
;
76 iph
= ip_hdr(skb
); /* Still outer IP header... */
80 /* Find the device for this VNI */
81 hash
= geneve_net_vni_hash(gnvh
->vni
);
82 vni_list_head
= &gn
->vni_list
[hash
];
83 hlist_for_each_entry_rcu(dummy
, vni_list_head
, hlist
) {
84 if (!memcmp(gnvh
->vni
, dummy
->vni
, sizeof(dummy
->vni
)) &&
85 iph
->saddr
== dummy
->remote
.sin_addr
.s_addr
) {
93 /* Drop packets w/ critical options,
94 * since we don't support any...
99 skb_reset_mac_header(skb
);
100 skb_scrub_packet(skb
, !net_eq(geneve
->net
, dev_net(geneve
->dev
)));
101 skb
->protocol
= eth_type_trans(skb
, geneve
->dev
);
102 skb_postpull_rcsum(skb
, eth_hdr(skb
), ETH_HLEN
);
104 /* Ignore packet loops (and multicast echo) */
105 if (ether_addr_equal(eth_hdr(skb
)->h_source
, geneve
->dev
->dev_addr
))
108 skb_reset_network_header(skb
);
110 iph
= ip_hdr(skb
); /* Now inner IP header... */
111 err
= IP_ECN_decapsulate(iph
, skb
);
115 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
116 &iph
->saddr
, iph
->tos
);
118 ++geneve
->dev
->stats
.rx_frame_errors
;
119 ++geneve
->dev
->stats
.rx_errors
;
124 stats
= this_cpu_ptr(geneve
->dev
->tstats
);
125 u64_stats_update_begin(&stats
->syncp
);
127 stats
->rx_bytes
+= skb
->len
;
128 u64_stats_update_end(&stats
->syncp
);
134 /* Consume bad packet */
138 /* Setup stats when device is created */
/*
 * geneve_init - .ndo_init hook (see geneve_netdev_ops).
 *
 * Allocates the per-CPU struct pcpu_sw_netstats and stores it in
 * dev->tstats; geneve_uninit() frees it again.
 *
 * NOTE(review): original lines 142-146 are missing here - presumably the
 * allocation-failure check and the return value. Restore before building.
 */
139 static int geneve_init(struct net_device
*dev
)
141 dev
->tstats
= netdev_alloc_pcpu_stats(struct pcpu_sw_netstats
);
148 static void geneve_uninit(struct net_device
*dev
)
150 free_percpu(dev
->tstats
);
/*
 * geneve_open - .ndo_open hook (see geneve_netdev_ops).
 *
 * Looks up the struct geneve_net of the device's I/O namespace
 * (geneve->net) and calls geneve_sock_add() for GENEVE_UDP_PORT, passing
 * geneve_rx and that geneve_net along.
 *
 * NOTE(review): the call is cut off after "gn," and original lines 161-168
 * are missing - the remaining arguments and the handling of the returned
 * socket are not visible. geneve_stop() and geneve_xmit() read
 * geneve->sock, so the result is presumably stored there - confirm.
 */
153 static int geneve_open(struct net_device
*dev
)
155 struct geneve_dev
*geneve
= netdev_priv(dev
);
156 struct net
*net
= geneve
->net
;
157 struct geneve_net
*gn
= net_generic(geneve
->net
, geneve_net_id
);
158 struct geneve_sock
*gs
;
160 gs
= geneve_sock_add(net
, htons(GENEVE_UDP_PORT
), geneve_rx
, gn
,
170 static int geneve_stop(struct net_device
*dev
)
172 struct geneve_dev
*geneve
= netdev_priv(dev
);
173 struct geneve_sock
*gs
= geneve
->sock
;
175 geneve_sock_release(gs
);
/*
 * geneve_xmit - .ndo_start_xmit hook (see geneve_netdev_ops).
 *
 * Steps visible below, in order:
 *  - reset the MAC header and pick a UDP source port with
 *    udp_flow_src_port();
 *  - derive tos via ip_tunnel_get_dsfield() and look up an IPv4 route to
 *    geneve->remote inside geneve->net;
 *  - log "no route" and bump tx_carrier_errors, or log "circular route" and
 *    bump collisions when the route points back at this device;
 *  - fold ECN into tos with ip_tunnel_ecn_encap() and fall back to
 *    ip4_dst_hoplimit() when ttl is zero;
 *  - send with geneve_xmit_skb() to GENEVE_UDP_PORT using the device VNI and
 *    account the result with iptunnel_xmit_stats();
 *  - bump tx_errors on what is presumably the error exit.
 *
 * NOTE(review): the embedded original line numbers have gaps, so statements
 * are missing here - at least the declarations of sport, tos, ttl, fl4 and
 * err, the assignment of iip, the initial tos/ttl values, the conditions
 * guarding the error counters and the return/exit paths. Restore them from
 * the original file before building.
 */
180 static netdev_tx_t
geneve_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
182 struct geneve_dev
*geneve
= netdev_priv(dev
);
183 struct geneve_sock
*gs
= geneve
->sock
;
184 struct rtable
*rt
= NULL
;
185 const struct iphdr
*iip
; /* interior IP header */
193 skb_reset_mac_header(skb
);
195 /* TODO: port min/max limits should be configurable */
196 sport
= udp_flow_src_port(dev_net(dev
), skb
, 0, 0, true);
200 tos
= ip_tunnel_get_dsfield(iip
, skb
);
202 memset(&fl4
, 0, sizeof(fl4
));
203 fl4
.flowi4_tos
= RT_TOS(tos
);
204 fl4
.daddr
= geneve
->remote
.sin_addr
.s_addr
;
205 rt
= ip_route_output_key(geneve
->net
, &fl4
);
207 netdev_dbg(dev
, "no route to %pI4\n", &fl4
.daddr
);
208 dev
->stats
.tx_carrier_errors
++;
211 if (rt
->dst
.dev
== dev
) { /* is this necessary? */
212 netdev_dbg(dev
, "circular route to %pI4\n", &fl4
.daddr
);
213 dev
->stats
.collisions
++;
217 tos
= ip_tunnel_ecn_encap(tos
, iip
, skb
);
220 if (!ttl
&& IN_MULTICAST(ntohl(fl4
.daddr
)))
223 ttl
= ttl
? : ip4_dst_hoplimit(&rt
->dst
);
225 /* no need to handle local destination and encap bypass...yet... */
227 err
= geneve_xmit_skb(gs
, rt
, skb
, fl4
.saddr
, fl4
.daddr
,
228 tos
, ttl
, 0, sport
, htons(GENEVE_UDP_PORT
), 0,
229 geneve
->vni
, 0, NULL
, false,
230 !net_eq(geneve
->net
, dev_net(geneve
->dev
)));
234 iptunnel_xmit_stats(err
, &dev
->stats
, dev
->tstats
);
241 dev
->stats
.tx_errors
++;
246 static const struct net_device_ops geneve_netdev_ops
= {
247 .ndo_init
= geneve_init
,
248 .ndo_uninit
= geneve_uninit
,
249 .ndo_open
= geneve_open
,
250 .ndo_stop
= geneve_stop
,
251 .ndo_start_xmit
= geneve_xmit
,
252 .ndo_get_stats64
= ip_tunnel_get_stats64
,
253 .ndo_change_mtu
= eth_change_mtu
,
254 .ndo_validate_addr
= eth_validate_addr
,
255 .ndo_set_mac_address
= eth_mac_addr
,
258 static void geneve_get_drvinfo(struct net_device
*dev
,
259 struct ethtool_drvinfo
*drvinfo
)
261 strlcpy(drvinfo
->version
, GENEVE_NETDEV_VER
, sizeof(drvinfo
->version
));
262 strlcpy(drvinfo
->driver
, "geneve", sizeof(drvinfo
->driver
));
265 static const struct ethtool_ops geneve_ethtool_ops
= {
266 .get_drvinfo
= geneve_get_drvinfo
,
267 .get_link
= ethtool_op_get_link
,
270 /* Info for udev, that this is a virtual tunnel endpoint */
/*
 * Device type attached to each geneve netdev via SET_NETDEV_DEVTYPE() in
 * geneve_setup().
 *
 * NOTE(review): the initializer body and closing brace (original lines
 * 272-273) are missing here - restore them before building.
 */
271 static struct device_type geneve_type
= {
275 /* Initialize the device structure. */
/*
 * geneve_setup - rtnl_link_ops .setup callback (see geneve_link_ops).
 *
 * Visible below: installs the netdev and ethtool ops, makes free_netdev the
 * destructor, sets the device type, zeroes tx_queue_len and enables
 * LLTX, scatter-gather, checksum and software-GSO features.
 * vlan_features is copied from features BEFORE the VLAN tag-insertion
 * offload bits are added, so those bits are not inherited by VLAN devices
 * stacked on top. Finally IFF_LIVE_ADDR_CHANGE is set in priv_flags.
 *
 * NOTE(review): the embedded original line numbers jump 276->280 and
 * 297->300, so statements may be missing at the start of the body and just
 * before the priv_flags update - compare with the original file.
 */
276 static void geneve_setup(struct net_device
*dev
)
280 dev
->netdev_ops
= &geneve_netdev_ops
;
281 dev
->ethtool_ops
= &geneve_ethtool_ops
;
282 dev
->destructor
= free_netdev
;
284 SET_NETDEV_DEVTYPE(dev
, &geneve_type
);
286 dev
->tx_queue_len
= 0;
287 dev
->features
|= NETIF_F_LLTX
;
288 dev
->features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
;
289 dev
->features
|= NETIF_F_RXCSUM
;
290 dev
->features
|= NETIF_F_GSO_SOFTWARE
;
292 dev
->vlan_features
= dev
->features
;
293 dev
->features
|= NETIF_F_HW_VLAN_CTAG_TX
| NETIF_F_HW_VLAN_STAG_TX
;
295 dev
->hw_features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
| NETIF_F_RXCSUM
;
296 dev
->hw_features
|= NETIF_F_GSO_SOFTWARE
;
297 dev
->hw_features
|= NETIF_F_HW_VLAN_CTAG_TX
| NETIF_F_HW_VLAN_STAG_TX
;
300 dev
->priv_flags
|= IFF_LIVE_ADDR_CHANGE
;
303 static const struct nla_policy geneve_policy
[IFLA_GENEVE_MAX
+ 1] = {
304 [IFLA_GENEVE_ID
] = { .type
= NLA_U32
},
305 [IFLA_GENEVE_REMOTE
] = { .len
= FIELD_SIZEOF(struct iphdr
, daddr
) },
306 [IFLA_GENEVE_TTL
] = { .type
= NLA_U8
},
307 [IFLA_GENEVE_TOS
] = { .type
= NLA_U8
},
310 static int geneve_validate(struct nlattr
*tb
[], struct nlattr
*data
[])
312 if (tb
[IFLA_ADDRESS
]) {
313 if (nla_len(tb
[IFLA_ADDRESS
]) != ETH_ALEN
)
316 if (!is_valid_ether_addr(nla_data(tb
[IFLA_ADDRESS
])))
317 return -EADDRNOTAVAIL
;
323 if (data
[IFLA_GENEVE_ID
]) {
324 __u32 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
326 if (vni
>= GENEVE_VID_MASK
)
/*
 * geneve_newlink - rtnl_link_ops .newlink callback (see geneve_link_ops).
 *
 * Steps visible below, in order:
 *  - require both IFLA_GENEVE_ID and IFLA_GENEVE_REMOTE;
 *  - split the 32-bit VNI attribute into the three vni[] bytes, most
 *    significant byte first, and store the remote IPv4 address;
 *  - test whether the remote address is multicast;
 *  - walk the VNI hash bucket looking for an existing device with the same
 *    VNI and the same remote;
 *  - pick a random MAC address when IFLA_ADDRESS was not supplied;
 *  - register the netdevice, apply the optional TTL/TOS attributes, then
 *    link the device into gn->geneve_list and the VNI hash table.
 *
 * NOTE(review): the embedded original line numbers have gaps, so statements
 * are missing here - at least the declarations of vni, hash and err and
 * every return/error branch (what happens for missing attributes, a
 * multicast remote, a duplicate VNI+remote or a failed registration), as
 * well as any assignment of geneve->net / geneve->dev. Restore them from
 * the original file before building.
 * NOTE(review): ttl and tos are written after register_netdevice() -
 * presumably harmless while RTNL is held; confirm.
 */
333 static int geneve_newlink(struct net
*net
, struct net_device
*dev
,
334 struct nlattr
*tb
[], struct nlattr
*data
[])
336 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
337 struct geneve_dev
*dummy
, *geneve
= netdev_priv(dev
);
338 struct hlist_head
*vni_list_head
;
339 struct sockaddr_in remote
; /* IPv4 address for link partner */
343 if (!data
[IFLA_GENEVE_ID
] || !data
[IFLA_GENEVE_REMOTE
])
349 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
350 geneve
->vni
[0] = (vni
& 0x00ff0000) >> 16;
351 geneve
->vni
[1] = (vni
& 0x0000ff00) >> 8;
352 geneve
->vni
[2] = vni
& 0x000000ff;
354 geneve
->remote
.sin_addr
.s_addr
=
355 nla_get_in_addr(data
[IFLA_GENEVE_REMOTE
]);
356 if (IN_MULTICAST(ntohl(geneve
->remote
.sin_addr
.s_addr
)))
359 remote
= geneve
->remote
;
360 hash
= geneve_net_vni_hash(geneve
->vni
);
361 vni_list_head
= &gn
->vni_list
[hash
];
362 hlist_for_each_entry_rcu(dummy
, vni_list_head
, hlist
) {
363 if (!memcmp(geneve
->vni
, dummy
->vni
, sizeof(dummy
->vni
)) &&
364 !memcmp(&remote
, &dummy
->remote
, sizeof(dummy
->remote
)))
368 if (tb
[IFLA_ADDRESS
] == NULL
)
369 eth_hw_addr_random(dev
);
371 err
= register_netdevice(dev
);
375 if (data
[IFLA_GENEVE_TTL
])
376 geneve
->ttl
= nla_get_u8(data
[IFLA_GENEVE_TTL
]);
378 if (data
[IFLA_GENEVE_TOS
])
379 geneve
->tos
= nla_get_u8(data
[IFLA_GENEVE_TOS
]);
381 list_add(&geneve
->next
, &gn
->geneve_list
);
383 hlist_add_head_rcu(&geneve
->hlist
, &gn
->vni_list
[hash
]);
388 static void geneve_dellink(struct net_device
*dev
, struct list_head
*head
)
390 struct geneve_dev
*geneve
= netdev_priv(dev
);
392 if (!hlist_unhashed(&geneve
->hlist
))
393 hlist_del_rcu(&geneve
->hlist
);
395 list_del(&geneve
->next
);
396 unregister_netdevice_queue(dev
, head
);
399 static size_t geneve_get_size(const struct net_device
*dev
)
401 return nla_total_size(sizeof(__u32
)) + /* IFLA_GENEVE_ID */
402 nla_total_size(sizeof(struct in_addr
)) + /* IFLA_GENEVE_REMOTE */
403 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TTL */
404 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TOS */
/*
 * geneve_fill_info - rtnl_link_ops .fill_info callback.
 *
 * Emits the device configuration as netlink attributes: IFLA_GENEVE_ID
 * (the VNI reassembled from its three bytes, most significant first),
 * IFLA_GENEVE_REMOTE, IFLA_GENEVE_TTL and IFLA_GENEVE_TOS. Any failing put
 * jumps to the nla_put_failure label.
 *
 * NOTE(review): original lines 411 and 424-429 are missing here - the
 * declaration of vni, the success return and the nla_put_failure label
 * with its return value. Restore them before building.
 */
408 static int geneve_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
410 struct geneve_dev
*geneve
= netdev_priv(dev
);
413 vni
= (geneve
->vni
[0] << 16) | (geneve
->vni
[1] << 8) | geneve
->vni
[2];
414 if (nla_put_u32(skb
, IFLA_GENEVE_ID
, vni
))
415 goto nla_put_failure
;
417 if (nla_put_in_addr(skb
, IFLA_GENEVE_REMOTE
,
418 geneve
->remote
.sin_addr
.s_addr
))
419 goto nla_put_failure
;
421 if (nla_put_u8(skb
, IFLA_GENEVE_TTL
, geneve
->ttl
) ||
422 nla_put_u8(skb
, IFLA_GENEVE_TOS
, geneve
->tos
))
423 goto nla_put_failure
;
/*
 * rtnetlink link operations for geneve devices: attribute policy and
 * limits, size of the per-device private area (struct geneve_dev) and the
 * setup/validate/newlink/dellink/get_size/fill_info callbacks defined in
 * this file. Registered in geneve_init_module().
 *
 * NOTE(review): original line 432 (between the opening brace and .maxtype)
 * and the closing brace are missing here; MODULE_ALIAS_RTNL_LINK("geneve")
 * at the end of the file suggests the lost line names the link kind -
 * confirm and restore before building.
 */
431 static struct rtnl_link_ops geneve_link_ops __read_mostly
= {
433 .maxtype
= IFLA_GENEVE_MAX
,
434 .policy
= geneve_policy
,
435 .priv_size
= sizeof(struct geneve_dev
),
436 .setup
= geneve_setup
,
437 .validate
= geneve_validate
,
438 .newlink
= geneve_newlink
,
439 .dellink
= geneve_dellink
,
440 .get_size
= geneve_get_size
,
441 .fill_info
= geneve_fill_info
,
444 static __net_init
int geneve_init_net(struct net
*net
)
446 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
449 INIT_LIST_HEAD(&gn
->geneve_list
);
451 for (h
= 0; h
< VNI_HASH_SIZE
; ++h
)
452 INIT_HLIST_HEAD(&gn
->vni_list
[h
]);
/*
 * geneve_exit_net - pernet .exit callback (see geneve_net_ops).
 *
 * Collects devices for unregistration in two passes and then unregisters
 * them in one batch with unregister_netdevice_many():
 *  1. every netdev currently living in @net whose rtnl_link_ops is
 *     geneve_link_ops (this also catches devices moved into @net);
 *  2. every device on this namespace's geneve_list that now lives in a
 *     different namespace (those in @net were already queued by pass 1).
 *
 * NOTE(review): original lines 462-465 and 482 are missing here - at least
 * the declaration of the local list head, and presumably the locking
 * around the two passes. Restore them before building.
 */
457 static void __net_exit
geneve_exit_net(struct net
*net
)
459 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
460 struct geneve_dev
*geneve
, *next
;
461 struct net_device
*dev
, *aux
;
466 /* gather any geneve devices that were moved into this ns */
467 for_each_netdev_safe(net
, dev
, aux
)
468 if (dev
->rtnl_link_ops
== &geneve_link_ops
)
469 unregister_netdevice_queue(dev
, &list
);
471 /* now gather any other geneve devices that were created in this ns */
472 list_for_each_entry_safe(geneve
, next
, &gn
->geneve_list
, next
) {
473 /* If geneve->dev is in the same netns, it was already added
474 * to the list by the previous loop.
476 if (!net_eq(dev_net(geneve
->dev
), net
))
477 unregister_netdevice_queue(geneve
->dev
, &list
);
480 /* unregister the devices gathered above */
481 unregister_netdevice_many(&list
);
485 static struct pernet_operations geneve_net_ops
= {
486 .init
= geneve_init_net
,
487 .exit
= geneve_exit_net
,
488 .id
= &geneve_net_id
,
489 .size
= sizeof(struct geneve_net
),
/*
 * geneve_init_module - module entry point, run as a late_initcall.
 *
 * Registers the per-namespace operations first and the rtnetlink link
 * operations second; the visible unregister_pernet_subsys() call is
 * presumably the unwind step for a failed link registration.
 *
 * NOTE(review): the embedded original line numbers have gaps
 * (492->496->500->506->510), so the declaration of rc, the error checks,
 * the unwind labels and the return statements are missing here. Restore
 * them before building.
 */
492 static int __init
geneve_init_module(void)
496 rc
= register_pernet_subsys(&geneve_net_ops
);
500 rc
= rtnl_link_register(&geneve_link_ops
);
506 unregister_pernet_subsys(&geneve_net_ops
);
510 late_initcall(geneve_init_module
);
512 static void __exit
geneve_cleanup_module(void)
514 rtnl_link_unregister(&geneve_link_ops
);
515 unregister_pernet_subsys(&geneve_net_ops
);
517 module_exit(geneve_cleanup_module
);
519 MODULE_LICENSE("GPL");
520 MODULE_VERSION(GENEVE_NETDEV_VER
);
521 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
522 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
523 MODULE_ALIAS_RTNL_LINK("geneve");