// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "queueing.h"
#include "socket.h"
#include "timers.h"
#include "device.h"
#include "ratelimiter.h"
#include "peer.h"
#include "messages.h"

#include <linux/module.h>
#include <linux/rtnetlink.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_arp.h>
#include <linux/icmp.h>
#include <linux/suspend.h>
#include <net/dst_metadata.h>
#include <net/gso.h>
#include <net/icmp.h>
#include <net/rtnetlink.h>
#include <net/ip_tunnels.h>
#include <net/addrconf.h>
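
/* All WireGuard devices, linked through wg_device.device_list; the list is
 * only traversed or modified while holding the RTNL lock.
 */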
static LIST_HEAD(device_list);

static int wg_open(struct net_device *dev)
{
	struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
	struct inet6_dev *dev_v6 = __in6_dev_get(dev);
	struct wg_device *wg = netdev_priv(dev);
	struct wg_peer *peer;
	int ret;

	if (dev_v4) {
		/* At some point we might put this check near the ip_rt_send_
		 * redirect call of ip_forward in net/ipv4/ip_forward.c, similar
		 * to the current secpath check.
		 */
		IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
		IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
	}
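	/* Setting the address generation mode to NONE keeps the kernel from
	 * auto-assigning an IPv6 link-local address to this interface.
	 */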
	if (dev_v6)
		dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;

	mutex_lock(&wg->device_update_lock);
	ret = wg_socket_init(wg, wg->incoming_port);
	if (ret < 0)
		goto out;
	list_for_each_entry(peer, &wg->peer_list, peer_list) {
		wg_packet_send_staged_packets(peer);
		if (peer->persistent_keepalive_interval)
			wg_packet_send_keepalive(peer);
	}
out:
	mutex_unlock(&wg->device_update_lock);
	return ret;
}

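/* Suspend and hibernation clear ephemeral key material: the zero-key timers
 * cannot fire while the machine sleeps, and session keys should not end up
 * preserved inside a hibernation image on disk.
 */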
static int wg_pm_notification(struct notifier_block *nb, unsigned long action, void *data)
{
	struct wg_device *wg;
	struct wg_peer *peer;

	/* If the machine is constantly suspending and resuming, as part of
	 * its normal operation rather than as a somewhat rare event, then we
	 * don't actually want to clear keys.
	 */
	if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) ||
	    IS_ENABLED(CONFIG_PM_USERSPACE_AUTOSLEEP))
		return 0;

	if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE)
		return 0;

	rtnl_lock();
	list_for_each_entry(wg, &device_list, device_list) {
		mutex_lock(&wg->device_update_lock);
		list_for_each_entry(peer, &wg->peer_list, peer_list) {
			del_timer(&peer->timer_zero_key_material);
			wg_noise_handshake_clear(&peer->handshake);
			wg_noise_keypairs_clear(&peer->keypairs);
		}
		mutex_unlock(&wg->device_update_lock);
	}
	rtnl_unlock();
	rcu_barrier();
	return 0;
}

static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification };
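
/* The vmfork notifier fires when the random subsystem detects that a VM has
 * been forked or cloned. Expiring current keypairs forces fresh handshakes,
 * so clones cannot keep transmitting with their parent's key and nonce
 * counters.
 */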
static int wg_vm_notification(struct notifier_block *nb, unsigned long action, void *data)
{
	struct wg_device *wg;
	struct wg_peer *peer;

	rtnl_lock();
	list_for_each_entry(wg, &device_list, device_list) {
		mutex_lock(&wg->device_update_lock);
		list_for_each_entry(peer, &wg->peer_list, peer_list)
			wg_noise_expire_current_peer_keypairs(peer);
		mutex_unlock(&wg->device_update_lock);
	}
	rtnl_unlock();
	return 0;
}

static struct notifier_block vm_notifier = { .notifier_call = wg_vm_notification };

static int wg_stop(struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);
	struct wg_peer *peer;
	struct sk_buff *skb;

	mutex_lock(&wg->device_update_lock);
	list_for_each_entry(peer, &wg->peer_list, peer_list) {
		wg_packet_purge_staged_packets(peer);
		wg_timers_stop(peer);
		wg_noise_handshake_clear(&peer->handshake);
		wg_noise_keypairs_clear(&peer->keypairs);
		wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
	}
	mutex_unlock(&wg->device_update_lock);
	while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL)
		kfree_skb(skb);
	atomic_set(&wg->handshake_queue_len, 0);
	wg_socket_reinit(wg, NULL, NULL);
	return 0;
}
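
/* wg_xmit stages outbound packets on the matched peer's queue and then kicks
 * the asynchronous encryption workers; nothing is encrypted synchronously in
 * this transmit context.
 */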
static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);
	struct sk_buff_head packets;
	struct wg_peer *peer;
	struct sk_buff *next;
	sa_family_t family;
	u32 mtu;
	int ret;

	if (unlikely(!wg_check_packet_protocol(skb))) {
		ret = -EPROTONOSUPPORT;
		net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
		goto err;
	}

	peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb);
	if (unlikely(!peer)) {
		ret = -ENOKEY;
		if (skb->protocol == htons(ETH_P_IP))
			net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n",
					    dev->name, &ip_hdr(skb)->daddr);
		else if (skb->protocol == htons(ETH_P_IPV6))
			net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
					    dev->name, &ipv6_hdr(skb)->daddr);
		goto err_icmp;
	}

	family = READ_ONCE(peer->endpoint.addr.sa_family);
	if (unlikely(family != AF_INET && family != AF_INET6)) {
		ret = -EDESTADDRREQ;
		net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n",
				    dev->name, peer->internal_id);
		goto err_peer;
	}

	mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

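	/* GSO superpackets are segmented before staging so that each resulting
	 * packet is padded and encrypted as an individual WireGuard message.
	 */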
	__skb_queue_head_init(&packets);
	if (!skb_is_gso(skb)) {
		skb_mark_not_on_list(skb);
	} else {
		struct sk_buff *segs = skb_gso_segment(skb, 0);

		if (IS_ERR(segs)) {
			ret = PTR_ERR(segs);
			goto err_peer;
		}
		dev_kfree_skb(skb);
		skb = segs;
	}

	skb_list_walk_safe(skb, skb, next) {
		skb_mark_not_on_list(skb);

		skb = skb_share_check(skb, GFP_ATOMIC);
		if (unlikely(!skb))
			continue;

		/* We only need to keep the original dst around for icmp,
		 * so at this point we're in a position to drop it.
		 */
		skb_dst_drop(skb);

		PACKET_CB(skb)->mtu = mtu;

		__skb_queue_tail(&packets, skb);
	}

	spin_lock_bh(&peer->staged_packet_queue.lock);
	/* If the queue is getting too big, we start removing the oldest packets
	 * until it's small again. We do this before adding the new packet, so
	 * we don't remove GSO segments that are in excess.
	 */
	while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
		dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
		DEV_STATS_INC(dev, tx_dropped);
	}
	skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
	spin_unlock_bh(&peer->staged_packet_queue.lock);

	wg_packet_send_staged_packets(peer);

	wg_peer_put(peer);
	return NETDEV_TX_OK;

err_peer:
	wg_peer_put(peer);
err_icmp:
	if (skb->protocol == htons(ETH_P_IP))
		icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
	else if (skb->protocol == htons(ETH_P_IPV6))
		icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
err:
	DEV_STATS_INC(dev, tx_errors);
	kfree_skb(skb);
	return ret;
}

static const struct net_device_ops netdev_ops = {
	.ndo_open		= wg_open,
	.ndo_stop		= wg_stop,
	.ndo_start_xmit		= wg_xmit,
};

static void wg_destruct(struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);

	rtnl_lock();
	list_del(&wg->device_list);
	rtnl_unlock();
	mutex_lock(&wg->device_update_lock);
	rcu_assign_pointer(wg->creating_net, NULL);
	wg->incoming_port = 0;
	wg_socket_reinit(wg, NULL, NULL);
	/* The final references are cleared in the below calls to destroy_workqueue. */
	wg_peer_remove_all(wg);
	destroy_workqueue(wg->handshake_receive_wq);
	destroy_workqueue(wg->handshake_send_wq);
	destroy_workqueue(wg->packet_crypt_wq);
	wg_packet_queue_free(&wg->handshake_queue, true);
	wg_packet_queue_free(&wg->decrypt_queue, false);
	wg_packet_queue_free(&wg->encrypt_queue, false);
	rcu_barrier(); /* Wait for all the peers to be actually freed. */
	wg_ratelimiter_uninit();
	memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
	kvfree(wg->index_hashtable);
	kvfree(wg->peer_hashtable);
	mutex_unlock(&wg->device_update_lock);

	pr_debug("%s: Interface destroyed\n", dev->name);
	free_netdev(dev);
}

static const struct device_type device_type = { .name = KBUILD_MODNAME };

static void wg_setup(struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);
	enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
				    NETIF_F_SG | NETIF_F_GSO |
				    NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
	const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
			     max(sizeof(struct ipv6hdr), sizeof(struct iphdr));

	dev->netdev_ops = &netdev_ops;
	dev->header_ops = &ip_tunnel_header_ops;
	dev->hard_header_len = 0;
	dev->addr_len = 0;
	dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
	dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
	dev->type = ARPHRD_NONE;
	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
	dev->priv_flags |= IFF_NO_QUEUE;
	dev->features |= WG_NETDEV_FEATURES;
	dev->hw_features |= WG_NETDEV_FEATURES;
	dev->hw_enc_features |= WG_NETDEV_FEATURES;
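	/* overhead above is 32 bytes of minimum WireGuard message plus 8 bytes
	 * of UDP plus 40 bytes of IPv6, i.e. 80 bytes, so on a standard
	 * 1500-byte link the default MTU works out to the familiar 1420.
	 */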
	dev->mtu = ETH_DATA_LEN - overhead;
	dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;

	SET_NETDEV_DEVTYPE(dev, &device_type);

	/* We need to keep the dst around in case of icmp replies. */
	netif_keep_dst(dev);

	netif_set_tso_max_size(dev, GSO_MAX_SIZE);

	wg->dev = dev;
}

static int wg_newlink(struct net *src_net, struct net_device *dev,
		      struct nlattr *tb[], struct nlattr *data[],
		      struct netlink_ext_ack *extack)
{
	struct wg_device *wg = netdev_priv(dev);
	int ret = -ENOMEM;

	rcu_assign_pointer(wg->creating_net, src_net);
	init_rwsem(&wg->static_identity.lock);
	mutex_init(&wg->socket_update_lock);
	mutex_init(&wg->device_update_lock);
	wg_allowedips_init(&wg->peer_allowedips);
	wg_cookie_checker_init(&wg->cookie_checker, wg);
	INIT_LIST_HEAD(&wg->peer_list);
	wg->device_update_gen = 1;

	wg->peer_hashtable = wg_pubkey_hashtable_alloc();
	if (!wg->peer_hashtable)
		return ret;

	wg->index_hashtable = wg_index_hashtable_alloc();
	if (!wg->index_hashtable)
		goto err_free_peer_hashtable;

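	/* The handshake workqueues are freezable so key-exchange work pauses
	 * across suspend; the crypt workqueue instead needs WQ_MEM_RECLAIM,
	 * since packet processing may be depended upon by the memory reclaim
	 * path.
	 */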
	wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
			WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
	if (!wg->handshake_receive_wq)
		goto err_free_index_hashtable;

	wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
			WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
	if (!wg->handshake_send_wq)
		goto err_destroy_handshake_receive;

	wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s",
			WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
	if (!wg->packet_crypt_wq)
		goto err_destroy_handshake_send;

	ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
				   MAX_QUEUED_PACKETS);
	if (ret < 0)
		goto err_destroy_packet_crypt;

	ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
				   MAX_QUEUED_PACKETS);
	if (ret < 0)
		goto err_free_encrypt_queue;

	ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker,
				   MAX_QUEUED_INCOMING_HANDSHAKES);
	if (ret < 0)
		goto err_free_decrypt_queue;

	ret = wg_ratelimiter_init();
	if (ret < 0)
		goto err_free_handshake_queue;

	ret = register_netdevice(dev);
	if (ret < 0)
		goto err_uninit_ratelimiter;

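	/* ->newlink is invoked with the RTNL lock held, so the global
	 * device_list can be modified here without additional locking.
	 */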
	list_add(&wg->device_list, &device_list);

	/* We wait until the end to assign priv_destructor, so that
	 * register_netdevice doesn't call it for us if it fails.
	 */
	dev->priv_destructor = wg_destruct;

	pr_debug("%s: Interface created\n", dev->name);
	return 0;

err_uninit_ratelimiter:
	wg_ratelimiter_uninit();
err_free_handshake_queue:
	wg_packet_queue_free(&wg->handshake_queue, false);
err_free_decrypt_queue:
	wg_packet_queue_free(&wg->decrypt_queue, false);
err_free_encrypt_queue:
	wg_packet_queue_free(&wg->encrypt_queue, false);
err_destroy_packet_crypt:
	destroy_workqueue(wg->packet_crypt_wq);
err_destroy_handshake_send:
	destroy_workqueue(wg->handshake_send_wq);
err_destroy_handshake_receive:
	destroy_workqueue(wg->handshake_receive_wq);
err_free_index_hashtable:
	kvfree(wg->index_hashtable);
err_free_peer_hashtable:
	kvfree(wg->peer_hashtable);
	return ret;
}

static struct rtnl_link_ops link_ops __read_mostly = {
	.kind			= KBUILD_MODNAME,
	.priv_size		= sizeof(struct wg_device),
	.setup			= wg_setup,
	.newlink		= wg_newlink,
};
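
/* A device's underlying UDP socket lives in the namespace the device was
 * created in, so when that namespace exits the socket must be released and
 * cached peer endpoint sources cleared.
 */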
static void wg_netns_pre_exit(struct net *net)
{
	struct wg_device *wg;
	struct wg_peer *peer;

	rtnl_lock();
	list_for_each_entry(wg, &device_list, device_list) {
		if (rcu_access_pointer(wg->creating_net) == net) {
			pr_debug("%s: Creating namespace exiting\n", wg->dev->name);
			netif_carrier_off(wg->dev);
			mutex_lock(&wg->device_update_lock);
			rcu_assign_pointer(wg->creating_net, NULL);
			wg_socket_reinit(wg, NULL, NULL);
			list_for_each_entry(peer, &wg->peer_list, peer_list)
				wg_socket_clear_peer_endpoint_src(peer);
			mutex_unlock(&wg->device_update_lock);
		}
	}
	rtnl_unlock();
}

static struct pernet_operations pernet_ops = {
	.pre_exit = wg_netns_pre_exit
};

int __init wg_device_init(void)
{
	int ret;

	ret = register_pm_notifier(&pm_notifier);
	if (ret)
		return ret;

	ret = register_random_vmfork_notifier(&vm_notifier);
	if (ret)
		goto error_pm;

	ret = register_pernet_device(&pernet_ops);
	if (ret)
		goto error_vm;

	ret = rtnl_link_register(&link_ops);
	if (ret)
		goto error_pernet;

	return 0;

error_pernet:
	unregister_pernet_device(&pernet_ops);
error_vm:
	unregister_random_vmfork_notifier(&vm_notifier);
error_pm:
	unregister_pm_notifier(&pm_notifier);
	return ret;
}

void wg_device_uninit(void)
{
	rtnl_link_unregister(&link_ops);
	unregister_pernet_device(&pernet_ops);
	unregister_random_vmfork_notifier(&vm_notifier);
	unregister_pm_notifier(&pm_notifier);
	rcu_barrier();
}