// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	UDP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/ipv4/udp.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	Kazunori MIYAZAWA @USAGI:	change process style to use ip6_append_data
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
 */
#include <linux/bpf-cgroup.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/indirect_call_wrapper.h>
#include <trace/events/udp.h>

#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/raw.h>
#include <net/seg6.h>
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/busy_poll.h>
#include <net/sock_reuseport.h>
#include <net/gro.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <trace/events/skb.h>
#include "udp_impl.h"
static void udpv6_destruct_sock(struct sock *sk)
{
	udp_destruct_common(sk);
	inet6_sock_destruct(sk);
}

int udpv6_init_sock(struct sock *sk)
{
	udp_lib_init_sock(sk);
	sk->sk_destruct = udpv6_destruct_sock;
	set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
	return 0;
}
INDIRECT_CALLABLE_SCOPE
u32 udp6_ehashfn(const struct net *net,
		 const struct in6_addr *laddr,
		 const u16 lport,
		 const struct in6_addr *faddr,
		 const __be16 fport)
{
	static u32 udp6_ehash_secret __read_mostly;
	static u32 udp_ipv6_hash_secret __read_mostly;

	u32 lhash, fhash;

	net_get_random_once(&udp6_ehash_secret,
			    sizeof(udp6_ehash_secret));
	net_get_random_once(&udp_ipv6_hash_secret,
			    sizeof(udp_ipv6_hash_secret));

	lhash = (__force u32)laddr->s6_addr32[3];
	fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);

	return __inet6_ehashfn(lhash, lport, fhash, fport,
			       udp6_ehash_secret + net_hash_mix(net));
}
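
/* Illustrative sketch (not part of the build): the 4-tuple hash above is
 * computed with *reversed* roles on the receive side, so that an incoming
 * packet's (daddr, dport, saddr, sport) lands in the same hash4 slot the
 * connected socket hashed itself into (see udp6_lib_lookup4()/udp6_hash4()
 * below):
 *
 *	hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport);    // lookup side
 *	hash  = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num,
 *			     &sk->sk_v6_daddr, sk->sk_dport);    // socket side
 */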

int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
	unsigned int hash2_nulladdr =
		ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
	unsigned int hash2_partial =
		ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);

	/* precompute partial secondary hash */
	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
	return udp_lib_get_port(sk, snum, hash2_nulladdr);
}

void udp_v6_rehash(struct sock *sk)
{
	u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
					  &sk->sk_v6_rcv_saddr,
					  inet_sk(sk)->inet_num);
	u16 new_hash4;

	if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
		new_hash4 = udp_ehashfn(sock_net(sk),
					sk->sk_rcv_saddr, sk->sk_num,
					sk->sk_daddr, sk->sk_dport);
	} else {
		new_hash4 = udp6_ehashfn(sock_net(sk),
					 &sk->sk_v6_rcv_saddr, sk->sk_num,
					 &sk->sk_v6_daddr, sk->sk_dport);
	}

	udp_lib_rehash(sk, new_hash, new_hash4);
}

static int compute_score(struct sock *sk, const struct net *net,
			 const struct in6_addr *saddr, __be16 sport,
			 const struct in6_addr *daddr, unsigned short hnum,
			 int dif, int sdif)
{
	int bound_dev_if, score;
	struct inet_sock *inet;
	bool dev_match;

	if (!net_eq(sock_net(sk), net) ||
	    udp_sk(sk)->udp_port_hash != hnum ||
	    sk->sk_family != PF_INET6)
		return -1;

	if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
		return -1;

	score = 0;
	inet = inet_sk(sk);

	if (inet->inet_dport) {
		if (inet->inet_dport != sport)
			return -1;
		score++;
	}

	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
		if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
			return -1;
		score++;
	}

	bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
	dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif);
	if (!dev_match)
		return -1;
	if (bound_dev_if)
		score++;

	if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
		score++;

	return score;
}

/**
 * udp6_lib_lookup1() - Simplified lookup using primary hash (destination port)
 * @net:	Network namespace
 * @saddr:	Source address, network order
 * @sport:	Source port, network order
 * @daddr:	Destination address, network order
 * @hnum:	Destination port, host order
 * @dif:	Destination interface index
 * @sdif:	Destination bridge port index, if relevant
 * @udptable:	Set of UDP hash tables
 *
 * Simplified lookup to be used as fallback if no sockets are found due to a
 * potential race between (receive) address change, and lookup happening before
 * the rehash operation. This function ignores SO_REUSEPORT groups while scoring
 * result sockets, because if we have one, we don't need the fallback at all.
 *
 * Called under rcu_read_lock().
 *
 * Return: socket with highest matching score if any, NULL if none
 */
static struct sock *udp6_lib_lookup1(const struct net *net,
				     const struct in6_addr *saddr, __be16 sport,
				     const struct in6_addr *daddr,
				     unsigned int hnum, int dif, int sdif,
				     const struct udp_table *udptable)
{
	unsigned int slot = udp_hashfn(net, hnum, udptable->mask);
	struct udp_hslot *hslot = &udptable->hash[slot];
	struct sock *sk, *result = NULL;
	int score, badness = 0;

	sk_for_each_rcu(sk, &hslot->head) {
		score = compute_score(sk, net,
				      saddr, sport, daddr, hnum, dif, sdif);
		if (score > badness) {
			result = sk;
			badness = score;
		}
	}

	return result;
}

/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(const struct net *net,
		const struct in6_addr *saddr, __be16 sport,
		const struct in6_addr *daddr, unsigned int hnum,
		int dif, int sdif, struct udp_hslot *hslot2,
		struct sk_buff *skb)
{
	struct sock *sk, *result;
	int score, badness;
	bool need_rescore;

	result = NULL;
	badness = -1;
	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
		need_rescore = false;
rescore:
		score = compute_score(need_rescore ? result : sk, net, saddr,
				      sport, daddr, hnum, dif, sdif);
		if (score > badness) {
			badness = score;

			if (need_rescore)
				continue;

			if (sk->sk_state == TCP_ESTABLISHED) {
				result = sk;
				continue;
			}

			result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
							saddr, sport, daddr, hnum, udp6_ehashfn);
			if (!result) {
				result = sk;
				continue;
			}

			/* Fall back to scoring if group has connections */
			if (!reuseport_has_conns(sk))
				return result;

			/* Reuseport logic returned an error, keep original score. */
			if (IS_ERR(result))
				continue;

			/* compute_score is too long of a function to be
			 * inlined, and calling it again here yields
			 * measurable overhead for some optimized
			 * workloads. Work around it by jumping
			 * backwards to rescore 'result'.
			 */
			need_rescore = true;
			goto rescore;
		}
	}
	return result;
}

#if IS_ENABLED(CONFIG_BASE_SMALL)
static struct sock *udp6_lib_lookup4(const struct net *net,
				     const struct in6_addr *saddr, __be16 sport,
				     const struct in6_addr *daddr,
				     unsigned int hnum, int dif, int sdif,
				     struct udp_table *udptable)
{
	return NULL;
}

static void udp6_hash4(struct sock *sk)
{
}
#else /* !CONFIG_BASE_SMALL */
static struct sock *udp6_lib_lookup4(const struct net *net,
				     const struct in6_addr *saddr, __be16 sport,
				     const struct in6_addr *daddr,
				     unsigned int hnum, int dif, int sdif,
				     struct udp_table *udptable)
{
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	const struct hlist_nulls_node *node;
	struct udp_hslot *hslot4;
	unsigned int hash4, slot;
	struct udp_sock *up;
	struct sock *sk;

	hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport);
	slot = hash4 & udptable->mask;
	hslot4 = &udptable->hash4[slot];

begin:
	udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
		sk = (struct sock *)up;
		if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
			return sk;
	}

	/* if the nulls value we got at the end of this lookup is not the
	 * expected one, we must restart lookup. We probably met an item that
	 * was moved to another chain due to rehash.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;

	return NULL;
}

static void udp6_hash4(struct sock *sk)
{
	struct net *net = sock_net(sk);
	unsigned int hash;

	if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
		udp4_hash4(sk);
		return;
	}

	if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		return;

	hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num,
			    &sk->sk_v6_daddr, sk->sk_dport);

	udp_lib_hash4(sk, hash);
}
#endif /* CONFIG_BASE_SMALL */
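
/* Lookup order used by __udp6_lib_lookup() below (a summary derived from
 * the code itself):
 *  1) exact 4-tuple hash (hash4), when the secondary slot has connected
 *     sockets;
 *  2) secondary hash on (daddr, port) via udp6_lib_lookup2();
 *  3) BPF sk_lookup redirect, if enabled;
 *  4) secondary hash on (in6addr_any, port) for wildcard listeners;
 *  5) primary port-only hash via udp6_lib_lookup1(), as a fallback for the
 *     address-change/rehash race.
 */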

/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(const struct net *net,
			       const struct in6_addr *saddr, __be16 sport,
			       const struct in6_addr *daddr, __be16 dport,
			       int dif, int sdif, struct udp_table *udptable,
			       struct sk_buff *skb)
{
	unsigned short hnum = ntohs(dport);
	struct udp_hslot *hslot2;
	struct sock *result, *sk;
	unsigned int hash2;

	hash2 = ipv6_portaddr_hash(net, daddr, hnum);
	hslot2 = udp_hashslot2(udptable, hash2);

	if (udp_has_hash4(hslot2)) {
		result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum,
					  dif, sdif, udptable);
		if (result) /* udp6_lib_lookup4 returns sk or NULL */
			return result;
	}

	/* Lookup connected or non-wildcard sockets */
	result = udp6_lib_lookup2(net, saddr, sport,
				  daddr, hnum, dif, sdif,
				  hslot2, skb);
	if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
		goto done;

	/* Lookup redirect from BPF */
	if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
	    udptable == net->ipv4.udp_table) {
		sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
						saddr, sport, daddr, hnum, dif,
						udp6_ehashfn);
		if (sk) {
			result = sk;
			goto done;
		}
	}

	/* Got non-wildcard socket or error on first lookup */
	if (result)
		goto done;

	/* Lookup wildcard sockets */
	hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
	hslot2 = udp_hashslot2(udptable, hash2);

	result = udp6_lib_lookup2(net, saddr, sport,
				  &in6addr_any, hnum, dif, sdif,
				  hslot2, skb);
	if (!IS_ERR_OR_NULL(result))
		goto done;

	/* Cover address change/lookup/rehash race: see __udp4_lib_lookup() */
	result = udp6_lib_lookup1(net, saddr, sport, daddr, hnum, dif, sdif,
				  udptable);

done:
	return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);

static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
					  __be16 sport, __be16 dport,
					  struct udp_table *udptable)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);

	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
				 &iph->daddr, dport, inet6_iif(skb),
				 inet6_sdif(skb), udptable, skb);
}

struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
				 __be16 sport, __be16 dport)
{
	const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
	const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	int iif, sdif;

	inet6_get_iif_sdif(skb, &iif, &sdif);

	return __udp6_lib_lookup(net, &iph->saddr, sport,
				 &iph->daddr, dport, iif,
				 sdif, net->ipv4.udp_table, NULL);
}

/* Must be called under rcu_read_lock().
 * Does increment socket refcount.
 */
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport,
			     const struct in6_addr *daddr, __be16 dport, int dif)
{
	struct sock *sk;

	sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
			       dif, 0, net->ipv4.udp_table, NULL);
	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
		sk = NULL;
	return sk;
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
#endif

/* do not use the scratch area len for jumbogram: their length exceeds the
 * scratch area space; note that the IP6CB flags are still in the first
 * cacheline, so checking for jumbograms is cheap
 */
static int udp6_skb_len(struct sk_buff *skb)
{
	return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb);
}

/*
 *	This should be easy, if there is something there we
 *	return it, otherwise we block.
 */

int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		  int flags, int *addr_len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	unsigned int ulen, copied;
	int off, err, peeking = flags & MSG_PEEK;
	int is_udplite = IS_UDPLITE(sk);
	struct udp_mib __percpu *mib;
	bool checksum_valid = false;
	int is_udp4;

	if (flags & MSG_ERRQUEUE)
		return ipv6_recv_error(sk, msg, len, addr_len);

	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);

try_again:
	off = sk_peek_offset(sk, flags);
	skb = __skb_recv_udp(sk, flags, &off, &err);
	if (!skb)
		return err;

	ulen = udp6_skb_len(skb);
	copied = len;
	if (copied > ulen - off)
		copied = ulen - off;
	else if (copied < ulen)
		msg->msg_flags |= MSG_TRUNC;

	is_udp4 = (skb->protocol == htons(ETH_P_IP));
	mib = __UDPX_MIB(sk, is_udp4);

	/*
	 * If checksum is needed at all, try to do it while copying the
	 * data. If the data is truncated, or if we only want a partial
	 * coverage checksum (UDP-Lite), do it before the copy.
	 */

	if (copied < ulen || peeking ||
	    (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
		checksum_valid = udp_skb_csum_unnecessary(skb) ||
				!__udp_lib_checksum_complete(skb);
		if (!checksum_valid)
			goto csum_copy_err;
	}

	if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
		if (udp_skb_is_linear(skb))
			err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
		else
			err = skb_copy_datagram_msg(skb, off, msg, copied);
	} else {
		err = skb_copy_and_csum_datagram_msg(skb, off, msg);
		if (err == -EINVAL)
			goto csum_copy_err;
	}
	if (unlikely(err)) {
		if (!peeking) {
			atomic_inc(&sk->sk_drops);
			SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
		}
		kfree_skb(skb);
		return err;
	}
	if (!peeking)
		SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);

	sock_recv_cmsgs(msg, sk, skb);

	/* Copy the address. */
	if (msg->msg_name) {
		DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = udp_hdr(skb)->source;
		sin6->sin6_flowinfo = 0;

		if (is_udp4) {
			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
					       &sin6->sin6_addr);
			sin6->sin6_scope_id = 0;
		} else {
			sin6->sin6_addr = ipv6_hdr(skb)->saddr;
			sin6->sin6_scope_id =
				ipv6_iface_scope_id(&sin6->sin6_addr,
						    inet6_iif(skb));
		}
		*addr_len = sizeof(*sin6);

		BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
						      (struct sockaddr *)sin6,
						      addr_len);
	}

	if (udp_test_bit(GRO_ENABLED, sk))
		udp_cmsg_recv(msg, sk, skb);

	if (np->rxopt.all)
		ip6_datagram_recv_common_ctl(sk, msg, skb);

	if (is_udp4) {
		if (inet_cmsg_flags(inet))
			ip_cmsg_recv_offset(msg, sk, skb,
					    sizeof(struct udphdr), off);
	} else {
		if (np->rxopt.all)
			ip6_datagram_recv_specific_ctl(sk, msg, skb);
	}

	err = copied;
	if (flags & MSG_TRUNC)
		err = ulen;

	skb_consume_udp(sk, skb, peeking ? -err : err);
	return err;

csum_copy_err:
	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
				 udp_skb_destructor)) {
		SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
		SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
	}
	kfree_skb(skb);

	/* starting over for a new packet, but check if we need to yield */
	cond_resched();
	msg->msg_flags &= ~MSG_TRUNC;
	goto try_again;
}

DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
	static_branch_inc(&udpv6_encap_needed_key);
}
EXPORT_SYMBOL(udpv6_encap_enable);

/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
 * through error handlers in encapsulations looking for a match.
 */
static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
				      struct inet6_skb_parm *opt,
				      u8 type, u8 code, int offset, __be32 info)
{
	int i;

	for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
		int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
			       u8 type, u8 code, int offset, __be32 info);
		const struct ip6_tnl_encap_ops *encap;

		encap = rcu_dereference(ip6tun_encaps[i]);
		if (!encap)
			continue;
		handler = encap->err_handler;
		if (handler && !handler(skb, opt, type, code, offset, info))
			return 0;
	}

	return -ENOENT;
}

/* Try to match ICMP errors to UDP tunnels by looking up a socket without
 * reversing source and destination port: this will match tunnels that force the
 * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
 * lwtunnels might actually break this assumption by being configured with
 * different destination ports on endpoints, in this case we won't be able to
 * trace ICMP messages back to them.
 *
 * If this doesn't match any socket, probe tunnels with arbitrary destination
 * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
 * we've sent packets to won't necessarily match the local destination port.
 *
 * Then ask the tunnel implementation to match the error against a valid
 * association.
 *
 * Return an error if we can't find a match, the socket if we need further
 * processing, zero otherwise.
 */
static struct sock *__udp6_lib_err_encap(struct net *net,
					 const struct ipv6hdr *hdr, int offset,
					 struct udphdr *uh,
					 struct udp_table *udptable,
					 struct sock *sk,
					 struct sk_buff *skb,
					 struct inet6_skb_parm *opt,
					 u8 type, u8 code, __be32 info)
{
	int (*lookup)(struct sock *sk, struct sk_buff *skb);
	int network_offset, transport_offset;
	struct udp_sock *up;

	network_offset = skb_network_offset(skb);
	transport_offset = skb_transport_offset(skb);

	/* Network header needs to point to the outer IPv6 header inside ICMP */
	skb_reset_network_header(skb);

	/* Transport header needs to point to the UDP header */
	skb_set_transport_header(skb, offset);

	if (sk) {
		up = udp_sk(sk);

		lookup = READ_ONCE(up->encap_err_lookup);
		if (lookup && lookup(sk, skb))
			sk = NULL;

		goto out;
	}

	sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
			       &hdr->saddr, uh->dest,
			       inet6_iif(skb), 0, udptable, skb);
	if (sk) {
		up = udp_sk(sk);

		lookup = READ_ONCE(up->encap_err_lookup);
		if (!lookup || lookup(sk, skb))
			sk = NULL;
	}

out:
	if (!sk)
		sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
							offset, info));

	skb_set_transport_header(skb, transport_offset);
	skb_set_network_header(skb, network_offset);

	return sk;
}

int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		   u8 type, u8 code, int offset, __be32 info,
		   struct udp_table *udptable)
{
	struct ipv6_pinfo *np;
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct in6_addr *saddr = &hdr->saddr;
	const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr;
	struct udphdr *uh = (struct udphdr *)(skb->data + offset);
	bool tunnel = false;
	struct sock *sk;
	int harderr;
	int err;
	struct net *net = dev_net(skb->dev);

	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
			       inet6_iif(skb), inet6_sdif(skb), udptable, NULL);

	if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
		/* No socket for error: try tunnels before discarding */
		if (static_branch_unlikely(&udpv6_encap_needed_key)) {
			sk = __udp6_lib_err_encap(net, hdr, offset, uh,
						  udptable, sk, skb,
						  opt, type, code, info);
			if (!sk)
				return 0;
		} else {
			sk = ERR_PTR(-ENOENT);
		}

		if (IS_ERR(sk)) {
			__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
					  ICMP6_MIB_INERRORS);
			return PTR_ERR(sk);
		}

		tunnel = true;
	}

	harderr = icmpv6_err_convert(type, code, &err);
	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		if (!ip6_sk_accept_pmtu(sk))
			goto out;
		ip6_sk_update_pmtu(skb, sk, info);
		if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT)
			harderr = 1;
	}
	if (type == NDISC_REDIRECT) {
		if (tunnel) {
			ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
				     READ_ONCE(sk->sk_mark), sk->sk_uid);
		} else {
			ip6_sk_redirect(skb, sk);
		}
		goto out;
	}

	/* Tunnels don't have an application socket: don't pass errors back */
	if (tunnel) {
		if (udp_sk(sk)->encap_err_rcv)
			udp_sk(sk)->encap_err_rcv(sk, skb, err, uh->dest,
						  ntohl(info), (u8 *)(uh + 1));
		goto out;
	}

	if (!inet6_test_bit(RECVERR6, sk)) {
		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
			goto out;
	} else {
		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh + 1));
	}

	sk->sk_err = err;
	sk_error_report(sk);
out:
	return 0;
}

static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int rc;

	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		sk_incoming_cpu_update(sk);
	} else {
		sk_mark_napi_id_once(sk, skb);
	}

	rc = __udp_enqueue_schedule_skb(sk, skb);
	if (rc < 0) {
		int is_udplite = IS_UDPLITE(sk);
		enum skb_drop_reason drop_reason;

		/* Note that an ENOMEM error is charged twice */
		if (rc == -ENOMEM) {
			UDP6_INC_STATS(sock_net(sk),
				       UDP_MIB_RCVBUFERRORS, is_udplite);
			drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
		} else {
			UDP6_INC_STATS(sock_net(sk),
				       UDP_MIB_MEMERRORS, is_udplite);
			drop_reason = SKB_DROP_REASON_PROTO_MEM;
		}
		UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
		trace_udp_fail_queue_rcv_skb(rc, sk, skb);
		sk_skb_reason_drop(sk, skb, drop_reason);
		return -1;
	}

	return 0;
}

static __inline__ int udpv6_err(struct sk_buff *skb,
				struct inet6_skb_parm *opt, u8 type,
				u8 code, int offset, __be32 info)
{
	return __udp6_lib_err(skb, opt, type, code, offset, info,
			      dev_net(skb->dev)->ipv4.udp_table);
}

static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
	enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct udp_sock *up = udp_sk(sk);
	int is_udplite = IS_UDPLITE(sk);

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto drop;
	}
	nf_reset_ct(skb);

	if (static_branch_unlikely(&udpv6_encap_needed_key) &&
	    READ_ONCE(up->encap_type)) {
		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);

		/*
		 * This is an encapsulation socket so pass the skb to
		 * the socket's udp_encap_rcv() hook. Otherwise, just
		 * fall through and pass this up the UDP socket.
		 * up->encap_rcv() returns the following value:
		 * =0 if skb was successfully passed to the encap
		 *    handler or was discarded by it.
		 * >0 if skb should be passed on to UDP.
		 * <0 if skb should be resubmitted as proto -N
		 */

		/* if we're overly short, let UDP handle it */
		encap_rcv = READ_ONCE(up->encap_rcv);
		if (encap_rcv) {
			int ret;

			/* Verify checksum before giving to encap */
			if (udp_lib_checksum_complete(skb))
				goto csum_error;

			ret = encap_rcv(sk, skb);
			if (ret <= 0) {
				__UDP6_INC_STATS(sock_net(sk),
						 UDP_MIB_INDATAGRAMS,
						 is_udplite);
				return -ret;
			}
		}

		/* FALLTHROUGH -- it's a UDP Packet */
	}

	/*
	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
	 */
	if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
		u16 pcrlen = READ_ONCE(up->pcrlen);

		if (pcrlen == 0) {          /* full coverage was set  */
			net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
					    UDP_SKB_CB(skb)->cscov, skb->len);
			goto drop;
		}
		if (UDP_SKB_CB(skb)->cscov < pcrlen) {
			net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
					    UDP_SKB_CB(skb)->cscov, pcrlen);
			goto drop;
		}
	}

	prefetch(&sk->sk_rmem_alloc);
	if (rcu_access_pointer(sk->sk_filter) &&
	    udp_lib_checksum_complete(skb))
		goto csum_error;

	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		goto drop;
	}

	udp_csum_pull_header(skb);

	skb_dst_drop(skb);

	return __udpv6_queue_rcv_skb(sk, skb);

csum_error:
	drop_reason = SKB_DROP_REASON_UDP_CSUM;
	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
	atomic_inc(&sk->sk_drops);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return -1;
}

static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *next, *segs;
	int ret;

	if (likely(!udp_unexpected_gso(sk, skb)))
		return udpv6_queue_rcv_one_skb(sk, skb);

	__skb_push(skb, -skb_mac_offset(skb));
	segs = udp_rcv_segment(sk, skb, false);
	skb_list_walk_safe(segs, skb, next) {
		__skb_pull(skb, skb_transport_offset(skb));

		udp_post_segment_fix_csum(skb);
		ret = udpv6_queue_rcv_one_skb(sk, skb);
		if (ret > 0)
			ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,
						 true);
	}
	return 0;
}

static bool __udp_v6_is_mcast_sock(struct net *net, const struct sock *sk,
				   __be16 loc_port, const struct in6_addr *loc_addr,
				   __be16 rmt_port, const struct in6_addr *rmt_addr,
				   int dif, int sdif, unsigned short hnum)
{
	const struct inet_sock *inet = inet_sk(sk);

	if (!net_eq(sock_net(sk), net))
		return false;

	if (udp_sk(sk)->udp_port_hash != hnum ||
	    sk->sk_family != PF_INET6 ||
	    (inet->inet_dport && inet->inet_dport != rmt_port) ||
	    (!ipv6_addr_any(&sk->sk_v6_daddr) &&
		    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
	    !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) ||
	    (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
		    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
		return false;
	if (!inet6_mc_check(sk, loc_addr, rmt_addr))
		return false;
	return true;
}

static void udp6_csum_zero_error(struct sk_buff *skb)
{
	/* RFC 2460 section 8.1 says that we SHOULD log
	 * this error. Well, it is reasonable.
	 */
	net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
			    &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
			    &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
}

/*
 * Note: called only from the BH handler context,
 * so we don't need to lock the hashes.
 */
static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
		const struct in6_addr *saddr, const struct in6_addr *daddr,
		struct udp_table *udptable, int proto)
{
	struct sock *sk, *first = NULL;
	const struct udphdr *uh = udp_hdr(skb);
	unsigned short hnum = ntohs(uh->dest);
	struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
	unsigned int offset = offsetof(typeof(*sk), sk_node);
	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
	int dif = inet6_iif(skb);
	int sdif = inet6_sdif(skb);
	struct hlist_node *node;
	struct sk_buff *nskb;

	if (use_hash2) {
		hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
			    udptable->mask;
		hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
		hslot = &udptable->hash2[hash2].hslot;
		offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
	}

	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
		if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
					    uh->source, saddr, dif, sdif,
					    hnum))
			continue;
		/* If zero checksum and no_check is not on for
		 * the socket then skip it.
		 */
		if (!uh->check && !udp_get_no_check6_rx(sk))
			continue;
		if (!first) {
			first = sk;
			continue;
		}
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (unlikely(!nskb)) {
			atomic_inc(&sk->sk_drops);
			__UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
					 IS_UDPLITE(sk));
			__UDP6_INC_STATS(net, UDP_MIB_INERRORS,
					 IS_UDPLITE(sk));
			continue;
		}

		if (udpv6_queue_rcv_skb(sk, nskb) > 0)
			consume_skb(nskb);
	}

	/* Also lookup *:port if we are using hash2 and haven't done so yet. */
	if (use_hash2 && hash2 != hash2_any) {
		hash2 = hash2_any;
		goto start_lookup;
	}

	if (first) {
		if (udpv6_queue_rcv_skb(first, skb) > 0)
			consume_skb(skb);
	} else {
		kfree_skb(skb);
		__UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
				 proto == IPPROTO_UDPLITE);
	}
	return 0;
}

static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
	if (udp_sk_rx_dst_set(sk, dst))
		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}

/* wrapper for udp_queue_rcv_skb taking care of csum conversion and
 * return code conversion for ip layer consumption
 */
static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
				struct udphdr *uh)
{
	int ret;

	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
		skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo);

	ret = udpv6_queue_rcv_skb(sk, skb);

	/* a return value > 0 means to resubmit the input */
	if (ret > 0)
		return ret;
	return 0;
}

int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
		   int proto)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	const struct in6_addr *saddr, *daddr;
	struct net *net = dev_net(skb->dev);
	struct sock *sk = NULL;
	struct udphdr *uh;
	bool refcounted;
	u32 ulen = 0;

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto discard;

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
	uh = udp_hdr(skb);

	ulen = ntohs(uh->len);
	if (ulen > skb->len)
		goto short_packet;

	if (proto == IPPROTO_UDP) {
		/* UDP validates ulen. */

		/* Check for jumbo payload */
		if (ulen == 0)
			ulen = skb->len;

		if (ulen < sizeof(*uh))
			goto short_packet;

		if (ulen < skb->len) {
			if (pskb_trim_rcsum(skb, ulen))
				goto short_packet;
			saddr = &ipv6_hdr(skb)->saddr;
			daddr = &ipv6_hdr(skb)->daddr;
			uh = udp_hdr(skb);
		}
	}

	if (udp6_csum_init(skb, uh, proto))
		goto csum_error;

	/* Check if the socket is already available, e.g. due to early demux */
	sk = inet6_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
			      &refcounted, udp6_ehashfn);
	if (IS_ERR(sk))
		goto no_sk;

	if (sk) {
		struct dst_entry *dst = skb_dst(skb);
		int ret;

		if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
			udp6_sk_rx_dst_set(sk, dst);

		if (!uh->check && !udp_get_no_check6_rx(sk)) {
			if (refcounted)
				sock_put(sk);
			goto report_csum_error;
		}

		ret = udp6_unicast_rcv_skb(sk, skb, uh);
		if (refcounted)
			sock_put(sk);
		return ret;
	}

	/*
	 *	Multicast receive code
	 */
	if (ipv6_addr_is_multicast(daddr))
		return __udp6_lib_mcast_deliver(net, skb,
				saddr, daddr, udptable, proto);

	/* Unicast */
	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
	if (sk) {
		if (!uh->check && !udp_get_no_check6_rx(sk))
			goto report_csum_error;
		return udp6_unicast_rcv_skb(sk, skb, uh);
	}

no_sk:
	reason = SKB_DROP_REASON_NO_SOCKET;

	if (!uh->check)
		goto report_csum_error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard;
	nf_reset_ct(skb);

	if (udp_lib_checksum_complete(skb))
		goto csum_error;

	__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);

	sk_skb_reason_drop(sk, skb, reason);
	return 0;

short_packet:
	if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
		reason = SKB_DROP_REASON_PKT_TOO_SMALL;
	net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
			    proto == IPPROTO_UDPLITE ? "-Lite" : "",
			    saddr, ntohs(uh->source),
			    ulen, skb->len,
			    daddr, ntohs(uh->dest));
	goto discard;

report_csum_error:
	udp6_csum_zero_error(skb);
csum_error:
	if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
		reason = SKB_DROP_REASON_UDP_CSUM;
	__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
	__UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
}

static struct sock *__udp6_lib_demux_lookup(struct net *net,
			__be16 loc_port, const struct in6_addr *loc_addr,
			__be16 rmt_port, const struct in6_addr *rmt_addr,
			int dif, int sdif)
{
	struct udp_table *udptable = net->ipv4.udp_table;
	unsigned short hnum = ntohs(loc_port);
	struct udp_hslot *hslot2;
	unsigned int hash2;
	__portpair ports;
	struct sock *sk;

	hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
	hslot2 = udp_hashslot2(udptable, hash2);
	ports = INET_COMBINED_PORTS(rmt_port, hnum);

	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
		if (sk->sk_state == TCP_ESTABLISHED &&
		    inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
			return sk;
		/* Only check first socket in chain */
		break;
	}
	return NULL;
}

void udp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	const struct udphdr *uh;
	struct sock *sk;
	struct dst_entry *dst;
	int dif = skb->dev->ifindex;
	int sdif = inet6_sdif(skb);

	if (!pskb_may_pull(skb, skb_transport_offset(skb) +
	    sizeof(struct udphdr)))
		return;

	uh = udp_hdr(skb);

	if (skb->pkt_type == PACKET_HOST)
		sk = __udp6_lib_demux_lookup(net, uh->dest,
					     &ipv6_hdr(skb)->daddr,
					     uh->source, &ipv6_hdr(skb)->saddr,
					     dif, sdif);
	else
		return;

	if (!sk)
		return;

	skb->sk = sk;
	DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk));
	skb->destructor = sock_pfree;
	dst = rcu_dereference(sk->sk_rx_dst);

	if (dst)
		dst = dst_check(dst, sk->sk_rx_dst_cookie);
	if (dst) {
		/* set noref for now.
		 * any place which wants to hold dst has to call
		 * dst_hold_safe()
		 */
		skb_dst_set_noref(skb, dst);
	}
}

INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb)
{
	return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP);
}

/*
 *	Throw away all pending data and cancel the corking. Socket is locked.
 */
static void udp_v6_flush_pending_frames(struct sock *sk)
{
	struct udp_sock *up = udp_sk(sk);

	if (up->pending == AF_INET)
		udp_flush_pending_frames(sk);
	else if (up->pending) {
		up->len = 0;
		WRITE_ONCE(up->pending, 0);
		ip6_flush_pending_frames(sk);
	}
}
*sk
, struct sockaddr
*uaddr
,
1287 if (addr_len
< offsetofend(struct sockaddr
, sa_family
))
1289 /* The following checks are replicated from __ip6_datagram_connect()
1290 * and intended to prevent BPF program called below from accessing
1291 * bytes that are out of the bound specified by user in addr_len.
1293 if (uaddr
->sa_family
== AF_INET
) {
1294 if (ipv6_only_sock(sk
))
1295 return -EAFNOSUPPORT
;
1296 return udp_pre_connect(sk
, uaddr
, addr_len
);
1299 if (addr_len
< SIN6_LEN_RFC2133
)
1302 return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk
, uaddr
, &addr_len
);

static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	int res;

	lock_sock(sk);
	res = __ip6_datagram_connect(sk, uaddr, addr_len);
	if (!res)
		udp6_hash4(sk);
	release_sock(sk);
	return res;
}

/**
 *	udp6_hwcsum_outgoing - handle outgoing HW checksumming
 *	@sk:	socket we are sending on
 *	@skb:	sk_buff containing the filled-in UDP header
 *		(checksum field must be zeroed out)
 *	@saddr: source address
 *	@daddr: destination address
 *	@len:	length of packet
 */
static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
				 const struct in6_addr *saddr,
				 const struct in6_addr *daddr, int len)
{
	unsigned int offset;
	struct udphdr *uh = udp_hdr(skb);
	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
	__wsum csum = 0;

	if (!frags) {
		/* Only one fragment on the socket. */
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
		uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
	} else {
		/*
		 * HW-checksum won't work as there are two or more
		 * fragments on the socket so that all csums of sk_buffs
		 * should be together
		 */
		offset = skb_transport_offset(skb);
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		csum = skb->csum;

		skb->ip_summed = CHECKSUM_NONE;

		do {
			csum = csum_add(csum, frags->csum);
		} while ((frags = frags->next));

		uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
					    csum);
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;
	}
}

static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
			   struct inet_cork *cork)
{
	struct sock *sk = skb->sk;
	struct udphdr *uh;
	int err = 0;
	int is_udplite = IS_UDPLITE(sk);
	__wsum csum = 0;
	int offset = skb_transport_offset(skb);
	int len = skb->len - offset;
	int datalen = len - sizeof(*uh);

	/*
	 * Create a UDP header
	 */
	uh = udp_hdr(skb);
	uh->source = fl6->fl6_sport;
	uh->dest = fl6->fl6_dport;
	uh->len = htons(len);
	uh->check = 0;

	if (cork->gso_size) {
		const int hlen = skb_network_header_len(skb) +
				 sizeof(struct udphdr);

		if (hlen + cork->gso_size > cork->fragsize) {
			kfree_skb(skb);
			return -EINVAL;
		}
		if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
			kfree_skb(skb);
			return -EINVAL;
		}
		if (udp_get_no_check6_tx(sk)) {
			kfree_skb(skb);
			return -EINVAL;
		}
		if (is_udplite || dst_xfrm(skb_dst(skb))) {
			kfree_skb(skb);
			return -EIO;
		}

		if (datalen > cork->gso_size) {
			skb_shinfo(skb)->gso_size = cork->gso_size;
			skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
			skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
								 cork->gso_size);

			/* Don't checksum the payload, skb will get segmented */
			goto csum_partial;
		}
	}

	if (is_udplite)
		csum = udplite_csum(skb);
	else if (udp_get_no_check6_tx(sk)) {	/* UDP csum disabled */
		skb->ip_summed = CHECKSUM_NONE;
		goto send;
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
csum_partial:
		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len);
		goto send;
	} else
		csum = udp_csum(skb);

	/* add protocol-dependent pseudo-header */
	uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
				    len, fl6->flowi6_proto, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

send:
	err = ip6_send_skb(skb);
	if (err) {
		if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) {
			UDP6_INC_STATS(sock_net(sk),
				       UDP_MIB_SNDBUFERRORS, is_udplite);
			err = 0;
		}
	} else {
		UDP6_INC_STATS(sock_net(sk),
			       UDP_MIB_OUTDATAGRAMS, is_udplite);
	}
	return err;
}

static int udp_v6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;
	struct udp_sock *up = udp_sk(sk);
	int err = 0;

	if (up->pending == AF_INET)
		return udp_push_pending_frames(sk);

	skb = ip6_finish_skb(sk);
	if (!skb)
		goto out;

	err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6,
			      &inet_sk(sk)->cork.base);
out:
	up->len = 0;
	WRITE_ONCE(up->pending, 0);
	return err;
}

int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	struct ipv6_txoptions opt_space;
	struct udp_sock *up = udp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
	struct in6_addr *daddr, *final_p, final;
	struct ipv6_txoptions *opt = NULL;
	struct ipv6_txoptions *opt_to_free = NULL;
	struct ip6_flowlabel *flowlabel = NULL;
	struct inet_cork_full cork;
	struct flowi6 *fl6 = &cork.fl.u.ip6;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	int addr_len = msg->msg_namelen;
	bool connected = false;
	int ulen = len;
	int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
	int err;
	int is_udplite = IS_UDPLITE(sk);
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);

	ipcm6_init(&ipc6);
	ipc6.gso_size = READ_ONCE(up->gso_size);
	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
	ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
	ipc6.sockc.priority = READ_ONCE(sk->sk_priority);

	/* destination address check */
	if (sin6) {
		if (addr_len < offsetof(struct sockaddr, sa_data))
			return -EINVAL;

		switch (sin6->sin6_family) {
		case AF_INET6:
			if (addr_len < SIN6_LEN_RFC2133)
				return -EINVAL;
			daddr = &sin6->sin6_addr;
			if (ipv6_addr_any(daddr) &&
			    ipv6_addr_v4mapped(&np->saddr))
				ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
						       daddr);
			break;
		case AF_INET:
			goto do_udp_sendmsg;
		case AF_UNSPEC:
			msg->msg_name = sin6 = NULL;
			msg->msg_namelen = addr_len = 0;
			daddr = NULL;
			break;
		default:
			return -EINVAL;
		}
	} else if (!READ_ONCE(up->pending)) {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = &sk->sk_v6_daddr;
	} else
		daddr = NULL;

	if (daddr) {
		if (ipv6_addr_v4mapped(daddr)) {
			struct sockaddr_in sin;
			sin.sin_family = AF_INET;
			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
			sin.sin_addr.s_addr = daddr->s6_addr32[3];
			msg->msg_name = &sin;
			msg->msg_namelen = sizeof(sin);
do_udp_sendmsg:
			err = ipv6_only_sock(sk) ?
				-ENETUNREACH : udp_sendmsg(sk, msg, len);
			msg->msg_name = sin6;
			msg->msg_namelen = addr_len;
			return err;
		}
	}

	/* Rough check on arithmetic overflow,
	   better check is made in ip6_append_data().
	   */
	if (len > INT_MAX - sizeof(struct udphdr))
		return -EMSGSIZE;

	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
	if (READ_ONCE(up->pending)) {
		if (READ_ONCE(up->pending) == AF_INET)
			return udp_sendmsg(sk, msg, len);
		/*
		 * There are pending frames.
		 * The socket lock must be held while it's corked.
		 */
		lock_sock(sk);
		if (likely(up->pending)) {
			if (unlikely(up->pending != AF_INET6)) {
				release_sock(sk);
				return -EAFNOSUPPORT;
			}
			dst = NULL;
			goto do_append_data;
		}
		release_sock(sk);
	}
	ulen += sizeof(struct udphdr);

	memset(fl6, 0, sizeof(*fl6));

	if (sin6) {
		if (sin6->sin6_port == 0)
			return -EINVAL;

		fl6->fl6_dport = sin6->sin6_port;
		daddr = &sin6->sin6_addr;

		if (inet6_test_bit(SNDFLOW, sk)) {
			fl6->flowlabel = sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK;
			if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
				flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
				if (IS_ERR(flowlabel))
					return -EINVAL;
			}
		}

		/*
		 * Otherwise it will be difficult to maintain
		 * sk->sk_dst_cache.
		 */
		if (sk->sk_state == TCP_ESTABLISHED &&
		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
			daddr = &sk->sk_v6_daddr;

		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    sin6->sin6_scope_id &&
		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
			fl6->flowi6_oif = sin6->sin6_scope_id;
	} else {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;

		fl6->fl6_dport = inet->inet_dport;
		daddr = &sk->sk_v6_daddr;
		fl6->flowlabel = np->flow_label;
		connected = true;
	}

	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if);

	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;

	fl6->flowi6_uid = sk->sk_uid;

	if (msg->msg_controllen) {
		opt = &opt_space;
		memset(opt, 0, sizeof(struct ipv6_txoptions));
		opt->tot_len = sizeof(*opt);
		ipc6.opt = opt;

		err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
		if (err > 0)
			err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
						    &ipc6);
		if (err < 0) {
			fl6_sock_release(flowlabel);
			return err;
		}
		if ((fl6->flowlabel & IPV6_FLOWLABEL_MASK) && !flowlabel) {
			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
		}
		if (!(opt->opt_nflen | opt->opt_flen))
			opt = NULL;
		connected = false;
	}
	if (!opt) {
		opt = txopt_get(np);
		opt_to_free = opt;
	}
	if (flowlabel)
		opt = fl6_merge_options(&opt_space, flowlabel, opt);
	opt = ipv6_fixup_options(&opt_space, opt);
	ipc6.opt = opt;
->flowi6_proto
= sk
->sk_protocol
;
1661 fl6
->flowi6_mark
= ipc6
.sockc
.mark
;
1662 fl6
->daddr
= *daddr
;
1663 if (ipv6_addr_any(&fl6
->saddr
) && !ipv6_addr_any(&np
->saddr
))
1664 fl6
->saddr
= np
->saddr
;
1665 fl6
->fl6_sport
= inet
->inet_sport
;
1667 if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG
) && !connected
) {
1668 err
= BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk
,
1669 (struct sockaddr
*)sin6
,
1675 if (ipv6_addr_v4mapped(&sin6
->sin6_addr
)) {
1676 /* BPF program rewrote IPv6-only by IPv4-mapped
1677 * IPv6. It's currently unsupported.
1682 if (sin6
->sin6_port
== 0) {
1683 /* BPF program set invalid port. Reject it. */
1687 fl6
->fl6_dport
= sin6
->sin6_port
;
1688 fl6
->daddr
= sin6
->sin6_addr
;
1692 if (ipv6_addr_any(&fl6
->daddr
))
1693 fl6
->daddr
.s6_addr
[15] = 0x1; /* :: means loopback (BSD'ism) */
1695 final_p
= fl6_update_dst(fl6
, opt
, &final
);
1699 if (!fl6
->flowi6_oif
&& ipv6_addr_is_multicast(&fl6
->daddr
)) {
1700 fl6
->flowi6_oif
= READ_ONCE(np
->mcast_oif
);
1702 } else if (!fl6
->flowi6_oif
)
1703 fl6
->flowi6_oif
= READ_ONCE(np
->ucast_oif
);
1705 security_sk_classify_flow(sk
, flowi6_to_flowi_common(fl6
));
1707 if (ipc6
.tclass
< 0)
1708 ipc6
.tclass
= np
->tclass
;
1710 fl6
->flowlabel
= ip6_make_flowinfo(ipc6
.tclass
, fl6
->flowlabel
);
1712 dst
= ip6_sk_dst_lookup_flow(sk
, fl6
, final_p
, connected
);
1719 if (ipc6
.hlimit
< 0)
1720 ipc6
.hlimit
= ip6_sk_dst_hoplimit(np
, fl6
, dst
);
1722 if (msg
->msg_flags
&MSG_CONFIRM
)
	/* Lockless fast path for the non-corking case */
	if (!corkreq) {
		struct sk_buff *skb;

		skb = ip6_make_skb(sk, getfrag, msg, ulen,
				   sizeof(struct udphdr), &ipc6,
				   dst_rt6_info(dst),
				   msg->msg_flags, &cork);
		err = PTR_ERR(skb);
		if (!IS_ERR_OR_NULL(skb))
			err = udp_v6_send_skb(skb, fl6, &cork.base);
		/* ip6_make_skb steals dst reference */
		goto out_no_dst;
	}

	lock_sock(sk);
	if (unlikely(up->pending)) {
		/* The socket is already corked while preparing it. */
		/* ... which is an evident application bug. --ANK */
		release_sock(sk);

		net_dbg_ratelimited("udp cork app bug 2\n");
		err = -EINVAL;
		goto out;
	}

	WRITE_ONCE(up->pending, AF_INET6);

do_append_data:
	if (ipc6.dontfrag < 0)
		ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
	up->len += ulen;
	err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
			      &ipc6, fl6, dst_rt6_info(dst),
			      corkreq ? msg->msg_flags | MSG_MORE : msg->msg_flags);
	if (err)
		udp_v6_flush_pending_frames(sk);
	else if (!corkreq)
		err = udp_v6_push_pending_frames(sk);
	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
		WRITE_ONCE(up->pending, 0);

	if (err > 0)
		err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0;
	release_sock(sk);

out:
	dst_release(dst);
out_no_dst:
	fl6_sock_release(flowlabel);
	txopt_put(opt_to_free);
	if (!err)
		return len;
	/*
	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
	 * we don't have a good statistic (IpOutDiscards but it can be too many
	 * things). We could add another new stat but at least for now that
	 * seems like overkill.
	 */
	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
		UDP6_INC_STATS(sock_net(sk),
			       UDP_MIB_SNDBUFERRORS, is_udplite);
	}
	return err;

do_confirm:
	if (msg->msg_flags & MSG_PROBE)
		dst_confirm_neigh(dst, &fl6->daddr);
	if (!(msg->msg_flags & MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}
EXPORT_SYMBOL(udpv6_sendmsg);

static void udpv6_splice_eof(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct udp_sock *up = udp_sk(sk);

	if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk))
		return;

	lock_sock(sk);
	if (up->pending && !udp_test_bit(CORK, sk))
		udp_v6_push_pending_frames(sk);
	release_sock(sk);
}

void udpv6_destroy_sock(struct sock *sk)
{
	struct udp_sock *up = udp_sk(sk);

	lock_sock(sk);

	/* protects from races with udp_abort() */
	sock_set_flag(sk, SOCK_DEAD);
	udp_v6_flush_pending_frames(sk);
	release_sock(sk);

	if (static_branch_unlikely(&udpv6_encap_needed_key)) {
		if (up->encap_type) {
			void (*encap_destroy)(struct sock *sk);
			encap_destroy = READ_ONCE(up->encap_destroy);
			if (encap_destroy)
				encap_destroy(sk);
		}
		if (udp_test_bit(ENCAP_ENABLED, sk)) {
			static_branch_dec(&udpv6_encap_needed_key);
			udp_encap_disable();
		}
	}
}

/*
 *	Socket option code for UDP
 */
int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
		     unsigned int optlen)
{
	if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET)
		return udp_lib_setsockopt(sk, level, optname,
					  optval, optlen,
					  udp_v6_push_pending_frames);
	return ipv6_setsockopt(sk, level, optname, optval, optlen);
}

int udpv6_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *optlen)
{
	if (level == SOL_UDP || level == SOL_UDPLITE)
		return udp_lib_getsockopt(sk, level, optname, optval, optlen);
	return ipv6_getsockopt(sk, level, optname, optval, optlen);
}

/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
int udp6_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
	} else {
		int bucket = ((struct udp_iter_state *)seq->private)->bucket;
		const struct inet_sock *inet = inet_sk((const struct sock *)v);
		__u16 srcp = ntohs(inet->inet_sport);
		__u16 destp = ntohs(inet->inet_dport);
		__ip6_dgram_sock_seq_show(seq, v, srcp, destp,
					  udp_rqueue_get(v), bucket);
	}
	return 0;
}

const struct seq_operations udp6_seq_ops = {
	.start		= udp_seq_start,
	.next		= udp_seq_next,
	.stop		= udp_seq_stop,
	.show		= udp6_seq_show,
};
EXPORT_SYMBOL(udp6_seq_ops);

static struct udp_seq_afinfo udp6_seq_afinfo = {
	.family		= AF_INET6,
	.udp_table	= NULL,
};

int __net_init udp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("udp6", 0444, net->proc_net, &udp6_seq_ops,
			sizeof(struct udp_iter_state), &udp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void udp6_proc_exit(struct net *net)
{
	remove_proc_entry("udp6", net->proc_net);
}
#endif /* CONFIG_PROC_FS */

/* ------------------------------------------------------------------------ */

struct proto udpv6_prot = {
	.name			= "UDPv6",
	.owner			= THIS_MODULE,
	.close			= udp_lib_close,
	.pre_connect		= udpv6_pre_connect,
	.connect		= udpv6_connect,
	.disconnect		= udp_disconnect,
	.ioctl			= udp_ioctl,
	.init			= udpv6_init_sock,
	.destroy		= udpv6_destroy_sock,
	.setsockopt		= udpv6_setsockopt,
	.getsockopt		= udpv6_getsockopt,
	.sendmsg		= udpv6_sendmsg,
	.recvmsg		= udpv6_recvmsg,
	.splice_eof		= udpv6_splice_eof,
	.release_cb		= ip6_datagram_release_cb,
	.hash			= udp_lib_hash,
	.unhash			= udp_lib_unhash,
	.rehash			= udp_v6_rehash,
	.get_port		= udp_v6_get_port,
	.put_port		= udp_lib_unhash,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= udp_bpf_update_proto,
#endif

	.memory_allocated	= &udp_memory_allocated,
	.per_cpu_fw_alloc	= &udp_memory_per_cpu_fw_alloc,

	.sysctl_mem		= sysctl_udp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_udp_wmem_min),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_udp_rmem_min),
	.obj_size		= sizeof(struct udp6_sock),
	.ipv6_pinfo_offset	= offsetof(struct udp6_sock, inet6),
	.h.udp_table		= NULL,
	.diag_destroy		= udp_abort,
};

static struct inet_protosw udpv6_protosw = {
	.type		= SOCK_DGRAM,
	.protocol	= IPPROTO_UDP,
	.prot		= &udpv6_prot,
	.ops		= &inet6_dgram_ops,
	.flags		= INET_PROTOSW_PERMANENT,
};

int __init udpv6_init(void)
{
	int ret;

	net_hotdata.udpv6_protocol = (struct inet6_protocol) {
		.handler	= udpv6_rcv,
		.err_handler	= udpv6_err,
		.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
	};
	ret = inet6_add_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
	if (ret)
		goto out;

	ret = inet6_register_protosw(&udpv6_protosw);
	if (ret)
		goto out_udpv6_protocol;
out:
	return ret;

out_udpv6_protocol:
	inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
	goto out;
}

void udpv6_exit(void)
{
	inet6_unregister_protosw(&udpv6_protosw);
	inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
}