// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	UDP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/ipv4/udp.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	Kazunori MIYAZAWA @USAGI:	change process style to use ip6_append_data
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
 */
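/* Illustrative user-space sketch (not part of this file): the IPV6_V6ONLY
 * option mentioned above decides whether an IPv6 socket may share a port
 * with IPv4 or receive IPv4 traffic as v4-mapped addresses. Hypothetical
 * example using only the standard sockets API:
 */
#if 0
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static int bind_udp6(unsigned short port, int v6only)
{
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
	struct sockaddr_in6 a;

	if (fd < 0)
		return -1;
	/* 0: dual-stack (also sees IPv4 as v4-mapped); 1: IPv6 only */
	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, sizeof(v6only));
	memset(&a, 0, sizeof(a));
	a.sin6_family = AF_INET6;
	a.sin6_port = htons(port);
	a.sin6_addr = in6addr_any;
	if (bind(fd, (struct sockaddr *)&a, sizeof(a)) < 0)
		return -1;
	return fd;
}
#endif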
#include <linux/bpf-cgroup.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/indirect_call_wrapper.h>
#include <trace/events/udp.h>

#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/busy_poll.h>
#include <net/sock_reuseport.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <trace/events/skb.h>
static void udpv6_destruct_sock(struct sock *sk)
{
	udp_destruct_common(sk);
	inet6_sock_destruct(sk);
}

int udpv6_init_sock(struct sock *sk)
{
	udp_lib_init_sock(sk);
	sk->sk_destruct = udpv6_destruct_sock;
	set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
	return 0;
}
static u32 udp6_ehash_secret __read_mostly;
static u32 udp_ipv6_hash_secret __read_mostly;

INDIRECT_CALLABLE_SCOPE
u32 udp6_ehashfn(const struct net *net,
		 const struct in6_addr *laddr,
		 const u16 lport,
		 const struct in6_addr *faddr,
		 const __be16 fport)
{
	u32 lhash, fhash;

	net_get_random_once(&udp6_ehash_secret,
			    sizeof(udp6_ehash_secret));
	net_get_random_once(&udp_ipv6_hash_secret,
			    sizeof(udp_ipv6_hash_secret));

	lhash = (__force u32)laddr->s6_addr32[3];
	fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);

	return __inet6_ehashfn(lhash, lport, fhash, fport,
			       udp6_ehash_secret + net_hash_mix(net));
}
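/* The connection hash above folds the 4-tuple into one u32: only the low
 * 32 bits of the local address feed lhash, while the full foreign address
 * goes through a keyed jhash. A rough sketch of the final combination,
 * mirroring what __inet6_ehashfn() does (names here are illustrative only):
 */
#if 0
#include <linux/jhash.h>

static u32 toy_ehashfn(u32 lhash, u16 lport, u32 fhash, u16 fport, u32 secret)
{
	/* mix both address hashes and both ports under a per-netns secret */
	return jhash_3words(lhash, fhash, ((u32)lport << 16) | fport, secret);
}
#endif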
int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
	unsigned int hash2_nulladdr =
		ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
	unsigned int hash2_partial =
		ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);

	/* precompute partial secondary hash */
	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
	return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
void udp_v6_rehash(struct sock *sk)
{
	u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
					  &sk->sk_v6_rcv_saddr,
					  inet_sk(sk)->inet_num);
	u16 new_hash4;

	if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
		new_hash4 = udp_ehashfn(sock_net(sk),
					sk->sk_rcv_saddr, sk->sk_num,
					sk->sk_daddr, sk->sk_dport);
	} else {
		new_hash4 = udp6_ehashfn(sock_net(sk),
					 &sk->sk_v6_rcv_saddr, sk->sk_num,
					 &sk->sk_v6_daddr, sk->sk_dport);
	}

	udp_lib_rehash(sk, new_hash, new_hash4);
}
static int compute_score(struct sock *sk, const struct net *net,
			 const struct in6_addr *saddr, __be16 sport,
			 const struct in6_addr *daddr, unsigned short hnum,
			 int dif, int sdif)
{
	int bound_dev_if, score;
	struct inet_sock *inet;
	bool dev_match;

	if (!net_eq(sock_net(sk), net) ||
	    udp_sk(sk)->udp_port_hash != hnum ||
	    sk->sk_family != PF_INET6)
		return -1;

	if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
		return -1;

	score = 0;
	inet = inet_sk(sk);

	if (inet->inet_dport) {
		if (inet->inet_dport != sport)
			return -1;
		score++;
	}

	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
		if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
			return -1;
		score++;
	}

	bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
	dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif);
	if (!dev_match)
		return -1;
	if (bound_dev_if)
		score++;

	if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
		score++;

	return score;
}
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(const struct net *net,
		const struct in6_addr *saddr, __be16 sport,
		const struct in6_addr *daddr, unsigned int hnum,
		int dif, int sdif, struct udp_hslot *hslot2,
		struct sk_buff *skb)
{
	struct sock *sk, *result;
	int score, badness;
	bool need_rescore;

	result = NULL;
	badness = -1;
	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
		need_rescore = false;
rescore:
		score = compute_score(need_rescore ? result : sk, net, saddr,
				      sport, daddr, hnum, dif, sdif);
		if (score > badness) {
			badness = score;

			if (need_rescore)
				continue;

			if (sk->sk_state == TCP_ESTABLISHED) {
				result = sk;
				continue;
			}

			result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
							saddr, sport, daddr, hnum, udp6_ehashfn);
			if (!result) {
				result = sk;
				continue;
			}

			/* Fall back to scoring if group has connections */
			if (!reuseport_has_conns(sk))
				return result;

			/* Reuseport logic returned an error, keep original score. */
			if (IS_ERR(result))
				continue;

			/* compute_score is too long of a function to be
			 * inlined, and calling it again here yields
			 * measurable overhead for some optimized
			 * workloads. Work around it by jumping
			 * backwards to rescore 'result'.
			 */
			need_rescore = true;
			goto rescore;
		}
	}
	return result;
}
#if IS_ENABLED(CONFIG_BASE_SMALL)
static struct sock *udp6_lib_lookup4(const struct net *net,
				     const struct in6_addr *saddr, __be16 sport,
				     const struct in6_addr *daddr,
				     unsigned int hnum, int dif, int sdif,
				     struct udp_table *udptable)
{
	return NULL;
}

static void udp6_hash4(struct sock *sk)
{
}
#else /* !CONFIG_BASE_SMALL */
static struct sock *udp6_lib_lookup4(const struct net *net,
				     const struct in6_addr *saddr, __be16 sport,
				     const struct in6_addr *daddr,
				     unsigned int hnum, int dif, int sdif,
				     struct udp_table *udptable)
{
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	const struct hlist_nulls_node *node;
	struct udp_hslot *hslot4;
	unsigned int hash4, slot;
	struct udp_sock *up;
	struct sock *sk;

begin:
	hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport);
	slot = hash4 & udptable->mask;
	hslot4 = &udptable->hash4[slot];

	udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
		sk = (struct sock *)up;
		if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
			return sk;
	}

	/* if the nulls value we got at the end of this lookup is not the
	 * expected one, we must restart lookup. We probably met an item that
	 * was moved to another chain due to rehash.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;

	return NULL;
}

static void udp6_hash4(struct sock *sk)
{
	struct net *net = sock_net(sk);
	unsigned int hash;

	if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
		udp4_hash4(sk);
		return;
	}

	if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		return;

	hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num,
			    &sk->sk_v6_daddr, sk->sk_dport);

	udp_lib_hash4(sk, hash);
}
#endif /* CONFIG_BASE_SMALL */
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(const struct net *net,
			       const struct in6_addr *saddr, __be16 sport,
			       const struct in6_addr *daddr, __be16 dport,
			       int dif, int sdif, struct udp_table *udptable,
			       struct sk_buff *skb)
{
	unsigned short hnum = ntohs(dport);
	struct udp_hslot *hslot2;
	struct sock *result, *sk;
	unsigned int hash2;

	hash2 = ipv6_portaddr_hash(net, daddr, hnum);
	hslot2 = udp_hashslot2(udptable, hash2);

	if (udp_has_hash4(hslot2)) {
		result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum,
					  dif, sdif, udptable);
		if (result) /* udp6_lib_lookup4 returns sk or NULL */
			return result;
	}

	/* Lookup connected or non-wildcard sockets */
	result = udp6_lib_lookup2(net, saddr, sport,
				  daddr, hnum, dif, sdif,
				  hslot2, skb);
	if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
		goto done;

	/* Lookup redirect from BPF */
	if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
	    udptable == net->ipv4.udp_table) {
		sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
						saddr, sport, daddr, hnum, dif,
						udp6_ehashfn);
		if (sk) {
			result = sk;
			goto done;
		}
	}

	/* Got non-wildcard socket or error on first lookup */
	if (result)
		goto done;

	/* Lookup wildcard sockets */
	hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
	hslot2 = udp_hashslot2(udptable, hash2);

	result = udp6_lib_lookup2(net, saddr, sport,
				  &in6addr_any, hnum, dif, sdif,
				  hslot2, skb);
done:
	if (IS_ERR(result))
		return NULL;
	return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
					  __be16 sport, __be16 dport,
					  struct udp_table *udptable)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);

	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
				 &iph->daddr, dport, inet6_iif(skb),
				 inet6_sdif(skb), udptable, skb);
}
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
				 __be16 sport, __be16 dport)
{
	const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
	const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	int iif, sdif;

	inet6_get_iif_sdif(skb, &iif, &sdif);

	return __udp6_lib_lookup(net, &iph->saddr, sport,
				 &iph->daddr, dport, iif,
				 sdif, net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
 * Does increment socket refcount.
 */
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport,
			     const struct in6_addr *daddr, __be16 dport, int dif)
{
	struct sock *sk;

	sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
			       dif, 0, net->ipv4.udp_table, NULL);
	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
		sk = NULL;
	return sk;
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
#endif
/* do not use the scratch area len for jumbogram: their length exceeds the
 * scratch area space; note that the IP6CB flags are still in the first
 * cacheline, so checking for jumbograms is cheap
 */
static int udp6_skb_len(struct sk_buff *skb)
{
	return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb);
}
/*
 *	This should be easy, if there is something there we
 *	return it, otherwise we block.
 */

int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		  int flags, int *addr_len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	unsigned int ulen, copied;
	int off, err, peeking = flags & MSG_PEEK;
	int is_udplite = IS_UDPLITE(sk);
	struct udp_mib __percpu *mib;
	bool checksum_valid = false;
	int is_udp4;

	if (flags & MSG_ERRQUEUE)
		return ipv6_recv_error(sk, msg, len, addr_len);

	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);

try_again:
	off = sk_peek_offset(sk, flags);
	skb = __skb_recv_udp(sk, flags, &off, &err);
	if (!skb)
		return err;

	ulen = udp6_skb_len(skb);
	copied = len;
	if (copied > ulen - off)
		copied = ulen - off;
	else if (copied < ulen)
		msg->msg_flags |= MSG_TRUNC;

	is_udp4 = (skb->protocol == htons(ETH_P_IP));
	mib = __UDPX_MIB(sk, is_udp4);

	/*
	 * If checksum is needed at all, try to do it while copying the
	 * data.  If the data is truncated, or if we only want a partial
	 * coverage checksum (UDP-Lite), do it before the copy.
	 */

	if (copied < ulen || peeking ||
	    (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
		checksum_valid = udp_skb_csum_unnecessary(skb) ||
				!__udp_lib_checksum_complete(skb);
		if (!checksum_valid)
			goto csum_copy_err;
	}

	if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
		if (udp_skb_is_linear(skb))
			err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
		else
			err = skb_copy_datagram_msg(skb, off, msg, copied);
	} else {
		err = skb_copy_and_csum_datagram_msg(skb, off, msg);
		if (err == -EINVAL)
			goto csum_copy_err;
	}
	if (unlikely(err)) {
		if (!peeking) {
			atomic_inc(&sk->sk_drops);
			SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
		}
		kfree_skb(skb);
		return err;
	}
	if (!peeking)
		SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);

	sock_recv_cmsgs(msg, sk, skb);

	/* Copy the address. */
	if (msg->msg_name) {
		DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = udp_hdr(skb)->source;
		sin6->sin6_flowinfo = 0;

		if (is_udp4) {
			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
					       &sin6->sin6_addr);
			sin6->sin6_scope_id = 0;
		} else {
			sin6->sin6_addr = ipv6_hdr(skb)->saddr;
			sin6->sin6_scope_id =
				ipv6_iface_scope_id(&sin6->sin6_addr,
						    inet6_iif(skb));
		}
		*addr_len = sizeof(*sin6);

		BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
						      (struct sockaddr *)sin6,
						      addr_len);
	}

	if (udp_test_bit(GRO_ENABLED, sk))
		udp_cmsg_recv(msg, sk, skb);

	if (np->rxopt.all)
		ip6_datagram_recv_common_ctl(sk, msg, skb);

	if (is_udp4) {
		if (inet_cmsg_flags(inet))
			ip_cmsg_recv_offset(msg, sk, skb,
					    sizeof(struct udphdr), off);
	} else {
		if (np->rxopt.all)
			ip6_datagram_recv_specific_ctl(sk, msg, skb);
	}

	err = copied;
	if (flags & MSG_TRUNC)
		err = ulen;

	skb_consume_udp(sk, skb, peeking ? -err : err);
	return err;

csum_copy_err:
	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
				 udp_skb_destructor)) {
		SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
		SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
	}
	kfree_skb(skb);

	/* starting over for a new packet, but check if we need to yield */
	cond_resched();
	msg->msg_flags &= ~MSG_TRUNC;
	goto try_again;
}
DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
	static_branch_inc(&udpv6_encap_needed_key);
}
EXPORT_SYMBOL(udpv6_encap_enable);
/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
 * through error handlers in encapsulations looking for a match.
 */
static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
				      struct inet6_skb_parm *opt,
				      u8 type, u8 code, int offset, __be32 info)
{
	int i;

	for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
		int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
			       u8 type, u8 code, int offset, __be32 info);
		const struct ip6_tnl_encap_ops *encap;

		encap = rcu_dereference(ip6tun_encaps[i]);
		if (!encap)
			continue;
		handler = encap->err_handler;
		if (handler && !handler(skb, opt, type, code, offset, info))
			return 0;
	}

	return -ENOENT;
}
/* Try to match ICMP errors to UDP tunnels by looking up a socket without
 * reversing source and destination port: this will match tunnels that force the
 * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
 * lwtunnels might actually break this assumption by being configured with
 * different destination ports on endpoints, in this case we won't be able to
 * trace ICMP messages back to them.
 *
 * If this doesn't match any socket, probe tunnels with arbitrary destination
 * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
 * we've sent packets to won't necessarily match the local destination port.
 *
 * Then ask the tunnel implementation to match the error against a valid
 * association.
 *
 * Return an error if we can't find a match, the socket if we need further
 * processing, zero otherwise.
 */
static struct sock *__udp6_lib_err_encap(struct net *net,
					 const struct ipv6hdr *hdr, int offset,
					 struct udphdr *uh,
					 struct udp_table *udptable,
					 struct sock *sk,
					 struct sk_buff *skb,
					 struct inet6_skb_parm *opt,
					 u8 type, u8 code, __be32 info)
{
	int (*lookup)(struct sock *sk, struct sk_buff *skb);
	int network_offset, transport_offset;
	struct udp_sock *up;

	network_offset = skb_network_offset(skb);
	transport_offset = skb_transport_offset(skb);

	/* Network header needs to point to the outer IPv6 header inside ICMP */
	skb_reset_network_header(skb);

	/* Transport header needs to point to the UDP header */
	skb_set_transport_header(skb, offset);

	if (sk) {
		up = udp_sk(sk);

		lookup = READ_ONCE(up->encap_err_lookup);
		if (lookup && lookup(sk, skb))
			sk = NULL;

		goto out;
	}

	sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
			       &hdr->saddr, uh->dest,
			       inet6_iif(skb), 0, udptable, skb);
	if (sk) {
		up = udp_sk(sk);

		lookup = READ_ONCE(up->encap_err_lookup);
		if (!lookup || lookup(sk, skb))
			sk = NULL;
	}

out:
	if (!sk)
		sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
							offset, info));

	skb_set_transport_header(skb, transport_offset);
	skb_set_network_header(skb, network_offset);

	return sk;
}
int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		   u8 type, u8 code, int offset, __be32 info,
		   struct udp_table *udptable)
{
	struct ipv6_pinfo *np;
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct in6_addr *saddr = &hdr->saddr;
	const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr;
	struct udphdr *uh = (struct udphdr *)(skb->data+offset);
	bool tunnel = false;
	struct sock *sk;
	int harderr;
	int err;
	struct net *net = dev_net(skb->dev);

	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
			       inet6_iif(skb), inet6_sdif(skb), udptable, NULL);

	if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
		/* No socket for error: try tunnels before discarding */
		if (static_branch_unlikely(&udpv6_encap_needed_key)) {
			sk = __udp6_lib_err_encap(net, hdr, offset, uh,
						  udptable, sk, skb,
						  opt, type, code, info);
			if (!sk)
				return 0;
		} else {
			sk = ERR_PTR(-ENOENT);
		}

		if (IS_ERR(sk)) {
			__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
					  ICMP6_MIB_INERRORS);
			return PTR_ERR(sk);
		}

		tunnel = true;
	}

	harderr = icmpv6_err_convert(type, code, &err);
	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		if (!ip6_sk_accept_pmtu(sk))
			goto out;
		ip6_sk_update_pmtu(skb, sk, info);
		if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT)
			harderr = 1;
	}
	if (type == NDISC_REDIRECT) {
		if (tunnel) {
			ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
				     READ_ONCE(sk->sk_mark), sk->sk_uid);
		} else {
			ip6_sk_redirect(skb, sk);
		}
		goto out;
	}

	/* Tunnels don't have an application socket: don't pass errors back */
	if (tunnel) {
		if (udp_sk(sk)->encap_err_rcv)
			udp_sk(sk)->encap_err_rcv(sk, skb, err, uh->dest,
						  ntohl(info), (u8 *)(uh+1));
		goto out;
	}

	if (!inet6_test_bit(RECVERR6, sk)) {
		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
			goto out;
	} else {
		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
	}

	sk->sk_err = err;
	sk_error_report(sk);
out:
	return 0;
}
static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int rc;

	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		sk_incoming_cpu_update(sk);
	} else {
		sk_mark_napi_id_once(sk, skb);
	}

	rc = __udp_enqueue_schedule_skb(sk, skb);
	if (rc < 0) {
		int is_udplite = IS_UDPLITE(sk);
		enum skb_drop_reason drop_reason;

		/* Note that an ENOMEM error is charged twice */
		if (rc == -ENOMEM) {
			UDP6_INC_STATS(sock_net(sk),
				       UDP_MIB_RCVBUFERRORS, is_udplite);
			drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
		} else {
			UDP6_INC_STATS(sock_net(sk),
				       UDP_MIB_MEMERRORS, is_udplite);
			drop_reason = SKB_DROP_REASON_PROTO_MEM;
		}
		UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
		trace_udp_fail_queue_rcv_skb(rc, sk, skb);
		sk_skb_reason_drop(sk, skb, drop_reason);
		return -1;
	}

	return 0;
}
static __inline__ int udpv6_err(struct sk_buff *skb,
				struct inet6_skb_parm *opt, u8 type,
				u8 code, int offset, __be32 info)
{
	return __udp6_lib_err(skb, opt, type, code, offset, info,
			      dev_net(skb->dev)->ipv4.udp_table);
}
static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
	enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct udp_sock *up = udp_sk(sk);
	int is_udplite = IS_UDPLITE(sk);

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto drop;
	}
	nf_reset_ct(skb);

	if (static_branch_unlikely(&udpv6_encap_needed_key) &&
	    READ_ONCE(up->encap_type)) {
		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);

		/*
		 * This is an encapsulation socket so pass the skb to
		 * the socket's udp_encap_rcv() hook. Otherwise, just
		 * fall through and pass this up the UDP socket.
		 * up->encap_rcv() returns the following value:
		 * =0 if skb was successfully passed to the encap
		 *    handler or was discarded by it.
		 * >0 if skb should be passed on to UDP.
		 * <0 if skb should be resubmitted as proto -N
		 */

		/* if we're overly short, let UDP handle it */
		encap_rcv = READ_ONCE(up->encap_rcv);
		if (encap_rcv) {
			int ret;

			/* Verify checksum before giving to encap */
			if (udp_lib_checksum_complete(skb))
				goto csum_error;

			ret = encap_rcv(sk, skb);
			if (ret <= 0) {
				__UDP6_INC_STATS(sock_net(sk),
						 UDP_MIB_INDATAGRAMS,
						 is_udplite);
				return -ret;
			}
		}

		/* FALLTHROUGH -- it's a UDP Packet */
	}

	/*
	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
	 */
	if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
		u16 pcrlen = READ_ONCE(up->pcrlen);

		if (pcrlen == 0) {          /* full coverage was set  */
			net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
					    UDP_SKB_CB(skb)->cscov, skb->len);
			goto drop;
		}
		if (UDP_SKB_CB(skb)->cscov < pcrlen) {
			net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
					    UDP_SKB_CB(skb)->cscov, pcrlen);
			goto drop;
		}
	}

	prefetch(&sk->sk_rmem_alloc);
	if (rcu_access_pointer(sk->sk_filter) &&
	    udp_lib_checksum_complete(skb))
		goto csum_error;

	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		goto drop;
	}

	udp_csum_pull_header(skb);

	skb_dst_drop(skb);

	return __udpv6_queue_rcv_skb(sk, skb);

csum_error:
	drop_reason = SKB_DROP_REASON_UDP_CSUM;
	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
	atomic_inc(&sk->sk_drops);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return -1;
}
static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *next, *segs;
	int ret;

	if (likely(!udp_unexpected_gso(sk, skb)))
		return udpv6_queue_rcv_one_skb(sk, skb);

	__skb_push(skb, -skb_mac_offset(skb));
	segs = udp_rcv_segment(sk, skb, false);
	skb_list_walk_safe(segs, skb, next) {
		__skb_pull(skb, skb_transport_offset(skb));

		udp_post_segment_fix_csum(skb);
		ret = udpv6_queue_rcv_one_skb(sk, skb);
		if (ret > 0)
			ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,
						 true);
	}
	return 0;
}
static bool __udp_v6_is_mcast_sock(struct net *net, const struct sock *sk,
				   __be16 loc_port, const struct in6_addr *loc_addr,
				   __be16 rmt_port, const struct in6_addr *rmt_addr,
				   int dif, int sdif, unsigned short hnum)
{
	const struct inet_sock *inet = inet_sk(sk);

	if (!net_eq(sock_net(sk), net))
		return false;

	if (udp_sk(sk)->udp_port_hash != hnum ||
	    sk->sk_family != PF_INET6 ||
	    (inet->inet_dport && inet->inet_dport != rmt_port) ||
	    (!ipv6_addr_any(&sk->sk_v6_daddr) &&
		    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
	    !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) ||
	    (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
		    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
		return false;
	if (!inet6_mc_check(sk, loc_addr, rmt_addr))
		return false;
	return true;
}
static void udp6_csum_zero_error(struct sk_buff *skb)
{
	/* RFC 2460 section 8.1 says that we SHOULD log
	 * this error. Well, it is reasonable.
	 */
	net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
			    &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
			    &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
}
/*
 * Note: called only from the BH handler context,
 * so we don't need to lock the hashes.
 */
static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
		const struct in6_addr *saddr, const struct in6_addr *daddr,
		struct udp_table *udptable, int proto)
{
	struct sock *sk, *first = NULL;
	const struct udphdr *uh = udp_hdr(skb);
	unsigned short hnum = ntohs(uh->dest);
	struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
	unsigned int offset = offsetof(typeof(*sk), sk_node);
	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
	int dif = inet6_iif(skb);
	int sdif = inet6_sdif(skb);
	struct hlist_node *node;
	struct sk_buff *nskb;

	if (use_hash2) {
		hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
			    udptable->mask;
		hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
		hslot = &udptable->hash2[hash2].hslot;
		offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
	}

	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
		if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
					    uh->source, saddr, dif, sdif,
					    hnum))
			continue;
		/* If zero checksum and no_check is not on for
		 * the socket then skip it.
		 */
		if (!uh->check && !udp_get_no_check6_rx(sk))
			continue;
		if (!first) {
			first = sk;
			continue;
		}
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (unlikely(!nskb)) {
			atomic_inc(&sk->sk_drops);
			__UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
					 IS_UDPLITE(sk));
			__UDP6_INC_STATS(net, UDP_MIB_INERRORS,
					 IS_UDPLITE(sk));
			continue;
		}

		if (udpv6_queue_rcv_skb(sk, nskb) > 0)
			consume_skb(nskb);
	}

	/* Also lookup *:port if we are using hash2 and haven't done so yet. */
	if (use_hash2 && hash2 != hash2_any) {
		hash2 = hash2_any;
		goto start_lookup;
	}

	if (first) {
		if (udpv6_queue_rcv_skb(first, skb) > 0)
			consume_skb(skb);
	} else {
		kfree_skb(skb);
		__UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
				 proto == IPPROTO_UDPLITE);
	}
	return 0;
}
static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
	if (udp_sk_rx_dst_set(sk, dst))
		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}
/* wrapper for udp_queue_rcv_skb taking care of csum conversion and
 * return code conversion for ip layer consumption
 */
static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
				struct udphdr *uh)
{
	int ret;

	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
		skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo);

	ret = udpv6_queue_rcv_skb(sk, skb);

	/* a return value > 0 means to resubmit the input */
	if (ret > 0)
		return ret;
	return 0;
}
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
		   int proto)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	const struct in6_addr *saddr, *daddr;
	struct net *net = dev_net(skb->dev);
	struct sock *sk = NULL;
	struct udphdr *uh;
	bool refcounted;
	u32 ulen = 0;

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto discard;

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
	uh = udp_hdr(skb);

	ulen = ntohs(uh->len);
	if (ulen > skb->len)
		goto short_packet;

	if (proto == IPPROTO_UDP) {
		/* UDP validates ulen. */

		/* Check for jumbo payload */
		if (ulen == 0)
			ulen = skb->len;

		if (ulen < sizeof(*uh))
			goto short_packet;

		if (ulen < skb->len) {
			if (pskb_trim_rcsum(skb, ulen))
				goto short_packet;
			saddr = &ipv6_hdr(skb)->saddr;
			daddr = &ipv6_hdr(skb)->daddr;
			uh = udp_hdr(skb);
		}
	}

	if (udp6_csum_init(skb, uh, proto))
		goto csum_error;

	/* Check if the socket is already available, e.g. due to early demux */
	sk = inet6_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
			      &refcounted, udp6_ehashfn);
	if (IS_ERR(sk))
		goto no_sk;

	if (sk) {
		struct dst_entry *dst = skb_dst(skb);
		int ret;

		if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
			udp6_sk_rx_dst_set(sk, dst);

		if (!uh->check && !udp_get_no_check6_rx(sk)) {
			if (refcounted)
				sock_put(sk);
			goto report_csum_error;
		}

		ret = udp6_unicast_rcv_skb(sk, skb, uh);
		if (refcounted)
			sock_put(sk);
		return ret;
	}

	/*
	 *	Multicast receive code
	 */
	if (ipv6_addr_is_multicast(daddr))
		return __udp6_lib_mcast_deliver(net, skb,
				saddr, daddr, udptable, proto);

	/* Unicast */
	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
	if (sk) {
		if (!uh->check && !udp_get_no_check6_rx(sk))
			goto report_csum_error;
		return udp6_unicast_rcv_skb(sk, skb, uh);
	}
no_sk:
	reason = SKB_DROP_REASON_NO_SOCKET;

	if (!uh->check)
		goto report_csum_error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard;
	nf_reset_ct(skb);

	if (udp_lib_checksum_complete(skb))
		goto csum_error;

	__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);

	sk_skb_reason_drop(sk, skb, reason);
	return 0;

short_packet:
	if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
		reason = SKB_DROP_REASON_PKT_TOO_SMALL;
	net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
			    proto == IPPROTO_UDPLITE ? "-Lite" : "",
			    saddr, ntohs(uh->source),
			    ulen, skb->len,
			    daddr, ntohs(uh->dest));
	goto discard;

report_csum_error:
	udp6_csum_zero_error(skb);
csum_error:
	if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
		reason = SKB_DROP_REASON_UDP_CSUM;
	__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
	__UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
}
static struct sock *__udp6_lib_demux_lookup(struct net *net,
			__be16 loc_port, const struct in6_addr *loc_addr,
			__be16 rmt_port, const struct in6_addr *rmt_addr,
			int dif, int sdif)
{
	struct udp_table *udptable = net->ipv4.udp_table;
	unsigned short hnum = ntohs(loc_port);
	struct udp_hslot *hslot2;
	unsigned int hash2;
	__portpair ports;
	struct sock *sk;

	hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
	hslot2 = udp_hashslot2(udptable, hash2);
	ports = INET_COMBINED_PORTS(rmt_port, hnum);

	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
		if (sk->sk_state == TCP_ESTABLISHED &&
		    inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
			return sk;
		/* Only check first socket in chain */
		break;
	}
	return NULL;
}
void udp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	const struct udphdr *uh;
	struct sock *sk;
	struct dst_entry *dst;
	int dif = skb->dev->ifindex;
	int sdif = inet6_sdif(skb);

	if (!pskb_may_pull(skb, skb_transport_offset(skb) +
	    sizeof(struct udphdr)))
		return;

	uh = udp_hdr(skb);

	if (skb->pkt_type == PACKET_HOST)
		sk = __udp6_lib_demux_lookup(net, uh->dest,
					     &ipv6_hdr(skb)->daddr,
					     uh->source, &ipv6_hdr(skb)->saddr,
					     dif, sdif);
	else
		return;

	if (!sk)
		return;

	skb->sk = sk;
	DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk));
	skb->destructor = sock_pfree;
	dst = rcu_dereference(sk->sk_rx_dst);

	if (dst)
		dst = dst_check(dst, sk->sk_rx_dst_cookie);
	if (dst) {
		/* set noref for now.
		 * any place which wants to hold dst has to call
		 * dst_hold_safe()
		 */
		skb_dst_set_noref(skb, dst);
	}
}
INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb)
{
	return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP);
}
/*
 *	Throw away all pending data and cancel the corking. Socket is locked.
 */
static void udp_v6_flush_pending_frames(struct sock *sk)
{
	struct udp_sock *up = udp_sk(sk);

	if (up->pending == AF_INET)
		udp_flush_pending_frames(sk);
	else if (up->pending) {
		up->len = 0;
		WRITE_ONCE(up->pending, 0);
		ip6_flush_pending_frames(sk);
	}
}
static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			     int addr_len)
{
	if (addr_len < offsetofend(struct sockaddr, sa_family))
		return -EINVAL;
	/* The following checks are replicated from __ip6_datagram_connect()
	 * and intended to prevent BPF program called below from accessing
	 * bytes that are out of the bound specified by user in addr_len.
	 */
	if (uaddr->sa_family == AF_INET) {
		if (ipv6_only_sock(sk))
			return -EAFNOSUPPORT;
		return udp_pre_connect(sk, uaddr, addr_len);
	}

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	int res;

	lock_sock(sk);
	res = __ip6_datagram_connect(sk, uaddr, addr_len);
	if (!res)
		udp6_hash4(sk);
	release_sock(sk);
	return res;
}
/**
 *	udp6_hwcsum_outgoing  -  handle outgoing HW checksumming
 *	@sk:	socket we are sending on
 *	@skb:	sk_buff containing the filled-in UDP header
 *		(checksum field must be zeroed out)
 *	@saddr: source address
 *	@daddr: destination address
 *	@len:	length of packet
 */
static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
				 const struct in6_addr *saddr,
				 const struct in6_addr *daddr, int len)
{
	unsigned int offset;
	struct udphdr *uh = udp_hdr(skb);
	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
	__wsum csum = 0;

	if (!frags) {
		/* Only one fragment on the socket.  */
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
		uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
	} else {
		/*
		 * HW-checksum won't work as there are two or more
		 * fragments on the socket so that all csums of sk_buffs
		 * should be together
		 */
		offset = skb_transport_offset(skb);
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		csum = skb->csum;

		skb->ip_summed = CHECKSUM_NONE;

		do {
			csum = csum_add(csum, frags->csum);
		} while ((frags = frags->next));

		uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
					    csum);
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;
	}
}
/*
 *	Sending
 */

static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
			   struct inet_cork *cork)
{
	struct sock *sk = skb->sk;
	struct udphdr *uh;
	int err = 0;
	int is_udplite = IS_UDPLITE(sk);
	__wsum csum = 0;
	int offset = skb_transport_offset(skb);
	int len = skb->len - offset;
	int datalen = len - sizeof(*uh);

	/*
	 * Create a UDP header
	 */
	uh = udp_hdr(skb);
	uh->source = fl6->fl6_sport;
	uh->dest = fl6->fl6_dport;
	uh->len = htons(len);
	uh->check = 0;

	if (cork->gso_size) {
		const int hlen = skb_network_header_len(skb) +
				 sizeof(struct udphdr);

		if (hlen + cork->gso_size > cork->fragsize) {
			kfree_skb(skb);
			return -EINVAL;
		}
		if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
			kfree_skb(skb);
			return -EINVAL;
		}
		if (udp_get_no_check6_tx(sk)) {
			kfree_skb(skb);
			return -EINVAL;
		}
		if (is_udplite || dst_xfrm(skb_dst(skb))) {
			kfree_skb(skb);
			return -EIO;
		}

		if (datalen > cork->gso_size) {
			skb_shinfo(skb)->gso_size = cork->gso_size;
			skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
			skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
								 cork->gso_size);

			/* Don't checksum the payload, skb will get segmented */
			goto csum_partial;
		}
	}

	if (is_udplite)
		csum = udplite_csum(skb);
	else if (udp_get_no_check6_tx(sk)) {   /* UDP csum disabled */
		skb->ip_summed = CHECKSUM_NONE;
		goto send;
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
csum_partial:
		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len);
		goto send;
	} else
		csum = udp_csum(skb);

	/* add protocol-dependent pseudo-header */
	uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
				    len, fl6->flowi6_proto, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

send:
	err = ip6_send_skb(skb);
	if (err) {
		if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) {
			UDP6_INC_STATS(sock_net(sk),
				       UDP_MIB_SNDBUFERRORS, is_udplite);
			err = 0;
		}
	} else {
		UDP6_INC_STATS(sock_net(sk),
			       UDP_MIB_OUTDATAGRAMS, is_udplite);
	}
	return err;
}
static int udp_v6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;
	struct udp_sock  *up = udp_sk(sk);
	int err = 0;

	if (up->pending == AF_INET)
		return udp_push_pending_frames(sk);

	skb = ip6_finish_skb(sk);
	if (!skb)
		goto out;

	err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6,
			      &inet_sk(sk)->cork.base);
out:
	up->len = 0;
	WRITE_ONCE(up->pending, 0);
	return err;
}
int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	struct ipv6_txoptions opt_space;
	struct udp_sock *up = udp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
	struct in6_addr *daddr, *final_p, final;
	struct ipv6_txoptions *opt = NULL;
	struct ipv6_txoptions *opt_to_free = NULL;
	struct ip6_flowlabel *flowlabel = NULL;
	struct inet_cork_full cork;
	struct flowi6 *fl6 = &cork.fl.u.ip6;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	int addr_len = msg->msg_namelen;
	bool connected = false;
	int ulen = len;
	int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
	int err;
	int is_udplite = IS_UDPLITE(sk);
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);

	ipcm6_init(&ipc6);
	ipc6.gso_size = READ_ONCE(up->gso_size);
	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
	ipc6.sockc.mark = READ_ONCE(sk->sk_mark);

	/* destination address check */
	if (sin6) {
		if (addr_len < offsetof(struct sockaddr, sa_data))
			return -EINVAL;

		switch (sin6->sin6_family) {
		case AF_INET6:
			if (addr_len < SIN6_LEN_RFC2133)
				return -EINVAL;
			daddr = &sin6->sin6_addr;
			if (ipv6_addr_any(daddr) &&
			    ipv6_addr_v4mapped(&np->saddr))
				ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
						       daddr);
			break;
		case AF_INET:
			goto do_udp_sendmsg;
		case AF_UNSPEC:
			msg->msg_name = sin6 = NULL;
			msg->msg_namelen = addr_len = 0;
			daddr = NULL;
			break;
		default:
			return -EINVAL;
		}
	} else if (!READ_ONCE(up->pending)) {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = &sk->sk_v6_daddr;
	} else
		daddr = NULL;

	if (daddr) {
		if (ipv6_addr_v4mapped(daddr)) {
			struct sockaddr_in sin;
			sin.sin_family = AF_INET;
			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
			sin.sin_addr.s_addr = daddr->s6_addr32[3];
			msg->msg_name = &sin;
			msg->msg_namelen = sizeof(sin);
do_udp_sendmsg:
			err = ipv6_only_sock(sk) ?
				-ENETUNREACH : udp_sendmsg(sk, msg, len);
			msg->msg_name = sin6;
			msg->msg_namelen = addr_len;
			return err;
		}
	}

	/* Rough check on arithmetic overflow,
	   better check is made in ip6_append_data().
	   */
	if (len > INT_MAX - sizeof(struct udphdr))
		return -EMSGSIZE;

	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
	if (READ_ONCE(up->pending)) {
		if (READ_ONCE(up->pending) == AF_INET)
			return udp_sendmsg(sk, msg, len);
		/*
		 * There are pending frames.
		 * The socket lock must be held while it's corked.
		 */
		lock_sock(sk);
		if (likely(up->pending)) {
			if (unlikely(up->pending != AF_INET6)) {
				release_sock(sk);
				return -EAFNOSUPPORT;
			}
			dst = NULL;
			goto do_append_data;
		}
		release_sock(sk);
	}
	ulen += sizeof(struct udphdr);

	memset(fl6, 0, sizeof(*fl6));

	if (sin6) {
		if (sin6->sin6_port == 0)
			return -EINVAL;

		fl6->fl6_dport = sin6->sin6_port;
		daddr = &sin6->sin6_addr;

		if (inet6_test_bit(SNDFLOW, sk)) {
			fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
			if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
				flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
				if (IS_ERR(flowlabel))
					return -EINVAL;
			}
		}

		/*
		 * Otherwise it will be difficult to maintain
		 * sk->sk_dst_cache.
		 */
		if (sk->sk_state == TCP_ESTABLISHED &&
		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
			daddr = &sk->sk_v6_daddr;

		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    sin6->sin6_scope_id &&
		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
			fl6->flowi6_oif = sin6->sin6_scope_id;
	} else {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;

		fl6->fl6_dport = inet->inet_dport;
		daddr = &sk->sk_v6_daddr;
		fl6->flowlabel = np->flow_label;
		connected = true;
	}

	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if);

	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;

	fl6->flowi6_uid = sk->sk_uid;

	if (msg->msg_controllen) {
		opt = &opt_space;
		memset(opt, 0, sizeof(struct ipv6_txoptions));
		opt->tot_len = sizeof(*opt);
		ipc6.opt = opt;

		err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
		if (err > 0)
			err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
						    &ipc6);
		if (err < 0) {
			fl6_sock_release(flowlabel);
			return err;
		}
		if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
		}
		if (!(opt->opt_nflen|opt->opt_flen))
			opt = NULL;
		connected = false;
	}
	if (!opt) {
		opt = txopt_get(np);
		opt_to_free = opt;
	}
	if (flowlabel)
		opt = fl6_merge_options(&opt_space, flowlabel, opt);
	opt = ipv6_fixup_options(&opt_space, opt);
	ipc6.opt = opt;

	fl6->flowi6_proto = sk->sk_protocol;
	fl6->flowi6_mark = ipc6.sockc.mark;
	fl6->daddr = *daddr;
	if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
		fl6->saddr = np->saddr;
	fl6->fl6_sport = inet->inet_sport;

	if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
		err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
					   (struct sockaddr *)sin6,
					   &addr_len,
					   &fl6->saddr);
		if (err)
			goto out_no_dst;
		if (sin6) {
			if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
				/* BPF program rewrote IPv6-only by IPv4-mapped
				 * IPv6. It's currently unsupported.
				 */
				err = -ENOTSUPP;
				goto out_no_dst;
			}
			if (sin6->sin6_port == 0) {
				/* BPF program set invalid port. Reject it. */
				err = -EINVAL;
				goto out_no_dst;
			}
			fl6->fl6_dport = sin6->sin6_port;
			fl6->daddr = sin6->sin6_addr;
		}
	}

	if (ipv6_addr_any(&fl6->daddr))
		fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */

	final_p = fl6_update_dst(fl6, opt, &final);
	if (final_p)
		connected = false;

	if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
		fl6->flowi6_oif = READ_ONCE(np->mcast_oif);
		connected = false;
	} else if (!fl6->flowi6_oif)
		fl6->flowi6_oif = READ_ONCE(np->ucast_oif);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));

	if (ipc6.tclass < 0)
		ipc6.tclass = np->tclass;

	fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);

	dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		dst = NULL;
		goto out;
	}

	if (ipc6.hlimit < 0)
		ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst);

	if (msg->msg_flags&MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	/* Lockless fast path for the non-corking case */
	if (!corkreq) {
		struct sk_buff *skb;

		skb = ip6_make_skb(sk, getfrag, msg, ulen,
				   sizeof(struct udphdr), &ipc6,
				   dst_rt6_info(dst),
				   msg->msg_flags, &cork);
		err = PTR_ERR(skb);
		if (!IS_ERR_OR_NULL(skb))
			err = udp_v6_send_skb(skb, fl6, &cork.base);
		/* ip6_make_skb steals dst reference */
		goto out_no_dst;
	}

	lock_sock(sk);
	if (unlikely(up->pending)) {
		/* The socket is already corked while preparing it. */
		/* ... which is an evident application bug. --ANK */
		release_sock(sk);

		net_dbg_ratelimited("udp cork app bug 2\n");
		err = -EINVAL;
		goto out;
	}

	WRITE_ONCE(up->pending, AF_INET6);

do_append_data:
	if (ipc6.dontfrag < 0)
		ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
	up->len += ulen;
	err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
			      &ipc6, fl6, dst_rt6_info(dst),
			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
	if (err)
		udp_v6_flush_pending_frames(sk);
	else if (!corkreq)
		err = udp_v6_push_pending_frames(sk);
	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
		WRITE_ONCE(up->pending, 0);

	if (err > 0)
		err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0;
	release_sock(sk);

out:
	dst_release(dst);
out_no_dst:
	fl6_sock_release(flowlabel);
	txopt_put(opt_to_free);
	if (!err)
		return len;
	/*
	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
	 * we don't have a good statistic (IpOutDiscards but it can be too many
	 * things).  We could add another new stat but at least for now that
	 * seems like overkill.
	 */
	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
		UDP6_INC_STATS(sock_net(sk),
			       UDP_MIB_SNDBUFERRORS, is_udplite);
	}
	return err;

do_confirm:
	if (msg->msg_flags & MSG_PROBE)
		dst_confirm_neigh(dst, &fl6->daddr);
	if (!(msg->msg_flags&MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}
EXPORT_SYMBOL(udpv6_sendmsg);
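/* Illustrative user-space sketch (not part of this file): the cork->gso_size
 * handling in udp_v6_send_skb() above is driven by the UDP_SEGMENT socket
 * option (or the matching cmsg); one large write is then split into
 * gso_size-byte datagrams on transmit. Hypothetical example:
 */
#if 0
#include <sys/socket.h>
#include <linux/udp.h>	/* UDP_SEGMENT */

static ssize_t send_gso(int fd, const void *buf, size_t len)
{
	int gso_size = 1400;	/* payload bytes per segment, example value */

	setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size));
	return send(fd, buf, len, 0);	/* on a connected socket */
}
#endif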
static void udpv6_splice_eof(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct udp_sock *up = udp_sk(sk);

	if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk))
		return;

	lock_sock(sk);
	if (up->pending && !udp_test_bit(CORK, sk))
		udp_v6_push_pending_frames(sk);
	release_sock(sk);
}
void udpv6_destroy_sock(struct sock *sk)
{
	struct udp_sock *up = udp_sk(sk);

	lock_sock(sk);

	/* protects from races with udp_abort() */
	sock_set_flag(sk, SOCK_DEAD);
	udp_v6_flush_pending_frames(sk);
	release_sock(sk);

	if (static_branch_unlikely(&udpv6_encap_needed_key)) {
		if (up->encap_type) {
			void (*encap_destroy)(struct sock *sk);
			encap_destroy = READ_ONCE(up->encap_destroy);
			if (encap_destroy)
				encap_destroy(sk);
		}
		if (udp_test_bit(ENCAP_ENABLED, sk)) {
			static_branch_dec(&udpv6_encap_needed_key);
			udp_encap_disable();
		}
	}
}
/*
 *	Socket option code for UDP
 */
int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
		     unsigned int optlen)
{
	if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET)
		return udp_lib_setsockopt(sk, level, optname,
					  optval, optlen,
					  udp_v6_push_pending_frames);
	return ipv6_setsockopt(sk, level, optname, optval, optlen);
}

int udpv6_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *optlen)
{
	if (level == SOL_UDP || level == SOL_UDPLITE)
		return udp_lib_getsockopt(sk, level, optname, optval, optlen);
	return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
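/* Illustrative user-space sketch (not part of this file): options at
 * SOL_UDP/SOL_UDPLITE land in udp_lib_setsockopt() above, everything else
 * falls through to ipv6_setsockopt(). Hypothetical example with UDP_CORK,
 * which merges several writes into one datagram:
 */
#if 0
#include <sys/socket.h>
#include <linux/udp.h>	/* UDP_CORK */

static void send_two_chunks_as_one(int fd, const void *a, size_t alen,
				   const void *b, size_t blen)
{
	int on = 1, off = 0;

	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &on, sizeof(on));
	send(fd, a, alen, 0);
	send(fd, b, blen, 0);	/* appended to the same datagram */
	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &off, sizeof(off));
}
#endif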
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
int udp6_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
	} else {
		int bucket = ((struct udp_iter_state *)seq->private)->bucket;
		const struct inet_sock *inet = inet_sk((const struct sock *)v);
		__u16 srcp = ntohs(inet->inet_sport);
		__u16 destp = ntohs(inet->inet_dport);
		__ip6_dgram_sock_seq_show(seq, v, srcp, destp,
					  udp_rqueue_get(v), bucket);
	}
	return 0;
}

const struct seq_operations udp6_seq_ops = {
	.start		= udp_seq_start,
	.next		= udp_seq_next,
	.stop		= udp_seq_stop,
	.show		= udp6_seq_show,
};
EXPORT_SYMBOL(udp6_seq_ops);

static struct udp_seq_afinfo udp6_seq_afinfo = {
	.family		= AF_INET6,
	.udp_table	= NULL,
};

int __net_init udp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("udp6", 0444, net->proc_net, &udp6_seq_ops,
			sizeof(struct udp_iter_state), &udp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void udp6_proc_exit(struct net *net)
{
	remove_proc_entry("udp6", net->proc_net);
}
#endif /* CONFIG_PROC_FS */
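/* Illustrative user-space sketch (not part of this file): the seq_file
 * registered above is read as plain text, one header line followed by one
 * line per socket. Hypothetical example:
 */
#if 0
#include <stdio.h>

static void dump_udp6_sockets(void)
{
	char line[512];
	FILE *f = fopen("/proc/net/udp6", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}
#endif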
/* ------------------------------------------------------------------------ */

struct proto udpv6_prot = {
	.name			= "UDPv6",
	.owner			= THIS_MODULE,
	.close			= udp_lib_close,
	.pre_connect		= udpv6_pre_connect,
	.connect		= udpv6_connect,
	.disconnect		= udp_disconnect,
	.ioctl			= udp_ioctl,
	.init			= udpv6_init_sock,
	.destroy		= udpv6_destroy_sock,
	.setsockopt		= udpv6_setsockopt,
	.getsockopt		= udpv6_getsockopt,
	.sendmsg		= udpv6_sendmsg,
	.recvmsg		= udpv6_recvmsg,
	.splice_eof		= udpv6_splice_eof,
	.release_cb		= ip6_datagram_release_cb,
	.hash			= udp_lib_hash,
	.unhash			= udp_lib_unhash,
	.rehash			= udp_v6_rehash,
	.get_port		= udp_v6_get_port,
	.put_port		= udp_lib_unhash,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= udp_bpf_update_proto,
#endif

	.memory_allocated	= &udp_memory_allocated,
	.per_cpu_fw_alloc	= &udp_memory_per_cpu_fw_alloc,

	.sysctl_mem		= sysctl_udp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_udp_wmem_min),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_udp_rmem_min),
	.obj_size		= sizeof(struct udp6_sock),
	.ipv6_pinfo_offset	= offsetof(struct udp6_sock, inet6),
	.h.udp_table		= NULL,
	.diag_destroy		= udp_abort,
};

static struct inet_protosw udpv6_protosw = {
	.type		= SOCK_DGRAM,
	.protocol	= IPPROTO_UDP,
	.prot		= &udpv6_prot,
	.ops		= &inet6_dgram_ops,
	.flags		= INET_PROTOSW_PERMANENT,
};
int __init udpv6_init(void)
{
	int ret;

	net_hotdata.udpv6_protocol = (struct inet6_protocol) {
		.handler	= udpv6_rcv,
		.err_handler	= udpv6_err,
		.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
	};
	ret = inet6_add_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
	if (ret)
		goto out;

	ret = inet6_register_protosw(&udpv6_protosw);
	if (ret)
		goto out_udpv6_protocol;
out:
	return ret;

out_udpv6_protocol:
	inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
	goto out;
}

void udpv6_exit(void)
{
	inet6_unregister_protosw(&udpv6_protosw);
	inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
}