1 /* $NetBSD: udp_usrreq.c,v 1.178 2009/07/19 23:17:33 minskim Exp $ */
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
34 * The Regents of the University of California. All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: udp_usrreq.c,v 1.178 2009/07/19 23:17:33 minskim Exp $");
67 #include "opt_compat_netbsd.h"
68 #include "opt_ipsec.h"
69 #include "opt_inet_csum.h"
70 #include "opt_ipkdb.h"
71 #include "opt_mbuftrace.h"
73 #include <sys/param.h>
74 #include <sys/malloc.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/errno.h>
81 #include <sys/systm.h>
83 #include <sys/domain.h>
84 #include <sys/sysctl.h>
87 #include <net/route.h>
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/ip_var.h>
95 #include <netinet/ip_icmp.h>
96 #include <netinet/udp.h>
97 #include <netinet/udp_var.h>
98 #include <netinet/udp_private.h>
101 #include <netinet/ip6.h>
102 #include <netinet/icmp6.h>
103 #include <netinet6/ip6_var.h>
104 #include <netinet6/ip6_private.h>
105 #include <netinet6/in6_pcb.h>
106 #include <netinet6/udp6_var.h>
107 #include <netinet6/udp6_private.h>
108 #include <netinet6/scope6_var.h>
112 /* always need ip6.h for IP6_EXTHDR_GET */
113 #include <netinet/ip6.h>
117 #if defined(NFAITH) && NFAITH > 0
118 #include <net/if_faith.h>
121 #include <machine/stdarg.h>
124 #include <netipsec/ipsec.h>
125 #include <netipsec/ipsec_var.h>
126 #include <netipsec/ipsec_private.h>
127 #include <netipsec/esp.h>
129 #include <netipsec/ipsec6.h>
131 #endif /* FAST_IPSEC */
134 #include <netinet6/ipsec.h>
135 #include <netinet6/ipsec_private.h>
136 #include <netinet6/esp.h>
137 #include <netkey/key.h>
141 #include <compat/sys/socket.h>
145 #include <ipkdb/ipkdb.h>
149 * UDP protocol implementation.
150 * Per RFC 768, August, 1980.
153 int udp_do_loopback_cksum
= 0;
155 struct inpcbtable udbtable
;
157 percpu_t
*udpstat_percpu
;
161 static int udp4_espinudp (struct mbuf
**, int, struct sockaddr
*,
164 static void udp4_sendup (struct mbuf
*, int, struct sockaddr
*,
166 static int udp4_realinput (struct sockaddr_in
*, struct sockaddr_in
*,
167 struct mbuf
**, int);
168 static int udp4_input_checksum(struct mbuf
*, const struct udphdr
*, int, int);
171 static void udp6_sendup (struct mbuf
*, int, struct sockaddr
*,
173 static int udp6_realinput (int, struct sockaddr_in6
*,
174 struct sockaddr_in6
*, struct mbuf
*, int);
175 static int udp6_input_checksum(struct mbuf
*, const struct udphdr
*, int, int);
178 static void udp_notify (struct inpcb
*, int);
182 #define UDBHASHSIZE 128
184 int udbhashsize
= UDBHASHSIZE
;
187 struct mowner udp_mowner
= MOWNER_INIT("udp", "");
188 struct mowner udp_rx_mowner
= MOWNER_INIT("udp", "rx");
189 struct mowner udp_tx_mowner
= MOWNER_INIT("udp", "tx");
192 #ifdef UDP_CSUM_COUNTERS
193 #include <sys/device.h>
196 struct evcnt udp_hwcsum_bad
= EVCNT_INITIALIZER(EVCNT_TYPE_MISC
,
197 NULL
, "udp", "hwcsum bad");
198 struct evcnt udp_hwcsum_ok
= EVCNT_INITIALIZER(EVCNT_TYPE_MISC
,
199 NULL
, "udp", "hwcsum ok");
200 struct evcnt udp_hwcsum_data
= EVCNT_INITIALIZER(EVCNT_TYPE_MISC
,
201 NULL
, "udp", "hwcsum data");
202 struct evcnt udp_swcsum
= EVCNT_INITIALIZER(EVCNT_TYPE_MISC
,
203 NULL
, "udp", "swcsum");
205 EVCNT_ATTACH_STATIC(udp_hwcsum_bad
);
206 EVCNT_ATTACH_STATIC(udp_hwcsum_ok
);
207 EVCNT_ATTACH_STATIC(udp_hwcsum_data
);
208 EVCNT_ATTACH_STATIC(udp_swcsum
);
209 #endif /* defined(INET) */
212 struct evcnt udp6_hwcsum_bad
= EVCNT_INITIALIZER(EVCNT_TYPE_MISC
,
213 NULL
, "udp6", "hwcsum bad");
214 struct evcnt udp6_hwcsum_ok
= EVCNT_INITIALIZER(EVCNT_TYPE_MISC
,
215 NULL
, "udp6", "hwcsum ok");
216 struct evcnt udp6_hwcsum_data
= EVCNT_INITIALIZER(EVCNT_TYPE_MISC
,
217 NULL
, "udp6", "hwcsum data");
218 struct evcnt udp6_swcsum
= EVCNT_INITIALIZER(EVCNT_TYPE_MISC
,
219 NULL
, "udp6", "swcsum");
221 EVCNT_ATTACH_STATIC(udp6_hwcsum_bad
);
222 EVCNT_ATTACH_STATIC(udp6_hwcsum_ok
);
223 EVCNT_ATTACH_STATIC(udp6_hwcsum_data
);
224 EVCNT_ATTACH_STATIC(udp6_swcsum
);
225 #endif /* defined(INET6) */
227 #define UDP_CSUM_COUNTER_INCR(ev) (ev)->ev_count++
231 #define UDP_CSUM_COUNTER_INCR(ev) /* nothing */
233 #endif /* UDP_CSUM_COUNTERS */
235 static void sysctl_net_inet_udp_setup(struct sysctllog
**);
241 sysctl_net_inet_udp_setup(NULL
);
243 in_pcbinit(&udbtable
, udbhashsize
, udbhashsize
);
245 MOWNER_ATTACH(&udp_tx_mowner
);
246 MOWNER_ATTACH(&udp_rx_mowner
);
247 MOWNER_ATTACH(&udp_mowner
);
250 udpstat_percpu
= percpu_alloc(sizeof(uint64_t) * UDP_NSTATS
);
253 udp6stat_percpu
= percpu_alloc(sizeof(uint64_t) * UDP6_NSTATS
);
258 * Checksum extended UDP header and data.
262 udp_input_checksum(int af
, struct mbuf
*m
, const struct udphdr
*uh
,
269 return udp4_input_checksum(m
, uh
, iphlen
, len
);
273 return udp6_input_checksum(m
, uh
, iphlen
, len
);
277 panic("udp_input_checksum: unknown af %d", af
);
286 * Checksum extended UDP header and data.
290 udp4_input_checksum(struct mbuf
*m
, const struct udphdr
*uh
,
295 * XXX it's better to record and check if this mbuf is
302 switch (m
->m_pkthdr
.csum_flags
&
303 ((m
->m_pkthdr
.rcvif
->if_csum_flags_rx
& M_CSUM_UDPv4
) |
304 M_CSUM_TCP_UDP_BAD
| M_CSUM_DATA
)) {
305 case M_CSUM_UDPv4
|M_CSUM_TCP_UDP_BAD
:
306 UDP_CSUM_COUNTER_INCR(&udp_hwcsum_bad
);
309 case M_CSUM_UDPv4
|M_CSUM_DATA
: {
310 u_int32_t hw_csum
= m
->m_pkthdr
.csum_data
;
312 UDP_CSUM_COUNTER_INCR(&udp_hwcsum_data
);
313 if (m
->m_pkthdr
.csum_flags
& M_CSUM_NO_PSEUDOHDR
) {
314 const struct ip
*ip
=
315 mtod(m
, const struct ip
*);
317 hw_csum
= in_cksum_phdr(ip
->ip_src
.s_addr
,
319 htons(hw_csum
+ len
+ IPPROTO_UDP
));
321 if ((hw_csum
^ 0xffff) != 0)
327 /* Checksum was okay. */
328 UDP_CSUM_COUNTER_INCR(&udp_hwcsum_ok
);
333 * Need to compute it ourselves. Maybe skip checksum
334 * on loopback interfaces.
336 if (__predict_true(!(m
->m_pkthdr
.rcvif
->if_flags
&
338 udp_do_loopback_cksum
)) {
339 UDP_CSUM_COUNTER_INCR(&udp_swcsum
);
340 if (in4_cksum(m
, IPPROTO_UDP
, iphlen
, len
) != 0)
349 UDP_STATINC(UDP_STAT_BADSUM
);
354 udp_input(struct mbuf
*m
, ...)
357 struct sockaddr_in src
, dst
;
366 iphlen
= va_arg(ap
, int);
367 (void)va_arg(ap
, int); /* ignore value, advance ap */
370 MCLAIM(m
, &udp_rx_mowner
);
371 UDP_STATINC(UDP_STAT_IPACKETS
);
374 * Get IP and UDP header together in first mbuf.
376 ip
= mtod(m
, struct ip
*);
377 IP6_EXTHDR_GET(uh
, struct udphdr
*, m
, iphlen
, sizeof(struct udphdr
));
379 UDP_STATINC(UDP_STAT_HDROPS
);
382 KASSERT(UDP_HDR_ALIGNED_P(uh
));
384 /* destination port of 0 is illegal, based on RFC768. */
385 if (uh
->uh_dport
== 0)
389 * Make mbuf data length reflect UDP length.
390 * If not enough data to reflect UDP length, drop.
392 ip_len
= ntohs(ip
->ip_len
);
393 len
= ntohs((u_int16_t
)uh
->uh_ulen
);
394 if (ip_len
!= iphlen
+ len
) {
395 if (ip_len
< iphlen
+ len
|| len
< sizeof(struct udphdr
)) {
396 UDP_STATINC(UDP_STAT_BADLEN
);
399 m_adj(m
, iphlen
+ len
- ip_len
);
403 * Checksum extended UDP header and data.
405 if (udp4_input_checksum(m
, uh
, iphlen
, len
))
408 /* construct source and dst sockaddrs. */
409 sockaddr_in_init(&src
, &ip
->ip_src
, uh
->uh_sport
);
410 sockaddr_in_init(&dst
, &ip
->ip_dst
, uh
->uh_dport
);
412 if ((n
= udp4_realinput(&src
, &dst
, &m
, iphlen
)) == -1) {
413 UDP_STATINC(UDP_STAT_HDROPS
);
417 if (IN_MULTICAST(ip
->ip_dst
.s_addr
) || n
== 0) {
418 struct sockaddr_in6 src6
, dst6
;
420 memset(&src6
, 0, sizeof(src6
));
421 src6
.sin6_family
= AF_INET6
;
422 src6
.sin6_len
= sizeof(struct sockaddr_in6
);
423 src6
.sin6_addr
.s6_addr
[10] = src6
.sin6_addr
.s6_addr
[11] = 0xff;
424 memcpy(&src6
.sin6_addr
.s6_addr
[12], &ip
->ip_src
,
426 src6
.sin6_port
= uh
->uh_sport
;
427 memset(&dst6
, 0, sizeof(dst6
));
428 dst6
.sin6_family
= AF_INET6
;
429 dst6
.sin6_len
= sizeof(struct sockaddr_in6
);
430 dst6
.sin6_addr
.s6_addr
[10] = dst6
.sin6_addr
.s6_addr
[11] = 0xff;
431 memcpy(&dst6
.sin6_addr
.s6_addr
[12], &ip
->ip_dst
,
433 dst6
.sin6_port
= uh
->uh_dport
;
435 n
+= udp6_realinput(AF_INET
, &src6
, &dst6
, m
, iphlen
);
440 if (m
->m_flags
& (M_BCAST
| M_MCAST
)) {
441 UDP_STATINC(UDP_STAT_NOPORTBCAST
);
444 UDP_STATINC(UDP_STAT_NOPORT
);
446 if (checkipkdb(&ip
->ip_src
, uh
->uh_sport
, uh
->uh_dport
,
447 m
, iphlen
+ sizeof(struct udphdr
),
448 m
->m_pkthdr
.len
- iphlen
- sizeof(struct udphdr
))) {
450 * It was a debugger connect packet,
456 icmp_error(m
, ICMP_UNREACH
, ICMP_UNREACH_PORT
, 0, 0);
472 udp6_input_checksum(struct mbuf
*m
, const struct udphdr
*uh
, int off
, int len
)
476 * XXX it's better to record and check if this mbuf is
480 if (__predict_false((m
->m_flags
& M_LOOP
) && !udp_do_loopback_cksum
)) {
483 if (uh
->uh_sum
== 0) {
484 UDP6_STATINC(UDP6_STAT_NOSUM
);
488 switch (m
->m_pkthdr
.csum_flags
&
489 ((m
->m_pkthdr
.rcvif
->if_csum_flags_rx
& M_CSUM_UDPv6
) |
490 M_CSUM_TCP_UDP_BAD
| M_CSUM_DATA
)) {
491 case M_CSUM_UDPv6
|M_CSUM_TCP_UDP_BAD
:
492 UDP_CSUM_COUNTER_INCR(&udp6_hwcsum_bad
);
493 UDP6_STATINC(UDP6_STAT_BADSUM
);
497 case M_CSUM_UDPv6
|M_CSUM_DATA
:
501 /* Checksum was okay. */
502 UDP_CSUM_COUNTER_INCR(&udp6_hwcsum_ok
);
507 * Need to compute it ourselves. Maybe skip checksum
508 * on loopback interfaces.
510 UDP_CSUM_COUNTER_INCR(&udp6_swcsum
);
511 if (in6_cksum(m
, IPPROTO_UDP
, off
, len
) != 0) {
512 UDP6_STATINC(UDP6_STAT_BADSUM
);
524 udp6_input(struct mbuf
**mp
, int *offp
, int proto
)
526 struct mbuf
*m
= *mp
;
528 struct sockaddr_in6 src
, dst
;
531 u_int32_t plen
, ulen
;
533 ip6
= mtod(m
, struct ip6_hdr
*);
535 #if defined(NFAITH) && 0 < NFAITH
536 if (faithprefix(&ip6
->ip6_dst
)) {
537 /* send icmp6 host unreach? */
543 UDP6_STATINC(UDP6_STAT_IPACKETS
);
545 /* check for jumbogram is done in ip6_input. we can trust pkthdr.len */
546 plen
= m
->m_pkthdr
.len
- off
;
547 IP6_EXTHDR_GET(uh
, struct udphdr
*, m
, off
, sizeof(struct udphdr
));
549 IP6_STATINC(IP6_STAT_TOOSHORT
);
552 KASSERT(UDP_HDR_ALIGNED_P(uh
));
553 ulen
= ntohs((u_short
)uh
->uh_ulen
);
555 * RFC2675 section 4: jumbograms will have 0 in the UDP header field,
556 * iff payload length > 0xffff.
558 if (ulen
== 0 && plen
> 0xffff)
562 UDP6_STATINC(UDP6_STAT_BADLEN
);
566 /* destination port of 0 is illegal, based on RFC768. */
567 if (uh
->uh_dport
== 0)
570 /* Be proactive about malicious use of IPv4 mapped address */
571 if (IN6_IS_ADDR_V4MAPPED(&ip6
->ip6_src
) ||
572 IN6_IS_ADDR_V4MAPPED(&ip6
->ip6_dst
)) {
578 * Checksum extended UDP header and data. Maybe skip checksum
579 * on loopback interfaces.
581 if (udp6_input_checksum(m
, uh
, off
, ulen
))
585 * Construct source and dst sockaddrs.
587 memset(&src
, 0, sizeof(src
));
588 src
.sin6_family
= AF_INET6
;
589 src
.sin6_len
= sizeof(struct sockaddr_in6
);
590 src
.sin6_addr
= ip6
->ip6_src
;
591 src
.sin6_port
= uh
->uh_sport
;
592 memset(&dst
, 0, sizeof(dst
));
593 dst
.sin6_family
= AF_INET6
;
594 dst
.sin6_len
= sizeof(struct sockaddr_in6
);
595 dst
.sin6_addr
= ip6
->ip6_dst
;
596 dst
.sin6_port
= uh
->uh_dport
;
598 if (udp6_realinput(AF_INET6
, &src
, &dst
, m
, off
) == 0) {
599 if (m
->m_flags
& M_MCAST
) {
600 UDP6_STATINC(UDP6_STAT_NOPORTMCAST
);
603 UDP6_STATINC(UDP6_STAT_NOPORT
);
604 icmp6_error(m
, ICMP6_DST_UNREACH
, ICMP6_DST_UNREACH_NOPORT
, 0);
617 udp4_sendup(struct mbuf
*m
, int off
/* offset of data portion */,
618 struct sockaddr
*src
, struct socket
*so
)
620 struct mbuf
*opts
= NULL
;
622 struct inpcb
*inp
= NULL
;
626 switch (so
->so_proto
->pr_domain
->dom_family
) {
638 #if defined(IPSEC) || defined(FAST_IPSEC)
639 /* check AH/ESP integrity. */
640 if (so
!= NULL
&& ipsec4_in_reject_so(m
, so
)) {
641 IPSEC_STATINC(IPSEC_STAT_IN_POLVIO
);
642 if ((n
= m_copypacket(m
, M_DONTWAIT
)) != NULL
)
643 icmp_error(n
, ICMP_UNREACH
, ICMP_UNREACH_ADMIN_PROHIBIT
,
649 if ((n
= m_copypacket(m
, M_DONTWAIT
)) != NULL
) {
650 if (inp
&& (inp
->inp_flags
& INP_CONTROLOPTS
652 || so
->so_options
& SO_OTIMESTAMP
654 || so
->so_options
& SO_TIMESTAMP
)) {
655 struct ip
*ip
= mtod(n
, struct ip
*);
656 ip_savecontrol(inp
, &opts
, ip
, n
);
660 if (sbappendaddr(&so
->so_rcv
, src
, n
,
665 so
->so_rcv
.sb_overflowed
++;
666 UDP_STATINC(UDP_STAT_FULLSOCK
);
675 udp6_sendup(struct mbuf
*m
, int off
/* offset of data portion */,
676 struct sockaddr
*src
, struct socket
*so
)
678 struct mbuf
*opts
= NULL
;
680 struct in6pcb
*in6p
= NULL
;
684 if (so
->so_proto
->pr_domain
->dom_family
!= AF_INET6
)
686 in6p
= sotoin6pcb(so
);
688 #if defined(IPSEC) || defined(FAST_IPSEC)
689 /* check AH/ESP integrity. */
690 if (so
!= NULL
&& ipsec6_in_reject_so(m
, so
)) {
691 IPSEC6_STATINC(IPSEC_STAT_IN_POLVIO
);
692 if ((n
= m_copypacket(m
, M_DONTWAIT
)) != NULL
)
693 icmp6_error(n
, ICMP6_DST_UNREACH
,
694 ICMP6_DST_UNREACH_ADMIN
, 0);
699 if ((n
= m_copypacket(m
, M_DONTWAIT
)) != NULL
) {
700 if (in6p
&& (in6p
->in6p_flags
& IN6P_CONTROLOPTS
702 || in6p
->in6p_socket
->so_options
& SO_OTIMESTAMP
704 || in6p
->in6p_socket
->so_options
& SO_TIMESTAMP
)) {
705 struct ip6_hdr
*ip6
= mtod(n
, struct ip6_hdr
*);
706 ip6_savecontrol(in6p
, &opts
, ip6
, n
);
710 if (sbappendaddr(&so
->so_rcv
, src
, n
, opts
) == 0) {
714 so
->so_rcv
.sb_overflowed
++;
715 UDP6_STATINC(UDP6_STAT_FULLSOCK
);
724 udp4_realinput(struct sockaddr_in
*src
, struct sockaddr_in
*dst
,
725 struct mbuf
**mp
, int off
/* offset of udphdr */)
727 u_int16_t
*sport
, *dport
;
729 struct in_addr
*src4
, *dst4
;
730 struct inpcb_hdr
*inph
;
732 struct mbuf
*m
= *mp
;
735 off
+= sizeof(struct udphdr
); /* now, offset of payload */
737 if (src
->sin_family
!= AF_INET
|| dst
->sin_family
!= AF_INET
)
740 src4
= &src
->sin_addr
;
741 sport
= &src
->sin_port
;
742 dst4
= &dst
->sin_addr
;
743 dport
= &dst
->sin_port
;
745 if (IN_MULTICAST(dst4
->s_addr
) ||
746 in_broadcast(*dst4
, m
->m_pkthdr
.rcvif
)) {
748 * Deliver a multicast or broadcast datagram to *all* sockets
749 * for which the local and remote addresses and ports match
750 * those of the incoming datagram. This allows more than
751 * one process to receive multi/broadcasts on the same port.
752 * (This really ought to be done for unicast datagrams as
753 * well, but that would cause problems with existing
754 * applications that open both address-specific sockets and
755 * a wildcard socket listening to the same port -- they would
756 * end up receiving duplicates of every unicast datagram.
757 * Those applications open the multiple sockets to overcome an
758 * inadequacy of the UDP socket interface, but for backwards
759 * compatibility we avoid the problem here rather than
760 * fixing the interface. Maybe 4.5BSD will remedy this?)
764 * KAME note: traditionally we dropped udpiphdr from mbuf here.
765 * we need udpiphdr for IPsec processing so we do that later.
768 * Locate pcb(s) for datagram.
770 CIRCLEQ_FOREACH(inph
, &udbtable
.inpt_queue
, inph_queue
) {
771 inp
= (struct inpcb
*)inph
;
772 if (inp
->inp_af
!= AF_INET
)
775 if (inp
->inp_lport
!= *dport
)
777 if (!in_nullhost(inp
->inp_laddr
)) {
778 if (!in_hosteq(inp
->inp_laddr
, *dst4
))
781 if (!in_nullhost(inp
->inp_faddr
)) {
782 if (!in_hosteq(inp
->inp_faddr
, *src4
) ||
783 inp
->inp_fport
!= *sport
)
787 udp4_sendup(m
, off
, (struct sockaddr
*)src
,
792 * Don't look for additional matches if this one does
793 * not have either the SO_REUSEPORT or SO_REUSEADDR
794 * socket options set. This heuristic avoids searching
795 * through all pcbs in the common case of a non-shared
796 * port. It assumes that an application will never
797 * clear these options after setting them.
799 if ((inp
->inp_socket
->so_options
&
800 (SO_REUSEPORT
|SO_REUSEADDR
)) == 0)
805 * Locate pcb for datagram.
807 inp
= in_pcblookup_connect(&udbtable
, *src4
, *sport
, *dst4
, *dport
);
809 UDP_STATINC(UDP_STAT_PCBHASHMISS
);
810 inp
= in_pcblookup_bind(&udbtable
, *dst4
, *dport
);
816 /* Handle ESP over UDP */
817 if (inp
->inp_flags
& INP_ESPINUDP_ALL
) {
818 struct sockaddr
*sa
= (struct sockaddr
*)src
;
820 switch(udp4_espinudp(mp
, off
, sa
, inp
->inp_socket
)) {
821 case -1: /* Error, m was freeed */
826 case 1: /* ESP over UDP */
831 case 0: /* plain UDP */
832 default: /* Unexpected */
834 * Normal UDP processing will take place
835 * m may have changed.
844 * Check the minimum TTL for socket.
846 if (mtod(m
, struct ip
*)->ip_ttl
< inp
->inp_ip_minttl
)
849 udp4_sendup(m
, off
, (struct sockaddr
*)src
, inp
->inp_socket
);
860 udp6_realinput(int af
, struct sockaddr_in6
*src
, struct sockaddr_in6
*dst
,
861 struct mbuf
*m
, int off
)
863 u_int16_t sport
, dport
;
865 struct in6_addr src6
, *dst6
;
866 const struct in_addr
*dst4
;
867 struct inpcb_hdr
*inph
;
871 off
+= sizeof(struct udphdr
); /* now, offset of payload */
873 if (af
!= AF_INET
&& af
!= AF_INET6
)
875 if (src
->sin6_family
!= AF_INET6
|| dst
->sin6_family
!= AF_INET6
)
878 src6
= src
->sin6_addr
;
879 if (sa6_recoverscope(src
) != 0) {
880 /* XXX: should be impossible. */
883 sport
= src
->sin6_port
;
885 dport
= dst
->sin6_port
;
886 dst4
= (struct in_addr
*)&dst
->sin6_addr
.s6_addr
[12];
887 dst6
= &dst
->sin6_addr
;
889 if (IN6_IS_ADDR_MULTICAST(dst6
) ||
890 (af
== AF_INET
&& IN_MULTICAST(dst4
->s_addr
))) {
892 * Deliver a multicast or broadcast datagram to *all* sockets
893 * for which the local and remote addresses and ports match
894 * those of the incoming datagram. This allows more than
895 * one process to receive multi/broadcasts on the same port.
896 * (This really ought to be done for unicast datagrams as
897 * well, but that would cause problems with existing
898 * applications that open both address-specific sockets and
899 * a wildcard socket listening to the same port -- they would
900 * end up receiving duplicates of every unicast datagram.
901 * Those applications open the multiple sockets to overcome an
902 * inadequacy of the UDP socket interface, but for backwards
903 * compatibility we avoid the problem here rather than
904 * fixing the interface. Maybe 4.5BSD will remedy this?)
908 * KAME note: traditionally we dropped udpiphdr from mbuf here.
909 * we need udpiphdr for IPsec processing so we do that later.
912 * Locate pcb(s) for datagram.
914 CIRCLEQ_FOREACH(inph
, &udbtable
.inpt_queue
, inph_queue
) {
915 in6p
= (struct in6pcb
*)inph
;
916 if (in6p
->in6p_af
!= AF_INET6
)
919 if (in6p
->in6p_lport
!= dport
)
921 if (!IN6_IS_ADDR_UNSPECIFIED(&in6p
->in6p_laddr
)) {
922 if (!IN6_ARE_ADDR_EQUAL(&in6p
->in6p_laddr
,
926 if (IN6_IS_ADDR_V4MAPPED(dst6
) &&
927 (in6p
->in6p_flags
& IN6P_IPV6_V6ONLY
))
930 if (!IN6_IS_ADDR_UNSPECIFIED(&in6p
->in6p_faddr
)) {
931 if (!IN6_ARE_ADDR_EQUAL(&in6p
->in6p_faddr
,
932 &src6
) || in6p
->in6p_fport
!= sport
)
935 if (IN6_IS_ADDR_V4MAPPED(&src6
) &&
936 (in6p
->in6p_flags
& IN6P_IPV6_V6ONLY
))
940 udp6_sendup(m
, off
, (struct sockaddr
*)src
,
945 * Don't look for additional matches if this one does
946 * not have either the SO_REUSEPORT or SO_REUSEADDR
947 * socket options set. This heuristic avoids searching
948 * through all pcbs in the common case of a non-shared
949 * port. It assumes that an application will never
950 * clear these options after setting them.
952 if ((in6p
->in6p_socket
->so_options
&
953 (SO_REUSEPORT
|SO_REUSEADDR
)) == 0)
958 * Locate pcb for datagram.
960 in6p
= in6_pcblookup_connect(&udbtable
, &src6
, sport
, dst6
,
963 UDP_STATINC(UDP_STAT_PCBHASHMISS
);
964 in6p
= in6_pcblookup_bind(&udbtable
, dst6
, dport
, 0);
969 udp6_sendup(m
, off
, (struct sockaddr
*)src
, in6p
->in6p_socket
);
980 * Notify a udp user of an asynchronous error;
981 * just wake up so that he can collect error status.
984 udp_notify(struct inpcb
*inp
, int errno
)
986 inp
->inp_socket
->so_error
= errno
;
987 sorwakeup(inp
->inp_socket
);
988 sowwakeup(inp
->inp_socket
);
992 udp_ctlinput(int cmd
, const struct sockaddr
*sa
, void *v
)
996 void (*notify
)(struct inpcb
*, int) = udp_notify
;
999 if (sa
->sa_family
!= AF_INET
1000 || sa
->sa_len
!= sizeof(struct sockaddr_in
))
1002 if ((unsigned)cmd
>= PRC_NCMDS
)
1004 errno
= inetctlerrmap
[cmd
];
1005 if (PRC_IS_REDIRECT(cmd
))
1006 notify
= in_rtchange
, ip
= 0;
1007 else if (cmd
== PRC_HOSTDEAD
)
1009 else if (errno
== 0)
1012 uh
= (struct udphdr
*)((char *)ip
+ (ip
->ip_hl
<< 2));
1013 in_pcbnotify(&udbtable
, satocsin(sa
)->sin_addr
, uh
->uh_dport
,
1014 ip
->ip_src
, uh
->uh_sport
, errno
, notify
);
1016 /* XXX mapped address case */
1018 in_pcbnotifyall(&udbtable
, satocsin(sa
)->sin_addr
, errno
,
1024 udp_ctloutput(int op
, struct socket
*so
, struct sockopt
*sopt
)
1032 family
= so
->so_proto
->pr_domain
->dom_family
;
1038 if (sopt
->sopt_level
!= IPPROTO_UDP
) {
1039 error
= ip_ctloutput(op
, so
, sopt
);
1046 if (sopt
->sopt_level
!= IPPROTO_UDP
) {
1047 error
= ip6_ctloutput(op
, so
, sopt
);
1053 error
= EAFNOSUPPORT
;
1060 inp
= sotoinpcb(so
);
1062 switch (sopt
->sopt_name
) {
1064 error
= sockopt_getint(sopt
, &optval
);
1071 inp
->inp_flags
&= ~INP_ESPINUDP_ALL
;
1074 case UDP_ENCAP_ESPINUDP
:
1075 inp
->inp_flags
&= ~INP_ESPINUDP_ALL
;
1076 inp
->inp_flags
|= INP_ESPINUDP
;
1079 case UDP_ENCAP_ESPINUDP_NON_IKE
:
1080 inp
->inp_flags
&= ~INP_ESPINUDP_ALL
;
1081 inp
->inp_flags
|= INP_ESPINUDP_NON_IKE
;
1091 error
= ENOPROTOOPT
;
1108 udp_output(struct mbuf
*m
, ...)
1111 struct udpiphdr
*ui
;
1113 int len
= m
->m_pkthdr
.len
;
1117 MCLAIM(m
, &udp_tx_mowner
);
1119 inp
= va_arg(ap
, struct inpcb
*);
1123 * Calculate data length and get a mbuf
1124 * for UDP and IP headers.
1126 M_PREPEND(m
, sizeof(struct udpiphdr
), M_DONTWAIT
);
1133 * Compute the packet length of the IP header, and
1134 * punt if the length looks bogus.
1136 if (len
+ sizeof(struct udpiphdr
) > IP_MAXPACKET
) {
1142 * Fill in mbuf with extended UDP header
1143 * and addresses and length put into network format.
1145 ui
= mtod(m
, struct udpiphdr
*);
1146 ui
->ui_pr
= IPPROTO_UDP
;
1147 ui
->ui_src
= inp
->inp_laddr
;
1148 ui
->ui_dst
= inp
->inp_faddr
;
1149 ui
->ui_sport
= inp
->inp_lport
;
1150 ui
->ui_dport
= inp
->inp_fport
;
1151 ui
->ui_ulen
= htons((u_int16_t
)len
+ sizeof(struct udphdr
));
1153 ro
= &inp
->inp_route
;
1156 * Set up checksum and output datagram.
1160 * XXX Cache pseudo-header checksum part for
1161 * XXX "connected" UDP sockets.
1163 ui
->ui_sum
= in_cksum_phdr(ui
->ui_src
.s_addr
,
1164 ui
->ui_dst
.s_addr
, htons((u_int16_t
)len
+
1165 sizeof(struct udphdr
) + IPPROTO_UDP
));
1166 m
->m_pkthdr
.csum_flags
= M_CSUM_UDPv4
;
1167 m
->m_pkthdr
.csum_data
= offsetof(struct udphdr
, uh_sum
);
1170 ((struct ip
*)ui
)->ip_len
= htons(sizeof (struct udpiphdr
) + len
);
1171 ((struct ip
*)ui
)->ip_ttl
= inp
->inp_ip
.ip_ttl
; /* XXX */
1172 ((struct ip
*)ui
)->ip_tos
= inp
->inp_ip
.ip_tos
; /* XXX */
1173 UDP_STATINC(UDP_STAT_OPACKETS
);
1175 return (ip_output(m
, inp
->inp_options
, ro
,
1176 inp
->inp_socket
->so_options
& (SO_DONTROUTE
| SO_BROADCAST
),
1177 inp
->inp_moptions
, inp
->inp_socket
));
1184 int udp_sendspace
= 9216; /* really max datagram size */
1185 int udp_recvspace
= 40 * (1024 + sizeof(struct sockaddr_in
));
1186 /* 40 1K datagrams */
1190 udp_usrreq(struct socket
*so
, int req
, struct mbuf
*m
, struct mbuf
*nam
,
1191 struct mbuf
*control
, struct lwp
*l
)
1197 if (req
== PRU_CONTROL
)
1198 return (in_control(so
, (long)m
, (void *)nam
,
1199 (struct ifnet
*)control
, l
));
1203 if (req
== PRU_PURGEIF
) {
1204 mutex_enter(softnet_lock
);
1205 in_pcbpurgeif0(&udbtable
, (struct ifnet
*)control
);
1206 in_purgeif((struct ifnet
*)control
);
1207 in_pcbpurgeif(&udbtable
, (struct ifnet
*)control
);
1208 mutex_exit(softnet_lock
);
1213 inp
= sotoinpcb(so
);
1215 if (req
!= PRU_SEND
&& req
!= PRU_SENDOOB
&& control
)
1216 panic("udp_usrreq: unexpected control mbuf");
1218 if (req
== PRU_ATTACH
) {
1220 } else if (inp
== 0) {
1226 * Note: need to block udp_input while changing
1227 * the udp pcb queue and/or pcb addresses.
1237 so
->so_mowner
= &udp_mowner
;
1238 so
->so_rcv
.sb_mowner
= &udp_rx_mowner
;
1239 so
->so_snd
.sb_mowner
= &udp_tx_mowner
;
1241 if (so
->so_snd
.sb_hiwat
== 0 || so
->so_rcv
.sb_hiwat
== 0) {
1242 error
= soreserve(so
, udp_sendspace
, udp_recvspace
);
1246 error
= in_pcballoc(so
, &udbtable
);
1249 inp
= sotoinpcb(so
);
1250 inp
->inp_ip
.ip_ttl
= ip_defttl
;
1258 error
= in_pcbbind(inp
, nam
, l
);
1266 error
= in_pcbconnect(inp
, nam
, l
);
1276 case PRU_DISCONNECT
:
1277 /*soisdisconnected(so);*/
1278 so
->so_state
&= ~SS_ISCONNECTED
; /* XXX */
1279 in_pcbdisconnect(inp
);
1280 inp
->inp_laddr
= zeroin_addr
; /* XXX */
1281 in_pcbstate(inp
, INP_BOUND
); /* XXX */
1293 if (control
&& control
->m_len
) {
1300 struct in_addr laddr
; /* XXX */
1303 laddr
= inp
->inp_laddr
; /* XXX */
1304 if ((so
->so_state
& SS_ISCONNECTED
) != 0) {
1308 error
= in_pcbconnect(inp
, nam
, l
);
1312 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1317 error
= udp_output(m
, inp
);
1320 in_pcbdisconnect(inp
);
1321 inp
->inp_laddr
= laddr
; /* XXX */
1322 in_pcbstate(inp
, INP_BOUND
); /* XXX */
1332 * stat: don't bother with a blocksize.
1348 in_setsockaddr(inp
, nam
);
1352 in_setpeeraddr(inp
, nam
);
1356 panic("udp_usrreq");
1365 sysctl_net_inet_udp_stats(SYSCTLFN_ARGS
)
1368 return (NETSTAT_SYSCTL(udpstat_percpu
, UDP_NSTATS
));
1372 * Sysctl for udp variables.
1375 sysctl_net_inet_udp_setup(struct sysctllog
**clog
)
1378 sysctl_createv(clog
, 0, NULL
, NULL
,
1380 CTLTYPE_NODE
, "net", NULL
,
1383 sysctl_createv(clog
, 0, NULL
, NULL
,
1385 CTLTYPE_NODE
, "inet", NULL
,
1387 CTL_NET
, PF_INET
, CTL_EOL
);
1388 sysctl_createv(clog
, 0, NULL
, NULL
,
1390 CTLTYPE_NODE
, "udp",
1391 SYSCTL_DESCR("UDPv4 related settings"),
1393 CTL_NET
, PF_INET
, IPPROTO_UDP
, CTL_EOL
);
1395 sysctl_createv(clog
, 0, NULL
, NULL
,
1396 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
1397 CTLTYPE_INT
, "checksum",
1398 SYSCTL_DESCR("Compute UDP checksums"),
1399 NULL
, 0, &udpcksum
, 0,
1400 CTL_NET
, PF_INET
, IPPROTO_UDP
, UDPCTL_CHECKSUM
,
1402 sysctl_createv(clog
, 0, NULL
, NULL
,
1403 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
1404 CTLTYPE_INT
, "sendspace",
1405 SYSCTL_DESCR("Default UDP send buffer size"),
1406 NULL
, 0, &udp_sendspace
, 0,
1407 CTL_NET
, PF_INET
, IPPROTO_UDP
, UDPCTL_SENDSPACE
,
1409 sysctl_createv(clog
, 0, NULL
, NULL
,
1410 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
1411 CTLTYPE_INT
, "recvspace",
1412 SYSCTL_DESCR("Default UDP receive buffer size"),
1413 NULL
, 0, &udp_recvspace
, 0,
1414 CTL_NET
, PF_INET
, IPPROTO_UDP
, UDPCTL_RECVSPACE
,
1416 sysctl_createv(clog
, 0, NULL
, NULL
,
1417 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
1418 CTLTYPE_INT
, "do_loopback_cksum",
1419 SYSCTL_DESCR("Perform UDP checksum on loopback"),
1420 NULL
, 0, &udp_do_loopback_cksum
, 0,
1421 CTL_NET
, PF_INET
, IPPROTO_UDP
, UDPCTL_LOOPBACKCKSUM
,
1423 sysctl_createv(clog
, 0, NULL
, NULL
,
1425 CTLTYPE_STRUCT
, "pcblist",
1426 SYSCTL_DESCR("UDP protocol control block list"),
1427 sysctl_inpcblist
, 0, &udbtable
, 0,
1428 CTL_NET
, PF_INET
, IPPROTO_UDP
, CTL_CREATE
,
1430 sysctl_createv(clog
, 0, NULL
, NULL
,
1432 CTLTYPE_STRUCT
, "stats",
1433 SYSCTL_DESCR("UDP statistics"),
1434 sysctl_net_inet_udp_stats
, 0, NULL
, 0,
1435 CTL_NET
, PF_INET
, IPPROTO_UDP
, UDPCTL_STATS
,
1441 udp_statinc(u_int stat
)
1444 KASSERT(stat
< UDP_NSTATS
);
1448 #if (defined INET && defined IPSEC_NAT_T)
1451 * 1 if the packet was processed
1452 * 0 if normal UDP processing should take place
1453 * -1 if an error occurred and m was freed
1456 udp4_espinudp(struct mbuf
**mp
, int off
, struct sockaddr
*src
,
1468 struct udphdr
*udphdr
;
1469 u_int16_t sport
, dport
;
1470 struct mbuf
*m
= *mp
;
1473 * Collapse the mbuf chain if the first mbuf is too short
1474 * The longest case is: UDP + non ESP marker + ESP
1476 minlen
= off
+ sizeof(u_int64_t
) + sizeof(struct esp
);
1477 if (minlen
> m
->m_pkthdr
.len
)
1478 minlen
= m
->m_pkthdr
.len
;
1480 if (m
->m_len
< minlen
) {
1481 if ((*mp
= m_pullup(m
, minlen
)) == NULL
) {
1482 printf("udp4_espinudp: m_pullup failed\n");
1488 len
= m
->m_len
- off
;
1489 data
= mtod(m
, char *) + off
;
1490 inp
= sotoinpcb(so
);
1492 /* Ignore keepalive packets */
1493 if ((len
== 1) && (*(unsigned char *)data
== 0xff)) {
1498 * Check that the payload is long enough to hold
1499 * an ESP header and compute the length of encapsulation
1502 if (inp
->inp_flags
& INP_ESPINUDP
) {
1503 u_int32_t
*st
= (u_int32_t
*)data
;
1505 if ((len
<= sizeof(struct esp
)) || (*st
== 0))
1506 return 0; /* Normal UDP processing */
1508 skip
= sizeof(struct udphdr
);
1511 if (inp
->inp_flags
& INP_ESPINUDP_NON_IKE
) {
1512 u_int32_t
*st
= (u_int32_t
*)data
;
1514 if ((len
<= sizeof(u_int64_t
) + sizeof(struct esp
))
1515 || ((st
[0] | st
[1]) != 0))
1516 return 0; /* Normal UDP processing */
1518 skip
= sizeof(struct udphdr
) + sizeof(u_int64_t
);
1522 * Get the UDP ports. They are handled in network
1523 * order everywhere in IPSEC_NAT_T code.
1525 udphdr
= (struct udphdr
*)((char *)data
- skip
);
1526 sport
= udphdr
->uh_sport
;
1527 dport
= udphdr
->uh_dport
;
1530 * Remove the UDP header (and possibly the non ESP marker)
1531 * IP header length is iphdrlen
1534 * +----+------+-----+
1535 * | IP | UDP | ESP |
1536 * +----+------+-----+
1544 iphdrlen
= off
- sizeof(struct udphdr
);
1545 memmove(mtod(m
, char *) + skip
, mtod(m
, void *), iphdrlen
);
1548 ip
= mtod(m
, struct ip
*);
1549 ip
->ip_len
= htons(ntohs(ip
->ip_len
) - skip
);
1550 ip
->ip_p
= IPPROTO_ESP
;
1553 * Copy the mbuf to avoid multiple free, as both
1554 * esp4_input (which we call) and udp_input (which
1555 * called us) free the mbuf.
1557 if ((n
= m_dup(m
, 0, M_COPYALL
, M_DONTWAIT
)) == NULL
) {
1558 printf("udp4_espinudp: m_dup failed\n");
1563 * Add a PACKET_TAG_IPSEC_NAT_T_PORTS tag to remember
1564 * the source UDP port. This is required if we want
1565 * to select the right SPD for multiple hosts behind
1568 if ((tag
= m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS
,
1569 sizeof(sport
) + sizeof(dport
), M_DONTWAIT
)) == NULL
) {
1570 printf("udp4_espinudp: m_tag_get failed\n");
1574 ((u_int16_t
*)(tag
+ 1))[0] = sport
;
1575 ((u_int16_t
*)(tag
+ 1))[1] = dport
;
1576 m_tag_prepend(n
, tag
);
1579 ipsec4_common_input(n
, iphdrlen
, IPPROTO_ESP
);
1581 esp4_input(n
, iphdrlen
);
1584 /* We handled it, it shouldn't be handled by UDP */