1 /* $NetBSD: ip_output.c,v 1.204 2009/07/16 04:09:51 minskim Exp $ */
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved.
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
63 * Copyright (c) 1982, 1986, 1988, 1990, 1993
64 * The Regents of the University of California. All rights reserved.
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
93 #include <sys/cdefs.h>
94 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.204 2009/07/16 04:09:51 minskim Exp $");
96 #include "opt_pfil_hooks.h"
98 #include "opt_ipsec.h"
99 #include "opt_mrouting.h"
101 #include <sys/param.h>
102 #include <sys/malloc.h>
103 #include <sys/mbuf.h>
104 #include <sys/errno.h>
105 #include <sys/protosw.h>
106 #include <sys/socket.h>
107 #include <sys/socketvar.h>
108 #include <sys/kauth.h>
110 #include <sys/domain.h>
112 #include <sys/systm.h>
113 #include <sys/proc.h>
116 #include <net/route.h>
117 #include <net/pfil.h>
119 #include <netinet/in.h>
120 #include <netinet/in_systm.h>
121 #include <netinet/ip.h>
122 #include <netinet/in_pcb.h>
123 #include <netinet/in_var.h>
124 #include <netinet/ip_var.h>
125 #include <netinet/ip_private.h>
126 #include <netinet/in_offload.h>
129 #include <netinet/ip_mroute.h>
132 #include <machine/stdarg.h>
135 #include <netinet6/ipsec.h>
136 #include <netinet6/ipsec_private.h>
137 #include <netkey/key.h>
138 #include <netkey/key_debug.h>
142 #include <netipsec/ipsec.h>
143 #include <netipsec/key.h>
144 #include <netipsec/xform.h>
145 #endif /* FAST_IPSEC*/
148 #include <netinet/udp.h>
151 static struct mbuf
*ip_insertoptions(struct mbuf
*, struct mbuf
*, int *);
152 static struct ifnet
*ip_multicast_if(struct in_addr
*, int *);
153 static void ip_mloopback(struct ifnet
*, struct mbuf
*,
154 const struct sockaddr_in
*);
157 extern struct pfil_head inet_pfil_hook
; /* XXX */
160 int ip_do_loopback_cksum
= 0;
163 * IP output. The packet in mbuf chain m contains a skeletal IP
164 * header (with len, off, ttl, proto, tos, src, dst).
165 * The mbuf chain containing the packet will be freed.
166 * The mbuf opt, if present, will not be freed.
169 ip_output(struct mbuf
*m0
, ...)
175 int hlen
= sizeof (struct ip
);
177 struct route iproute
;
178 const struct sockaddr_in
*dst
;
179 struct in_ifaddr
*ia
;
186 struct ip_moptions
*imo
;
193 struct secpolicy
*sp
= NULL
;
197 struct secpolicy
*sp
= NULL
;
203 struct sockaddr_in dst4
;
205 struct sockaddr
*rdst
= &u
.dst
; /* real IP destination, as opposed
211 opt
= va_arg(ap
, struct mbuf
*);
212 ro
= va_arg(ap
, struct route
*);
213 flags
= va_arg(ap
, int);
214 imo
= va_arg(ap
, struct ip_moptions
*);
215 so
= va_arg(ap
, struct socket
*);
216 if (flags
& IP_RETURNMTU
)
217 mtu_p
= va_arg(ap
, int *);
222 MCLAIM(m
, &ip_tx_mowner
);
224 if (so
!= NULL
&& so
->so_proto
->pr_domain
->dom_family
== AF_INET
)
225 inp
= (struct inpcb
*)so
->so_pcb
;
228 #endif /* FAST_IPSEC */
231 if ((m
->m_flags
& M_PKTHDR
) == 0)
232 panic("ip_output: no HDR");
234 if ((m
->m_pkthdr
.csum_flags
& (M_CSUM_TCPv6
|M_CSUM_UDPv6
)) != 0) {
235 panic("ip_output: IPv6 checksum offload flags: %d",
236 m
->m_pkthdr
.csum_flags
);
239 if ((m
->m_pkthdr
.csum_flags
& (M_CSUM_TCPv4
|M_CSUM_UDPv4
)) ==
240 (M_CSUM_TCPv4
|M_CSUM_UDPv4
)) {
241 panic("ip_output: conflicting checksum offload flags: %d",
242 m
->m_pkthdr
.csum_flags
);
246 m
= ip_insertoptions(m
, opt
, &len
);
247 if (len
>= sizeof(struct ip
))
250 ip
= mtod(m
, struct ip
*);
254 if ((flags
& (IP_FORWARDING
|IP_RAWOUTPUT
)) == 0) {
255 ip
->ip_v
= IPVERSION
;
256 ip
->ip_off
= htons(0);
257 /* ip->ip_id filled in after we find out source ia */
258 ip
->ip_hl
= hlen
>> 2;
259 IP_STATINC(IP_STAT_LOCALOUT
);
261 hlen
= ip
->ip_hl
<< 2;
266 memset(&iproute
, 0, sizeof(iproute
));
269 sockaddr_in_init(&u
.dst4
, &ip
->ip_dst
, 0);
270 dst
= satocsin(rtcache_getdst(ro
));
272 * If there is a cached route,
273 * check that it is to the same destination
274 * and is still up. If not, free it and try again.
275 * The address family should also be checked in case of sharing the
280 else if (dst
->sin_family
!= AF_INET
||
281 !in_hosteq(dst
->sin_addr
, ip
->ip_dst
))
284 if ((rt
= rtcache_validate(ro
)) == NULL
&&
285 (rt
= rtcache_update(ro
, 1)) == NULL
) {
287 rtcache_setdst(ro
, &u
.dst
);
290 * If routing to interface only,
291 * short circuit routing lookup.
293 if (flags
& IP_ROUTETOIF
) {
294 if ((ia
= ifatoia(ifa_ifwithladdr(sintocsa(dst
)))) == NULL
) {
295 IP_STATINC(IP_STAT_NOROUTE
);
302 } else if ((IN_MULTICAST(ip
->ip_dst
.s_addr
) ||
303 ip
->ip_dst
.s_addr
== INADDR_BROADCAST
) &&
304 imo
!= NULL
&& imo
->imo_multicast_ifp
!= NULL
) {
305 ifp
= imo
->imo_multicast_ifp
;
310 rt
= rtcache_init(ro
);
312 IP_STATINC(IP_STAT_NOROUTE
);
313 error
= EHOSTUNREACH
;
316 ia
= ifatoia(rt
->rt_ifa
);
318 if ((mtu
= rt
->rt_rmx
.rmx_mtu
) == 0)
321 if (rt
->rt_flags
& RTF_GATEWAY
)
322 dst
= satosin(rt
->rt_gateway
);
324 if (IN_MULTICAST(ip
->ip_dst
.s_addr
) ||
325 (ip
->ip_dst
.s_addr
== INADDR_BROADCAST
)) {
326 struct in_multi
*inm
;
328 m
->m_flags
|= (ip
->ip_dst
.s_addr
== INADDR_BROADCAST
) ?
331 * See if the caller provided any multicast options
334 ip
->ip_ttl
= imo
->imo_multicast_ttl
;
336 ip
->ip_ttl
= IP_DEFAULT_MULTICAST_TTL
;
339 * if we don't know the outgoing ifp yet, we can't generate
343 IP_STATINC(IP_STAT_NOROUTE
);
349 * If the packet is multicast or broadcast, confirm that
350 * the outgoing interface can transmit it.
352 if (((m
->m_flags
& M_MCAST
) &&
353 (ifp
->if_flags
& IFF_MULTICAST
) == 0) ||
354 ((m
->m_flags
& M_BCAST
) &&
355 (ifp
->if_flags
& (IFF_BROADCAST
|IFF_POINTOPOINT
)) == 0)) {
356 IP_STATINC(IP_STAT_NOROUTE
);
361 * If source address not specified yet, use an address
362 * of outgoing interface.
364 if (in_nullhost(ip
->ip_src
)) {
365 struct in_ifaddr
*xia
;
369 error
= EADDRNOTAVAIL
;
373 if (xifa
->ifa_getifa
!= NULL
) {
374 xia
= ifatoia((*xifa
->ifa_getifa
)(xifa
, rdst
));
376 ip
->ip_src
= xia
->ia_addr
.sin_addr
;
379 IN_LOOKUP_MULTI(ip
->ip_dst
, ifp
, inm
);
381 (imo
== NULL
|| imo
->imo_multicast_loop
)) {
383 * If we belong to the destination multicast group
384 * on the outgoing interface, and the caller did not
385 * forbid loopback, loop back a copy.
387 ip_mloopback(ifp
, m
, &u
.dst4
);
392 * If we are acting as a multicast router, perform
393 * multicast forwarding as if the packet had just
394 * arrived on the interface to which we are about
395 * to send. The multicast forwarding function
396 * recursively calls this function, using the
397 * IP_FORWARDING flag to prevent infinite recursion.
399 * Multicasts that are looped back by ip_mloopback(),
400 * above, will be forwarded by the ip_input() routine,
403 extern struct socket
*ip_mrouter
;
405 if (ip_mrouter
&& (flags
& IP_FORWARDING
) == 0) {
406 if (ip_mforward(m
, ifp
) != 0) {
414 * Multicasts with a time-to-live of zero may be looped-
415 * back, above, but must not be transmitted on a network.
416 * Also, multicasts addressed to the loopback interface
417 * are not sent -- the above call to ip_mloopback() will
418 * loop back a copy if this host actually belongs to the
419 * destination group on the loopback interface.
421 if (ip
->ip_ttl
== 0 || (ifp
->if_flags
& IFF_LOOPBACK
) != 0) {
429 * If source address not specified yet, use address
430 * of outgoing interface.
432 if (in_nullhost(ip
->ip_src
)) {
434 if (xifa
->ifa_getifa
!= NULL
)
435 ia
= ifatoia((*xifa
->ifa_getifa
)(xifa
, rdst
));
436 ip
->ip_src
= ia
->ia_addr
.sin_addr
;
440 * packets with Class-D address as source are not valid per
443 if (IN_MULTICAST(ip
->ip_src
.s_addr
)) {
444 IP_STATINC(IP_STAT_ODROPPED
);
445 error
= EADDRNOTAVAIL
;
450 * Look for broadcast address and
451 * verify user is allowed to send
454 if (in_broadcast(dst
->sin_addr
, ifp
)) {
455 if ((ifp
->if_flags
& IFF_BROADCAST
) == 0) {
456 error
= EADDRNOTAVAIL
;
459 if ((flags
& IP_ALLOWBROADCAST
) == 0) {
463 /* don't allow broadcast messages to be fragmented */
464 if (ntohs(ip
->ip_len
) > ifp
->if_mtu
) {
468 m
->m_flags
|= M_BCAST
;
470 m
->m_flags
&= ~M_BCAST
;
473 if ((flags
& (IP_FORWARDING
|IP_NOIPNEWID
)) == 0) {
474 if (m
->m_pkthdr
.len
< IP_MINFRAGSIZE
) {
476 } else if ((m
->m_pkthdr
.csum_flags
& M_CSUM_TSOv4
) == 0) {
477 ip
->ip_id
= ip_newid(ia
);
481 * TSO capable interfaces (typically?) increment
482 * ip_id for each segment.
483 * "allocate" enough ids here to increase the chance
484 * for them to be unique.
486 * note that the following calculation does not
487 * need to be precise. wasting some ip_id is fine.
490 unsigned int segsz
= m
->m_pkthdr
.segsz
;
491 unsigned int datasz
= ntohs(ip
->ip_len
) - hlen
;
492 unsigned int num
= howmany(datasz
, segsz
);
494 ip
->ip_id
= ip_newid_range(ia
, num
);
498 * If we're doing Path MTU Discovery, we need to set DF unless
499 * the route's MTU is locked.
501 if ((flags
& IP_MTUDISC
) != 0 && rt
!= NULL
&&
502 (rt
->rt_rmx
.rmx_locks
& RTV_MTU
) == 0)
503 ip
->ip_off
|= htons(IP_DF
);
505 /* Remember the current ip_len */
506 ip_len
= ntohs(ip
->ip_len
);
509 /* get SP for this packet */
511 sp
= ipsec4_getpolicybyaddr(m
, IPSEC_DIR_OUTBOUND
,
514 if (IPSEC_PCB_SKIP_IPSEC(sotoinpcb_hdr(so
)->inph_sp
,
517 sp
= ipsec4_getpolicybysock(m
, IPSEC_DIR_OUTBOUND
, so
, &error
);
521 IPSEC_STATINC(IPSEC_STAT_IN_INVAL
);
528 switch (sp
->policy
) {
529 case IPSEC_POLICY_DISCARD
:
531 * This packet is just discarded.
533 IPSEC_STATINC(IPSEC_STAT_OUT_POLVIO
);
536 case IPSEC_POLICY_BYPASS
:
537 case IPSEC_POLICY_NONE
:
538 /* no need to do IPsec. */
541 case IPSEC_POLICY_IPSEC
:
542 if (sp
->req
== NULL
) {
543 /* XXX should be panic ? */
544 printf("ip_output: No IPsec request specified.\n");
550 case IPSEC_POLICY_ENTRUST
:
552 printf("ip_output: Invalid policy found. %d\n", sp
->policy
);
557 * NAT-T ESP fragmentation: don't do IPSec processing now,
558 * we'll do it on each fragmented packet.
561 ((sp
->req
->sav
->natt_type
& UDP_ENCAP_ESPINUDP
) ||
562 (sp
->req
->sav
->natt_type
& UDP_ENCAP_ESPINUDP_NON_IKE
))) {
563 if (ntohs(ip
->ip_len
) > sp
->req
->sav
->esp_frag
) {
565 mtu
= sp
->req
->sav
->esp_frag
;
569 #endif /* IPSEC_NAT_T */
572 * ipsec4_output() expects ip_len and ip_off in network
573 * order. They have been set to network order above.
577 struct ipsec_output_state state
;
578 memset(&state
, 0, sizeof(state
));
580 if (flags
& IP_ROUTETOIF
) {
582 memset(&iproute
, 0, sizeof(iproute
));
585 state
.dst
= sintocsa(dst
);
588 * We can't defer the checksum of payload data if
589 * we're about to encrypt/authenticate it.
591 * XXX When we support crypto offloading functions of
592 * XXX network interfaces, we need to reconsider this,
593 * XXX since it's likely that they'll support checksumming,
596 if (m
->m_pkthdr
.csum_flags
& (M_CSUM_TCPv4
|M_CSUM_UDPv4
)) {
598 m
->m_pkthdr
.csum_flags
&= ~(M_CSUM_TCPv4
|M_CSUM_UDPv4
);
601 error
= ipsec4_output(&state
, sp
, flags
);
604 if (flags
& IP_ROUTETOIF
) {
606 * if we have tunnel mode SA, we may need to ignore
609 if (state
.ro
!= &iproute
||
610 rtcache_validate(state
.ro
) != NULL
) {
611 flags
&= ~IP_ROUTETOIF
;
616 dst
= satocsin(state
.dst
);
618 /* mbuf is already reclaimed in ipsec4_output. */
628 printf("ip4_output (ipsec): error code %d\n", error
);
631 /* don't show these error codes to the user */
638 /* be sure to update variables that are affected by ipsec4_output() */
639 ip
= mtod(m
, struct ip
*);
640 hlen
= ip
->ip_hl
<< 2;
641 ip_len
= ntohs(ip
->ip_len
);
643 if ((rt
= rtcache_validate(ro
)) == NULL
) {
644 if ((flags
& IP_ROUTETOIF
) == 0) {
646 "can't update route after IPsec processing\n");
647 error
= EHOSTUNREACH
; /*XXX*/
651 /* nobody uses ia beyond here */
654 if ((mtu
= rt
->rt_rmx
.rmx_mtu
) == 0)
663 * Check the security policy (SP) for the packet and, if
664 * required, do IPsec-related processing. There are two
665 * cases here; the first time a packet is sent through
666 * it will be untagged and handled by ipsec4_checkpolicy.
667 * If the packet is resubmitted to ip_output (e.g. after
668 * AH, ESP, etc. processing), there will be a tag to bypass
669 * the lookup and related policy checking.
671 if (!ipsec_outdone(m
)) {
674 IPSEC_PCB_SKIP_IPSEC(inp
->inp_sp
, IPSEC_DIR_OUTBOUND
)) {
678 sp
= ipsec4_checkpolicy(m
, IPSEC_DIR_OUTBOUND
, flags
,
681 * There are four return cases:
682 * sp != NULL apply IPsec policy
683 * sp == NULL, error == 0 no IPsec handling needed
684 * sp == NULL, error == -EINVAL discard packet w/o error
685 * sp == NULL, error != 0 discard packet, report error
690 * NAT-T ESP fragmentation: don't do IPSec processing now,
691 * we'll do it on each fragmented packet.
694 ((sp
->req
->sav
->natt_type
& UDP_ENCAP_ESPINUDP
) ||
695 (sp
->req
->sav
->natt_type
& UDP_ENCAP_ESPINUDP_NON_IKE
))) {
696 if (ntohs(ip
->ip_len
) > sp
->req
->sav
->esp_frag
) {
698 mtu
= sp
->req
->sav
->esp_frag
;
703 #endif /* IPSEC_NAT_T */
706 * Do delayed checksums now because we send before
707 * this is done in the normal processing path.
709 if (m
->m_pkthdr
.csum_flags
& (M_CSUM_TCPv4
|M_CSUM_UDPv4
)) {
711 m
->m_pkthdr
.csum_flags
&= ~(M_CSUM_TCPv4
|M_CSUM_UDPv4
);
715 ip
->ip_len
= htons(ip
->ip_len
);
716 ip
->ip_off
= htons(ip
->ip_off
);
719 /* NB: callee frees mbuf */
720 error
= ipsec4_process_packet(m
, sp
->req
, flags
, 0);
722 * Preserve KAME behaviour: ENOENT can be returned
723 * when an SA acquire is in progress. Don't propagate
724 * this to user-level; it confuses applications.
726 * XXX this will go away when the SADB is redone.
737 * Hack: -EINVAL is used to signal that a packet
738 * should be silently discarded. This is typically
739 * because we asked key management for an SA and
740 * it was delayed (e.g. kicked up to IKE).
742 if (error
== -EINVAL
)
746 /* No IPsec processing for this packet. */
751 #endif /* FAST_IPSEC */
755 * Run through list of hooks for output packets.
757 if ((error
= pfil_run_hooks(&inet_pfil_hook
, &m
, ifp
, PFIL_OUT
)) != 0)
762 ip
= mtod(m
, struct ip
*);
763 hlen
= ip
->ip_hl
<< 2;
764 ip_len
= ntohs(ip
->ip_len
);
765 #endif /* PFIL_HOOKS */
767 m
->m_pkthdr
.csum_data
|= hlen
<< 16;
771 * search for the source address structure to
772 * maintain output statistics.
774 INADDR_TO_IA(ip
->ip_src
, ia
);
777 /* Maybe skip checksums on loopback interfaces. */
778 if (IN_NEED_CHECKSUM(ifp
, M_CSUM_IPv4
)) {
779 m
->m_pkthdr
.csum_flags
|= M_CSUM_IPv4
;
781 sw_csum
= m
->m_pkthdr
.csum_flags
& ~ifp
->if_csum_flags_tx
;
783 * If small enough for mtu of path, or if using TCP segmentation
784 * offload, can just send directly.
787 (m
->m_pkthdr
.csum_flags
& M_CSUM_TSOv4
) != 0) {
790 ia
->ia_ifa
.ifa_data
.ifad_outbytes
+= ip_len
;
793 * Always initialize the sum to 0! Some HW assisted
794 * checksumming requires this.
798 if ((m
->m_pkthdr
.csum_flags
& M_CSUM_TSOv4
) == 0) {
800 * Perform any checksums that the hardware can't do
803 * XXX Does any hardware require the {th,uh}_sum
804 * XXX fields to be 0?
806 if (sw_csum
& M_CSUM_IPv4
) {
807 KASSERT(IN_NEED_CHECKSUM(ifp
, M_CSUM_IPv4
));
808 ip
->ip_sum
= in_cksum(m
, hlen
);
809 m
->m_pkthdr
.csum_flags
&= ~M_CSUM_IPv4
;
811 if (sw_csum
& (M_CSUM_TCPv4
|M_CSUM_UDPv4
)) {
812 if (IN_NEED_CHECKSUM(ifp
,
813 sw_csum
& (M_CSUM_TCPv4
|M_CSUM_UDPv4
))) {
816 m
->m_pkthdr
.csum_flags
&=
817 ~(M_CSUM_TCPv4
|M_CSUM_UDPv4
);
822 /* clean ipsec history once it goes out of the node */
827 (m
->m_pkthdr
.csum_flags
& M_CSUM_TSOv4
) == 0 ||
828 (ifp
->if_capenable
& IFCAP_TSOv4
) != 0)) {
830 (*ifp
->if_output
)(ifp
, m
,
831 (m
->m_flags
& M_MCAST
) ?
832 sintocsa(rdst
) : sintocsa(dst
),
836 ip_tso_output(ifp
, m
,
837 (m
->m_flags
& M_MCAST
) ?
838 sintocsa(rdst
) : sintocsa(dst
),
845 * We can't use HW checksumming if we're about
846 * to fragment the packet.
848 * XXX Some hardware can do this.
850 if (m
->m_pkthdr
.csum_flags
& (M_CSUM_TCPv4
|M_CSUM_UDPv4
)) {
851 if (IN_NEED_CHECKSUM(ifp
,
852 m
->m_pkthdr
.csum_flags
& (M_CSUM_TCPv4
|M_CSUM_UDPv4
))) {
855 m
->m_pkthdr
.csum_flags
&= ~(M_CSUM_TCPv4
|M_CSUM_UDPv4
);
859 * Too large for interface; fragment if possible.
860 * Must be able to put at least 8 bytes per fragment.
862 if (ntohs(ip
->ip_off
) & IP_DF
) {
863 if (flags
& IP_RETURNMTU
)
866 IP_STATINC(IP_STAT_CANTFRAG
);
870 error
= ip_fragment(m
, ifp
, mtu
);
882 ia
->ia_ifa
.ifa_data
.ifad_outbytes
+=
886 /* clean ipsec history once it goes out of the node */
892 * If we get there, the packet has not been handled by
893 * IPSec whereas it should have. Now that it has been
894 * fragmented, re-inject it in ip_output so that IPsec
895 * processing can occur.
898 error
= ip_output(m
, opt
,
899 ro
, flags
, imo
, so
, mtu_p
);
901 #endif /* IPSEC_NAT_T */
903 KASSERT((m
->m_pkthdr
.csum_flags
&
904 (M_CSUM_UDPv4
| M_CSUM_TCPv4
)) == 0);
905 error
= (*ifp
->if_output
)(ifp
, m
,
906 (m
->m_flags
& M_MCAST
) ?
907 sintocsa(rdst
) : sintocsa(dst
),
915 IP_STATINC(IP_STAT_FRAGMENTED
);
917 rtcache_free(&iproute
);
921 KEYDEBUG(KEYDEBUG_IPSEC_STAMP
,
922 printf("DP ip_output call free SP:%p\n", sp
));
929 #endif /* FAST_IPSEC */
938 ip_fragment(struct mbuf
*m
, struct ifnet
*ifp
, u_long mtu
)
940 struct ip
*ip
, *mhip
;
945 int sw_csum
= m
->m_pkthdr
.csum_flags
;
950 ip
= mtod(m
, struct ip
*);
951 hlen
= ip
->ip_hl
<< 2;
953 sw_csum
&= ~ifp
->if_csum_flags_tx
;
955 len
= (mtu
- hlen
) &~ 7;
962 mnext
= &m
->m_nextpkt
;
965 * Loop through length of segment after first fragment,
966 * make new header and copy data of each part and link onto chain.
969 mhlen
= sizeof (struct ip
);
970 for (off
= hlen
+ len
; off
< ntohs(ip
->ip_len
); off
+= len
) {
971 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
);
974 IP_STATINC(IP_STAT_ODROPPED
);
977 MCLAIM(m
, m0
->m_owner
);
979 mnext
= &m
->m_nextpkt
;
980 m
->m_data
+= max_linkhdr
;
981 mhip
= mtod(m
, struct ip
*);
983 /* we must inherit MCAST and BCAST flags */
984 m
->m_flags
|= m0
->m_flags
& (M_MCAST
|M_BCAST
);
985 if (hlen
> sizeof (struct ip
)) {
986 mhlen
= ip_optcopy(ip
, mhip
) + sizeof (struct ip
);
987 mhip
->ip_hl
= mhlen
>> 2;
990 mhip
->ip_off
= ((off
- hlen
) >> 3) +
991 (ntohs(ip
->ip_off
) & ~IP_MF
);
992 if (ip
->ip_off
& htons(IP_MF
))
993 mhip
->ip_off
|= IP_MF
;
994 if (off
+ len
>= ntohs(ip
->ip_len
))
995 len
= ntohs(ip
->ip_len
) - off
;
997 mhip
->ip_off
|= IP_MF
;
999 mhip
->ip_len
= htons((u_int16_t
)(len
+ mhlen
));
1000 m
->m_next
= m_copym(m0
, off
, len
, M_DONTWAIT
);
1001 if (m
->m_next
== 0) {
1002 error
= ENOBUFS
; /* ??? */
1003 IP_STATINC(IP_STAT_ODROPPED
);
1006 m
->m_pkthdr
.len
= mhlen
+ len
;
1007 m
->m_pkthdr
.rcvif
= (struct ifnet
*)0;
1009 if (sw_csum
& M_CSUM_IPv4
) {
1010 mhip
->ip_sum
= in_cksum(m
, mhlen
);
1011 KASSERT((m
->m_pkthdr
.csum_flags
& M_CSUM_IPv4
) == 0);
1013 m
->m_pkthdr
.csum_flags
|= M_CSUM_IPv4
;
1014 m
->m_pkthdr
.csum_data
|= mhlen
<< 16;
1016 IP_STATINC(IP_STAT_OFRAGMENTS
);
1020 * Update first fragment by trimming what's been copied out
1021 * and updating header, then send each fragment (in order).
1024 m_adj(m
, hlen
+ firstlen
- ntohs(ip
->ip_len
));
1025 m
->m_pkthdr
.len
= hlen
+ firstlen
;
1026 ip
->ip_len
= htons((u_int16_t
)m
->m_pkthdr
.len
);
1027 ip
->ip_off
|= htons(IP_MF
);
1029 if (sw_csum
& M_CSUM_IPv4
) {
1030 ip
->ip_sum
= in_cksum(m
, hlen
);
1031 m
->m_pkthdr
.csum_flags
&= ~M_CSUM_IPv4
;
1033 KASSERT(m
->m_pkthdr
.csum_flags
& M_CSUM_IPv4
);
1034 KASSERT(M_CSUM_DATA_IPv4_IPHL(m
->m_pkthdr
.csum_data
) >=
1039 * If there is no room for all the fragments, don't queue
1044 if (ifp
->if_snd
.ifq_maxlen
- ifp
->if_snd
.ifq_len
< fragments
&&
1047 IP_STATINC(IP_STAT_ODROPPED
);
1048 IFQ_INC_DROPS(&ifp
->if_snd
);
1053 for (m
= m0
; m
; m
= m0
) {
1055 m
->m_nextpkt
= NULL
;
1063 * Process a delayed payload checksum calculation.
1066 in_delayed_cksum(struct mbuf
*m
)
1069 u_int16_t csum
, offset
;
1071 ip
= mtod(m
, struct ip
*);
1072 offset
= ip
->ip_hl
<< 2;
1073 csum
= in4_cksum(m
, 0, offset
, ntohs(ip
->ip_len
) - offset
);
1074 if (csum
== 0 && (m
->m_pkthdr
.csum_flags
& M_CSUM_UDPv4
) != 0)
1077 offset
+= M_CSUM_DATA_IPv4_OFFSET(m
->m_pkthdr
.csum_data
);
1079 if ((offset
+ sizeof(u_int16_t
)) > m
->m_len
) {
1080 /* This happens when IP options were inserted
1081 printf("in_delayed_cksum: pullup len %d off %d proto %d\n",
1082 m->m_len, offset, ip->ip_p);
1084 m_copyback(m
, offset
, sizeof(csum
), (void *) &csum
);
1086 *(u_int16_t
*)(mtod(m
, char *) + offset
) = csum
;
1090 * Determine the maximum length of the options to be inserted;
1091 * we would far rather allocate too much space rather than too little.
1095 ip_optlen(struct inpcb
*inp
)
1097 struct mbuf
*m
= inp
->inp_options
;
1099 if (m
&& m
->m_len
> offsetof(struct ipoption
, ipopt_dst
))
1100 return (m
->m_len
- offsetof(struct ipoption
, ipopt_dst
));
1107 * Insert IP options into preformed packet.
1108 * Adjust IP destination as required for IP source routing,
1109 * as indicated by a non-zero in_addr at the start of the options.
1111 static struct mbuf
*
1112 ip_insertoptions(struct mbuf
*m
, struct mbuf
*opt
, int *phlen
)
1114 struct ipoption
*p
= mtod(opt
, struct ipoption
*);
1116 struct ip
*ip
= mtod(m
, struct ip
*);
1119 optlen
= opt
->m_len
- sizeof(p
->ipopt_dst
);
1120 if (optlen
+ ntohs(ip
->ip_len
) > IP_MAXPACKET
)
1121 return (m
); /* XXX should fail */
1122 if (!in_nullhost(p
->ipopt_dst
))
1123 ip
->ip_dst
= p
->ipopt_dst
;
1124 if (M_READONLY(m
) || M_LEADINGSPACE(m
) < optlen
) {
1125 MGETHDR(n
, M_DONTWAIT
, MT_HEADER
);
1128 MCLAIM(n
, m
->m_owner
);
1129 M_MOVE_PKTHDR(n
, m
);
1130 m
->m_len
-= sizeof(struct ip
);
1131 m
->m_data
+= sizeof(struct ip
);
1134 m
->m_len
= optlen
+ sizeof(struct ip
);
1135 m
->m_data
+= max_linkhdr
;
1136 bcopy((void *)ip
, mtod(m
, void *), sizeof(struct ip
));
1138 m
->m_data
-= optlen
;
1140 memmove(mtod(m
, void *), ip
, sizeof(struct ip
));
1142 m
->m_pkthdr
.len
+= optlen
;
1143 ip
= mtod(m
, struct ip
*);
1144 bcopy((void *)p
->ipopt_list
, (void *)(ip
+ 1), (unsigned)optlen
);
1145 *phlen
= sizeof(struct ip
) + optlen
;
1146 ip
->ip_len
= htons(ntohs(ip
->ip_len
) + optlen
);
1151 * Copy options from ip to jp,
1152 * omitting those not copied during fragmentation.
1155 ip_optcopy(struct ip
*ip
, struct ip
*jp
)
1158 int opt
, optlen
, cnt
;
1160 cp
= (u_char
*)(ip
+ 1);
1161 dp
= (u_char
*)(jp
+ 1);
1162 cnt
= (ip
->ip_hl
<< 2) - sizeof (struct ip
);
1163 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
1165 if (opt
== IPOPT_EOL
)
1167 if (opt
== IPOPT_NOP
) {
1168 /* Preserve for IP mcast tunnel's LSRR alignment. */
1174 if (cnt
< IPOPT_OLEN
+ sizeof(*cp
))
1175 panic("malformed IPv4 option passed to ip_optcopy");
1177 optlen
= cp
[IPOPT_OLEN
];
1179 if (optlen
< IPOPT_OLEN
+ sizeof(*cp
) || optlen
> cnt
)
1180 panic("malformed IPv4 option passed to ip_optcopy");
1182 /* bogus lengths should have been caught by ip_dooptions */
1185 if (IPOPT_COPIED(opt
)) {
1186 bcopy((void *)cp
, (void *)dp
, (unsigned)optlen
);
1190 for (optlen
= dp
- (u_char
*)(jp
+1); optlen
& 0x3; optlen
++)
1196 * IP socket option processing.
1199 ip_ctloutput(int op
, struct socket
*so
, struct sockopt
*sopt
)
1201 struct inpcb
*inp
= sotoinpcb(so
);
1204 #if defined(IPSEC) || defined(FAST_IPSEC)
1205 struct lwp
*l
= curlwp
; /*XXX*/
1208 if (sopt
->sopt_level
!= IPPROTO_IP
) {
1209 if (sopt
->sopt_level
== SOL_SOCKET
&& sopt
->sopt_name
== SO_NOHEADER
)
1216 switch (sopt
->sopt_name
) {
1221 error
= ip_pcbopts(&inp
->inp_options
, sopt
);
1228 case IP_RECVRETOPTS
:
1229 case IP_RECVDSTADDR
:
1232 error
= sockopt_getint(sopt
, &optval
);
1236 switch (sopt
->sopt_name
) {
1238 inp
->inp_ip
.ip_tos
= optval
;
1242 inp
->inp_ip
.ip_ttl
= optval
;
1246 if (optval
> 0 && optval
<= MAXTTL
)
1247 inp
->inp_ip_minttl
= optval
;
1251 #define OPTSET(bit) \
1253 inp->inp_flags |= bit; \
1255 inp->inp_flags &= ~bit;
1258 OPTSET(INP_RECVOPTS
);
1261 case IP_RECVRETOPTS
:
1262 OPTSET(INP_RECVRETOPTS
);
1265 case IP_RECVDSTADDR
:
1266 OPTSET(INP_RECVDSTADDR
);
1274 OPTSET(INP_RECVTTL
);
1280 case IP_MULTICAST_IF
:
1281 case IP_MULTICAST_TTL
:
1282 case IP_MULTICAST_LOOP
:
1283 case IP_ADD_MEMBERSHIP
:
1284 case IP_DROP_MEMBERSHIP
:
1285 error
= ip_setmoptions(&inp
->inp_moptions
, sopt
);
1289 error
= sockopt_getint(sopt
, &optval
);
1293 /* INP_LOCK(inp); */
1295 case IP_PORTRANGE_DEFAULT
:
1296 case IP_PORTRANGE_HIGH
:
1297 inp
->inp_flags
&= ~(INP_LOWPORT
);
1300 case IP_PORTRANGE_LOW
:
1301 inp
->inp_flags
|= INP_LOWPORT
;
1308 /* INP_UNLOCK(inp); */
1311 #if defined(IPSEC) || defined(FAST_IPSEC)
1312 case IP_IPSEC_POLICY
:
1314 error
= ipsec4_set_policy(inp
, sopt
->sopt_name
,
1315 sopt
->sopt_data
, sopt
->sopt_size
, l
->l_cred
);
1321 error
= ENOPROTOOPT
;
1327 switch (sopt
->sopt_name
) {
1330 if (inp
->inp_options
) {
1333 m
= m_copym(inp
->inp_options
, 0, M_COPYALL
,
1340 error
= sockopt_setmbuf(sopt
, m
);
1348 case IP_RECVRETOPTS
:
1349 case IP_RECVDSTADDR
:
1353 switch (sopt
->sopt_name
) {
1355 optval
= inp
->inp_ip
.ip_tos
;
1359 optval
= inp
->inp_ip
.ip_ttl
;
1363 optval
= inp
->inp_ip_minttl
;
1367 optval
= inp
->inp_errormtu
;
1370 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1373 optval
= OPTBIT(INP_RECVOPTS
);
1376 case IP_RECVRETOPTS
:
1377 optval
= OPTBIT(INP_RECVRETOPTS
);
1380 case IP_RECVDSTADDR
:
1381 optval
= OPTBIT(INP_RECVDSTADDR
);
1385 optval
= OPTBIT(INP_RECVIF
);
1389 optval
= OPTBIT(INP_RECVTTL
);
1392 error
= sockopt_setint(sopt
, optval
);
1395 #if 0 /* defined(IPSEC) || defined(FAST_IPSEC) */
1396 case IP_IPSEC_POLICY
:
1398 struct mbuf
*m
= NULL
;
1400 /* XXX this will return EINVAL as sopt is empty */
1401 error
= ipsec4_get_policy(inp
, sopt
->sopt_data
,
1402 sopt
->sopt_size
, &m
);
1404 error
= sockopt_setmbuf(sopt
, m
);
1409 case IP_MULTICAST_IF
:
1410 case IP_MULTICAST_TTL
:
1411 case IP_MULTICAST_LOOP
:
1412 case IP_ADD_MEMBERSHIP
:
1413 case IP_DROP_MEMBERSHIP
:
1414 error
= ip_getmoptions(inp
->inp_moptions
, sopt
);
1418 if (inp
->inp_flags
& INP_LOWPORT
)
1419 optval
= IP_PORTRANGE_LOW
;
1421 optval
= IP_PORTRANGE_DEFAULT
;
1423 error
= sockopt_setint(sopt
, optval
);
1428 error
= ENOPROTOOPT
;
1437 * Set up IP options in pcb for insertion in output packets.
1438 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1439 * with destination address if source routed.
/*
 * ip_pcbopts: replace the IP options held in *pcbopt with the option
 * bytes supplied in sopt (sopt->sopt_data, sopt->sopt_size).
 *
 * Visible steps: m_free() is called on the old *pcbopt; a fresh
 * MT_SOOPTS mbuf is obtained with m_get(M_DONTWAIT); the first
 * sizeof(struct in_addr) bytes of that mbuf are zeroed and reserved for
 * the first-hop address; the user's option list is then walked, length
 * checked, and copied in after the reserved slot.
 *
 * NOTE(review): this excerpt omits several original lines (local
 * declarations, the loop header, the error returns and the final store
 * into *pcbopt), so the exact error codes cannot be confirmed from here.
 */
1442 ip_pcbopts(struct mbuf
**pcbopt
, const struct sockopt
*sopt
)
1448 uint8_t optval
, olen
, offset
;
1450 /* turn off any old options */
1452 (void)m_free(*pcbopt
);
/* cp walks the caller's option bytes; cnt is how many remain. */
1455 cp
= sopt
->sopt_data
;
1456 cnt
= sopt
->sopt_size
;
/* Presumably taken when cnt is zero; the test itself is not in this excerpt. */
1459 return (0); /* Only turning off any previous options */
/* Length is tested for a remainder modulo sizeof(int32_t); the action taken is not visible here. */
1462 if (cnt
% sizeof(int32_t))
1466 m
= m_get(M_DONTWAIT
, MT_SOOPTS
);
/*
 * Reserve and zero the leading struct in_addr (first-hop slot); dp is
 * left pointing just past it and m_len starts at that size.
 */
1470 dp
= mtod(m
, u_char
*);
1471 memset(dp
, 0, sizeof(struct in_addr
));
1472 dp
+= sizeof(struct in_addr
);
1473 m
->m_len
= sizeof(struct in_addr
);
1476 * IP option list according to RFC791. Each option is of the form
1478 * [optval] [olen] [(olen - 2) data bytes]
1480 * we validate the list and copy options to an mbuf for prepending
1481 * to data packets. The IP first-hop destination address will be
1482 * stored before actual options and is zero if unset.
1485 optval
= cp
[IPOPT_OPTVAL
];
/* EOL and NOP are tested separately from options that carry a length byte. */
1487 if (optval
== IPOPT_EOL
|| optval
== IPOPT_NOP
) {
/* The length byte itself must lie within the remaining input. */
1490 if (cnt
< IPOPT_OLEN
+ 1)
1493 olen
= cp
[IPOPT_OLEN
];
/* olen must cover at least the type and length bytes and must not exceed what remains. */
1494 if (olen
< IPOPT_OLEN
+ 1 || olen
> cnt
)
1498 if (optval
== IPOPT_LSRR
|| optval
== IPOPT_SSRR
) {
1500 * user process specifies route as:
1502 * D must be our final destination (but we can't
1503 * check that since we may not have connected yet).
1504 * A is first hop destination, which doesn't appear in
1505 * actual IP option, but is stored before the options.
/* A source-route option must hold its three header bytes plus at least one address. */
1507 if (olen
< IPOPT_OFFSET
+ 1 + sizeof(struct in_addr
))
1510 offset
= cp
[IPOPT_OFFSET
];
/* The first address in the user's route goes into the reserved slot at the front of the mbuf. */
1511 memcpy(mtod(m
, u_char
*), cp
+ IPOPT_OFFSET
+ 1,
1512 sizeof(struct in_addr
));
/* Account for the address just consumed: input pointer, remaining count and option length all shrink by one address. */
1514 cp
+= sizeof(struct in_addr
);
1515 cnt
-= sizeof(struct in_addr
);
1516 olen
-= sizeof(struct in_addr
);
/* Limit is MAX_IPOPTLEN of options plus the leading first-hop address. */
1518 if (m
->m_len
+ olen
> MAX_IPOPTLEN
+ sizeof(struct in_addr
))
1521 memcpy(dp
, cp
, olen
);
/* Overwrite the three header bytes of the copy with the saved type, the shortened length and the saved pointer byte. */
1522 dp
[IPOPT_OPTVAL
] = optval
;
1523 dp
[IPOPT_OLEN
] = olen
;
1524 dp
[IPOPT_OFFSET
] = offset
;
/* Same size limit for the other copy path (its enclosing branch line is not in this excerpt). */
1527 if (m
->m_len
+ olen
> MAX_IPOPTLEN
+ sizeof(struct in_addr
))
1530 memcpy(dp
, cp
, olen
);
1537 if (optval
== IPOPT_EOL
)
1553 * Following RFC 1724 section 3.3, an address in 0.0.0.0/8 is interpreted as an interface index.
/*
 * ip_multicast_if: find the interface designated by the address *a.
 *
 * When the top byte of the host-order address is zero, the low 24 bits
 * are taken as an interface index, range-checked against if_indexlim,
 * and used to index ifindex2ifnet[]; the index is also stored through
 * ifindexp. Otherwise the IN_IFADDR_HASH bucket for the address is
 * scanned for an entry whose address equals *a and whose interface has
 * IFF_MULTICAST set.
 *
 * NOTE(review): the return statements, the handling of an out-of-range
 * index, and any NULL test on ifindexp are not in this excerpt; callers
 * visible elsewhere in the file pass NULL for ifindexp, so confirm the
 * store is guarded.
 */
1555 static struct ifnet
*
1556 ip_multicast_if(struct in_addr
*a
, int *ifindexp
)
1559 struct ifnet
*ifp
= NULL
;
1560 struct in_ifaddr
*ia
;
/* Addresses whose first octet is zero carry an interface index in the remaining 24 bits. */
1564 if (ntohl(a
->s_addr
) >> 24 == 0) {
1565 ifindex
= ntohl(a
->s_addr
) & 0xffffff;
/* Reject indices outside [0, if_indexlim). */
1566 if (ifindex
< 0 || if_indexlim
<= ifindex
)
1568 ifp
= ifindex2ifnet
[ifindex
];
1572 *ifindexp
= ifindex
;
/* Ordinary address: look it up among the configured interface addresses. */
1574 LIST_FOREACH(ia
, &IN_IFADDR_HASH(a
->s_addr
), ia_hash
) {
1575 if (in_hosteq(ia
->ia_addr
.sin_addr
, *a
) &&
1576 (ia
->ia_ifp
->if_flags
& IFF_MULTICAST
) != 0) {
/*
 * ip_getoptval: fetch a small integer option value from sopt.
 *
 * The fetch is selected by sopt->sopt_size: a u_char-sized option is
 * read into cval with sockopt_get(); a second sockopt_get() reads
 * sizeof(u_int) bytes into tval (its case label is not in this
 * excerpt).
 *
 * NOTE(review): the comparison against maxval and the store through
 * val are not visible here; presumably the value is rejected when it
 * exceeds maxval -- confirm against the full source.
 */
1586 ip_getoptval(const struct sockopt
*sopt
, u_int8_t
*val
, u_int maxval
)
1595 switch (sopt
->sopt_size
) {
1596 case sizeof(u_char
):
1597 error
= sockopt_get(sopt
, &cval
, sizeof(u_char
));
1602 error
= sockopt_get(sopt
, &tval
, sizeof(u_int
));
1620 * Set the IP multicast options in response to user setsockopt().
/*
 * ip_setmoptions: apply one multicast setsockopt() request, selected by
 * sopt->sopt_name, to the ip_moptions structure referenced by *imop.
 *
 * Options handled in the visible switch: IP_MULTICAST_IF,
 * IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_ADD_MEMBERSHIP and
 * IP_DROP_MEMBERSHIP. A structure is malloc'ed (M_IPMOPTS, M_NOWAIT)
 * and filled with default values on the allocation path, and *imop is
 * freed again at the end when every field is back at its default.
 *
 * NOTE(review): this excerpt omits many original lines (break
 * statements, several if/else lines, error returns, the store back
 * into *imop), so control flow between the visible statements must be
 * confirmed against the full source.
 */
1623 ip_setmoptions(struct ip_moptions
**imop
, const struct sockopt
*sopt
)
1627 struct in_addr addr
;
1628 struct ip_mreq lmreq
, *mreq
;
1630 struct ip_moptions
*imo
= *imop
;
1635 * No multicast option buffer attached to the pcb;
1636 * allocate one and initialize to default values.
/* M_NOWAIT allocation; the handling of a NULL result is not in this excerpt. */
1638 imo
= malloc(sizeof(*imo
), M_IPMOPTS
, M_NOWAIT
);
/* Defaults: no interface selected, INADDR_ANY, default TTL and loop flag, zero memberships. */
1643 imo
->imo_multicast_ifp
= NULL
;
1644 imo
->imo_multicast_addr
.s_addr
= INADDR_ANY
;
1645 imo
->imo_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
1646 imo
->imo_multicast_loop
= IP_DEFAULT_MULTICAST_LOOP
;
1647 imo
->imo_num_memberships
= 0;
1650 switch (sopt
->sopt_name
) {
1651 case IP_MULTICAST_IF
:
1653 * Select the interface for outgoing multicast packets.
/* The option payload is a struct in_addr. */
1655 error
= sockopt_get(sopt
, &addr
, sizeof(addr
));
1660 * INADDR_ANY is used to remove a previous selection.
1661 * When no interface is selected, a default one is
1662 * chosen every time a multicast packet is sent.
1664 if (in_nullhost(addr
)) {
1665 imo
->imo_multicast_ifp
= NULL
;
1669 * The selected interface is identified by its local
1670 * IP address. Find the interface and confirm that
1671 * it supports multicasting.
1673 ifp
= ip_multicast_if(&addr
, &ifindex
);
1674 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0) {
1675 error
= EADDRNOTAVAIL
;
1678 imo
->imo_multicast_ifp
= ifp
;
/*
 * imo_multicast_addr is set to the user-supplied address on one path
 * and reset to INADDR_ANY on the other; the condition choosing between
 * them is not in this excerpt.
 */
1680 imo
->imo_multicast_addr
= addr
;
1682 imo
->imo_multicast_addr
.s_addr
= INADDR_ANY
;
1685 case IP_MULTICAST_TTL
:
1687 * Set the IP time-to-live for outgoing multicast packets.
/* Upper bound passed to ip_getoptval is MAXTTL. */
1689 error
= ip_getoptval(sopt
, &imo
->imo_multicast_ttl
, MAXTTL
);
1692 case IP_MULTICAST_LOOP
:
1694 * Set the loopback flag for outgoing multicast packets.
1695 * Must be zero or one.
/* Upper bound passed to ip_getoptval is 1. */
1697 error
= ip_getoptval(sopt
, &imo
->imo_multicast_loop
, 1);
1700 case IP_ADD_MEMBERSHIP
:
1702 * Add a multicast group membership.
1703 * Group must be a valid IP multicast address.
/* The option payload is a struct ip_mreq, copied into the local lmreq. */
1705 error
= sockopt_get(sopt
, &lmreq
, sizeof(lmreq
));
1711 if (!IN_MULTICAST(mreq
->imr_multiaddr
.s_addr
)) {
1716 * If no interface address was provided, use the interface of
1717 * the route to the given multicast address.
1719 if (in_nullhost(mreq
->imr_interface
)) {
1722 struct sockaddr dst
;
1723 struct sockaddr_in dst4
;
/* Build a temporary route cache entry for the group address and take the interface of the route it resolves to. */
1727 memset(&ro
, 0, sizeof(ro
));
1729 sockaddr_in_init(&u
.dst4
, &mreq
->imr_multiaddr
, 0);
1730 rtcache_setdst(&ro
, &u
.dst
);
1731 ifp
= (rt
= rtcache_init(&ro
)) != NULL
? rt
->rt_ifp
/* An interface address was given: resolve it (no index is requested, ifindexp is NULL). */
1735 ifp
= ip_multicast_if(&mreq
->imr_interface
, NULL
);
1738 * See if we found an interface, and confirm that it
1739 * supports multicast.
1741 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0) {
1742 error
= EADDRNOTAVAIL
;
1746 * See if the membership already exists or if all the
1747 * membership slots are full.
/* Linear scan for an entry with the same interface and group address. */
1749 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
) {
1750 if (imo
->imo_membership
[i
]->inm_ifp
== ifp
&&
1751 in_hosteq(imo
->imo_membership
[i
]->inm_addr
,
1752 mreq
->imr_multiaddr
))
/* i below the count means the scan stopped on a match (the error assigned is not in this excerpt). */
1755 if (i
< imo
->imo_num_memberships
) {
/* No match and the array already holds IP_MAX_MEMBERSHIPS entries. */
1759 if (i
== IP_MAX_MEMBERSHIPS
) {
1760 error
= ETOOMANYREFS
;
1764 * Everything looks good; add a new record to the multicast
1765 * address list for the given interface.
/* The in_addmulti() result is stored directly in slot i; NULL is tested as the failure case. */
1767 if ((imo
->imo_membership
[i
] =
1768 in_addmulti(&mreq
->imr_multiaddr
, ifp
)) == NULL
) {
1772 ++imo
->imo_num_memberships
;
1775 case IP_DROP_MEMBERSHIP
:
1777 * Drop a multicast group membership.
1778 * Group must be a valid IP multicast address.
1780 error
= sockopt_get(sopt
, &lmreq
, sizeof(lmreq
));
1786 if (!IN_MULTICAST(mreq
->imr_multiaddr
.s_addr
)) {
1791 * If an interface address was specified, get a pointer
1792 * to its ifnet structure.
1794 if (in_nullhost(mreq
->imr_interface
))
1797 ifp
= ip_multicast_if(&mreq
->imr_interface
, NULL
);
1799 error
= EADDRNOTAVAIL
;
1804 * Find the membership in the membership array.
1806 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
) {
1808 imo
->imo_membership
[i
]->inm_ifp
== ifp
) &&
1809 in_hosteq(imo
->imo_membership
[i
]->inm_addr
,
1810 mreq
->imr_multiaddr
))
/* i equal to the count means no entry matched. */
1813 if (i
== imo
->imo_num_memberships
) {
1814 error
= EADDRNOTAVAIL
;
1818 * Give up the multicast address record to which the
1819 * membership points.
1821 in_delmulti(imo
->imo_membership
[i
]);
1823 * Remove the gap in the membership array.
/* Shift every entry above the removed slot down by one, then shrink the count. */
1825 for (++i
; i
< imo
->imo_num_memberships
; ++i
)
1826 imo
->imo_membership
[i
-1] = imo
->imo_membership
[i
];
1827 --imo
->imo_num_memberships
;
/*
 * NOTE: the original wording below says "mbuf", but the object released
 * here is the malloc'ed ip_moptions structure (type M_IPMOPTS).
 */
1836 * If all options have default values, no need to keep the mbuf.
1838 if (imo
->imo_multicast_ifp
== NULL
&&
1839 imo
->imo_multicast_ttl
== IP_DEFAULT_MULTICAST_TTL
&&
1840 imo
->imo_multicast_loop
== IP_DEFAULT_MULTICAST_LOOP
&&
1841 imo
->imo_num_memberships
== 0) {
1842 free(*imop
, M_IPMOPTS
);
1850 * Return the IP multicast options in response to user getsockopt().
/*
 * ip_getmoptions: answer a multicast getsockopt() request, selected by
 * sopt->sopt_name, from the ip_moptions structure imo (which may be
 * NULL).
 *
 * IP_MULTICAST_IF returns a struct in_addr through sockopt_set().
 * IP_MULTICAST_TTL and IP_MULTICAST_LOOP return optval, which falls
 * back to the corresponding IP_DEFAULT_MULTICAST_* constant when imo is
 * NULL.
 *
 * NOTE(review): declarations, break statements, the default case and
 * the final return are not in this excerpt.
 */
1853 ip_getmoptions(struct ip_moptions
*imo
, struct sockopt
*sopt
)
1855 struct in_addr addr
;
1856 struct in_ifaddr
*ia
;
1862 switch (sopt
->sopt_name
) {
1863 case IP_MULTICAST_IF
:
/* No options or no interface selected; the value assigned on this path is not in this excerpt (presumably the zero address). */
1864 if (imo
== NULL
|| imo
->imo_multicast_ifp
== NULL
)
1866 else if (imo
->imo_multicast_addr
.s_addr
) {
1867 /* return the value user has set */
1868 addr
= imo
->imo_multicast_addr
;
/* Otherwise report the address IFP_TO_IA finds for the selected interface, or zeroin_addr when it finds none. */
1870 IFP_TO_IA(imo
->imo_multicast_ifp
, ia
);
1871 addr
= ia
? ia
->ia_addr
.sin_addr
: zeroin_addr
;
1873 error
= sockopt_set(sopt
, &addr
, sizeof(addr
));
1876 case IP_MULTICAST_TTL
:
/* Stored TTL, or the default when no options structure exists. */
1877 optval
= imo
? imo
->imo_multicast_ttl
1878 : IP_DEFAULT_MULTICAST_TTL
;
1880 error
= sockopt_set(sopt
, &optval
, sizeof(optval
));
1883 case IP_MULTICAST_LOOP
:
/* Stored loop flag, or the default when no options structure exists. */
1884 optval
= imo
? imo
->imo_multicast_loop
1885 : IP_DEFAULT_MULTICAST_LOOP
;
1887 error
= sockopt_set(sopt
, &optval
, sizeof(optval
));
1898 * Discard the IP multicast options.
/*
 * ip_freemoptions: release a multicast options structure.
 *
 * Every entry in imo->imo_membership[0 .. imo_num_memberships-1] is
 * handed to in_delmulti(), then imo itself is freed with type
 * M_IPMOPTS.
 *
 * NOTE(review): the local declaration and any NULL guard on imo are
 * not in this excerpt; confirm whether callers may pass NULL.
 */
1901 ip_freemoptions(struct ip_moptions
*imo
)
/* Drop each group membership before freeing the container. */
1906 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
)
1907 in_delmulti(imo
->imo_membership
[i
]);
1908 free(imo
, M_IPMOPTS
);
1913 * Routine called from ip_output() to loop back a copy of an IP multicast
1914 * packet to the input queue of a specified interface. Note that this
1915 * calls the output routine of the loopback "driver", but with an interface
1916 * pointer that might NOT be lo0ifp -- easier than replicating that code here.
/*
 * ip_mloopback: loop a copy of the multicast packet m back through
 * looutput() using the caller's ifp and dst.
 *
 * Visible steps: the packet is duplicated with
 * m_copypacket(M_DONTWAIT); when the copy's first mbuf has M_EXT set or
 * holds fewer than sizeof(struct ip) bytes, m_pullup() is applied for
 * the IP header; if the copy carries M_CSUM_TCPv4 or M_CSUM_UDPv4,
 * in_delayed_cksum() is run and those flags are cleared; ip_sum is then
 * computed with in_cksum() over ip_hl << 2 bytes and the copy is passed
 * to looutput(), whose return value is discarded.
 *
 * NOTE(review): the NULL tests on copym and any other header fix-ups
 * are not in this excerpt.
 */
1919 ip_mloopback(struct ifnet
*ifp
, struct mbuf
*m
, const struct sockaddr_in
*dst
)
/* Work on a copy so the original packet is left for the caller. */
1924 copym
= m_copypacket(m
, M_DONTWAIT
);
1926 && (copym
->m_flags
& M_EXT
|| copym
->m_len
< sizeof(struct ip
)))
1927 copym
= m_pullup(copym
, sizeof(struct ip
));
1931 * We don't bother to fragment if the IP length is greater
1932 * than the interface's MTU. Can this possibly matter?
1934 ip
= mtod(copym
, struct ip
*);
1936 if (copym
->m_pkthdr
.csum_flags
& (M_CSUM_TCPv4
|M_CSUM_UDPv4
)) {
1937 in_delayed_cksum(copym
);
1938 copym
->m_pkthdr
.csum_flags
&=
1939 ~(M_CSUM_TCPv4
|M_CSUM_UDPv4
);
1943 ip
->ip_sum
= in_cksum(copym
, ip
->ip_hl
<< 2);
1944 (void)looutput(ifp
, copym
, sintocsa(dst
), NULL
);