1 /* $NetBSD: raw_ip.c,v 1.110 2009/09/16 15:23:05 pooka Exp $ */
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.110 2009/09/16 15:23:05 pooka Exp $");
67 #include "opt_compat_netbsd.h"
68 #include "opt_ipsec.h"
69 #include "opt_mrouting.h"
71 #include <sys/param.h>
72 #include <sys/sysctl.h>
73 #include <sys/malloc.h>
75 #include <sys/socket.h>
76 #include <sys/protosw.h>
77 #include <sys/socketvar.h>
78 #include <sys/errno.h>
79 #include <sys/systm.h>
81 #include <sys/kauth.h>
84 #include <net/route.h>
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/ip_private.h>
91 #include <netinet/ip_mroute.h>
92 #include <netinet/ip_icmp.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/in_proto.h>
95 #include <netinet/in_var.h>
97 #include <machine/stdarg.h>
100 #include <netinet6/ipsec.h>
101 #include <netinet6/ipsec_private.h>
105 #include <netipsec/ipsec.h>
106 #include <netipsec/ipsec_var.h>
107 #include <netipsec/ipsec_private.h>
108 #endif /* FAST_IPSEC */
111 #include <compat/sys/socket.h>
114 struct inpcbtable rawcbtable
;
116 int rip_pcbnotify(struct inpcbtable
*, struct in_addr
,
117 struct in_addr
, int, int, void (*)(struct inpcb
*, int));
118 int rip_bind(struct inpcb
*, struct mbuf
*);
119 int rip_connect(struct inpcb
*, struct mbuf
*);
120 void rip_disconnect(struct inpcb
*);
122 static void sysctl_net_inet_raw_setup(struct sysctllog
**);
125 * Nominal space allocated to a raw ip socket.
131 * Raw interface to IP protocol.
135 * Initialize raw connection block q.
141 sysctl_net_inet_raw_setup(NULL
);
142 in_pcbinit(&rawcbtable
, 1, 1);
146 rip_sbappendaddr(struct inpcb
*last
, struct ip
*ip
, const struct sockaddr
*sa
,
147 int hlen
, struct mbuf
*opts
, struct mbuf
*n
)
149 if (last
->inp_flags
& INP_NOHEADER
)
151 if (last
->inp_flags
& INP_CONTROLOPTS
153 || last
->inp_socket
->so_options
& SO_OTIMESTAMP
155 || last
->inp_socket
->so_options
& SO_TIMESTAMP
)
156 ip_savecontrol(last
, &opts
, ip
, n
);
157 if (sbappendaddr(&last
->inp_socket
->so_rcv
, sa
, n
, opts
) == 0) {
158 /* should notify about lost packet */
163 sorwakeup(last
->inp_socket
);
167 * Setup generic address and protocol structures
168 * for raw_input routine, then pass them along with
172 rip_input(struct mbuf
*m
, ...)
175 struct ip
*ip
= mtod(m
, struct ip
*);
176 struct inpcb_hdr
*inph
;
178 struct inpcb
*last
= NULL
;
179 struct mbuf
*n
, *opts
= NULL
;
180 struct sockaddr_in ripsrc
;
184 (void)va_arg(ap
, int); /* ignore value, advance ap */
185 proto
= va_arg(ap
, int);
188 sockaddr_in_init(&ripsrc
, &ip
->ip_src
, 0);
191 * XXX Compatibility: programs using raw IP expect ip_len
192 * XXX to have the header length subtracted, and in host order.
193 * XXX ip_off is also expected to be host order.
195 hlen
= ip
->ip_hl
<< 2;
196 ip
->ip_len
= ntohs(ip
->ip_len
) - hlen
;
199 CIRCLEQ_FOREACH(inph
, &rawcbtable
.inpt_queue
, inph_queue
) {
200 inp
= (struct inpcb
*)inph
;
201 if (inp
->inp_af
!= AF_INET
)
203 if (inp
->inp_ip
.ip_p
&& inp
->inp_ip
.ip_p
!= proto
)
205 if (!in_nullhost(inp
->inp_laddr
) &&
206 !in_hosteq(inp
->inp_laddr
, ip
->ip_dst
))
208 if (!in_nullhost(inp
->inp_faddr
) &&
209 !in_hosteq(inp
->inp_faddr
, ip
->ip_src
))
213 #if defined(IPSEC) || defined(FAST_IPSEC)
214 /* check AH/ESP integrity. */
215 else if (ipsec4_in_reject_so(m
, last
->inp_socket
)) {
216 IPSEC_STATINC(IPSEC_STAT_IN_POLVIO
);
217 /* do not inject data to pcb */
220 else if ((n
= m_copypacket(m
, M_DONTWAIT
)) != NULL
) {
221 rip_sbappendaddr(last
, ip
, sintosa(&ripsrc
), hlen
, opts
,
227 #if defined(IPSEC) || defined(FAST_IPSEC)
228 /* check AH/ESP integrity. */
229 if (last
!= NULL
&& ipsec4_in_reject_so(m
, last
->inp_socket
)) {
231 IPSEC_STATINC(IPSEC_STAT_IN_POLVIO
);
232 IP_STATDEC(IP_STAT_DELIVERED
);
233 /* do not inject data to pcb */
237 rip_sbappendaddr(last
, ip
, sintosa(&ripsrc
), hlen
, opts
, m
);
238 else if (inetsw
[ip_protox
[ip
->ip_p
]].pr_input
== rip_input
) {
241 icmp_error(m
, ICMP_UNREACH
, ICMP_UNREACH_PROTOCOL
,
243 ips
= IP_STAT_GETREF();
244 ips
[IP_STAT_NOPROTO
]++;
245 ips
[IP_STAT_DELIVERED
]--;
253 rip_pcbnotify(struct inpcbtable
*table
,
254 struct in_addr faddr
, struct in_addr laddr
, int proto
, int errno
,
255 void (*notify
)(struct inpcb
*, int))
257 struct inpcb
*inp
, *ninp
;
261 for (inp
= (struct inpcb
*)CIRCLEQ_FIRST(&table
->inpt_queue
);
262 inp
!= (struct inpcb
*)&table
->inpt_queue
;
264 ninp
= (struct inpcb
*)inp
->inp_queue
.cqe_next
;
265 if (inp
->inp_af
!= AF_INET
)
267 if (inp
->inp_ip
.ip_p
&& inp
->inp_ip
.ip_p
!= proto
)
269 if (in_hosteq(inp
->inp_faddr
, faddr
) &&
270 in_hosteq(inp
->inp_laddr
, laddr
)) {
271 (*notify
)(inp
, errno
);
280 rip_ctlinput(int cmd
, const struct sockaddr
*sa
, void *v
)
283 void (*notify
)(struct inpcb
*, int) = in_rtchange
;
286 if (sa
->sa_family
!= AF_INET
||
287 sa
->sa_len
!= sizeof(struct sockaddr_in
))
289 if ((unsigned)cmd
>= PRC_NCMDS
)
291 errno
= inetctlerrmap
[cmd
];
292 if (PRC_IS_REDIRECT(cmd
))
293 notify
= in_rtchange
, ip
= 0;
294 else if (cmd
== PRC_HOSTDEAD
)
299 rip_pcbnotify(&rawcbtable
, satocsin(sa
)->sin_addr
,
300 ip
->ip_src
, ip
->ip_p
, errno
, notify
);
302 /* XXX mapped address case */
304 in_pcbnotifyall(&rawcbtable
, satocsin(sa
)->sin_addr
, errno
,
310 * Generate IP header and pass packet to ip_output.
311 * Tack on options user may have setup with control call.
314 rip_output(struct mbuf
*m
, ...)
323 inp
= va_arg(ap
, struct inpcb
*);
327 (inp
->inp_socket
->so_options
& SO_DONTROUTE
) | IP_ALLOWBROADCAST
331 * If the user handed us a complete IP packet, use it.
332 * Otherwise, allocate an mbuf for a header and fill it in.
334 if ((inp
->inp_flags
& INP_HDRINCL
) == 0) {
335 if ((m
->m_pkthdr
.len
+ sizeof(struct ip
)) > IP_MAXPACKET
) {
339 M_PREPEND(m
, sizeof(struct ip
), M_DONTWAIT
);
342 ip
= mtod(m
, struct ip
*);
344 ip
->ip_off
= htons(0);
345 ip
->ip_p
= inp
->inp_ip
.ip_p
;
346 ip
->ip_len
= htons(m
->m_pkthdr
.len
);
347 ip
->ip_src
= inp
->inp_laddr
;
348 ip
->ip_dst
= inp
->inp_faddr
;
350 opts
= inp
->inp_options
;
352 if (m
->m_pkthdr
.len
> IP_MAXPACKET
) {
356 ip
= mtod(m
, struct ip
*);
359 * If the mbuf is read-only, we need to allocate
360 * a new mbuf for the header, since we need to
364 int hlen
= ip
->ip_hl
<< 2;
366 m
= m_copyup(m
, hlen
, (max_linkhdr
+ 3) & ~3);
368 return (ENOMEM
); /* XXX */
369 ip
= mtod(m
, struct ip
*);
372 /* XXX userland passes ip_len and ip_off in host order */
373 if (m
->m_pkthdr
.len
!= ip
->ip_len
) {
379 if (ip
->ip_id
!= 0 || m
->m_pkthdr
.len
< IP_MINFRAGSIZE
)
380 flags
|= IP_NOIPNEWID
;
382 /* XXX prevent ip_output from overwriting header fields */
383 flags
|= IP_RAWOUTPUT
;
384 IP_STATINC(IP_STAT_RAWOUT
);
386 return (ip_output(m
, opts
, &inp
->inp_route
, flags
, inp
->inp_moptions
,
387 inp
->inp_socket
, &inp
->inp_errormtu
));
391 * Raw IP socket option processing.
394 rip_ctloutput(int op
, struct socket
*so
, struct sockopt
*sopt
)
396 struct inpcb
*inp
= sotoinpcb(so
);
400 if (sopt
->sopt_level
== SOL_SOCKET
&& sopt
->sopt_name
== SO_NOHEADER
) {
401 if (op
== PRCO_GETOPT
) {
402 optval
= (inp
->inp_flags
& INP_NOHEADER
) ? 1 : 0;
403 error
= sockopt_set(sopt
, &optval
, sizeof(optval
));
404 } else if (op
== PRCO_SETOPT
) {
405 error
= sockopt_getint(sopt
, &optval
);
409 inp
->inp_flags
&= ~INP_HDRINCL
;
410 inp
->inp_flags
|= INP_NOHEADER
;
412 inp
->inp_flags
&= ~INP_NOHEADER
;
415 } else if (sopt
->sopt_level
!= IPPROTO_IP
)
416 return ip_ctloutput(op
, so
, sopt
);
421 switch (sopt
->sopt_name
) {
423 error
= sockopt_getint(sopt
, &optval
);
427 inp
->inp_flags
|= INP_HDRINCL
;
429 inp
->inp_flags
&= ~INP_HDRINCL
;
441 case MRT_ADD_BW_UPCALL
:
442 case MRT_DEL_BW_UPCALL
:
443 error
= ip_mrouter_set(so
, sopt
);
448 error
= ip_ctloutput(op
, so
, sopt
);
454 switch (sopt
->sopt_name
) {
456 optval
= inp
->inp_flags
& INP_HDRINCL
;
457 error
= sockopt_set(sopt
, &optval
, sizeof(optval
));
463 case MRT_API_SUPPORT
:
465 error
= ip_mrouter_get(so
, sopt
);
470 error
= ip_ctloutput(op
, so
, sopt
);
480 rip_bind(struct inpcb
*inp
, struct mbuf
*nam
)
482 struct sockaddr_in
*addr
= mtod(nam
, struct sockaddr_in
*);
484 if (nam
->m_len
!= sizeof(*addr
))
486 if (TAILQ_FIRST(&ifnet
) == 0)
487 return (EADDRNOTAVAIL
);
488 if (addr
->sin_family
!= AF_INET
&&
489 addr
->sin_family
!= AF_IMPLINK
)
490 return (EAFNOSUPPORT
);
491 if (!in_nullhost(addr
->sin_addr
) &&
492 ifa_ifwithaddr(sintosa(addr
)) == 0)
493 return (EADDRNOTAVAIL
);
494 inp
->inp_laddr
= addr
->sin_addr
;
499 rip_connect(struct inpcb
*inp
, struct mbuf
*nam
)
501 struct sockaddr_in
*addr
= mtod(nam
, struct sockaddr_in
*);
503 if (nam
->m_len
!= sizeof(*addr
))
505 if (TAILQ_FIRST(&ifnet
) == 0)
506 return (EADDRNOTAVAIL
);
507 if (addr
->sin_family
!= AF_INET
&&
508 addr
->sin_family
!= AF_IMPLINK
)
509 return (EAFNOSUPPORT
);
510 inp
->inp_faddr
= addr
->sin_addr
;
515 rip_disconnect(struct inpcb
*inp
)
518 inp
->inp_faddr
= zeroin_addr
;
521 u_long rip_sendspace
= RIPSNDQ
;
522 u_long rip_recvspace
= RIPRCVQ
;
526 rip_usrreq(struct socket
*so
, int req
,
527 struct mbuf
*m
, struct mbuf
*nam
, struct mbuf
*control
, struct lwp
*l
)
533 extern struct socket
*ip_mrouter
;
536 if (req
== PRU_CONTROL
)
537 return in_control(so
, (long)m
, nam
, (struct ifnet
*)control
, l
);
541 if (req
== PRU_PURGEIF
) {
542 mutex_enter(softnet_lock
);
543 in_pcbpurgeif0(&rawcbtable
, (struct ifnet
*)control
);
544 in_purgeif((struct ifnet
*)control
);
545 in_pcbpurgeif(&rawcbtable
, (struct ifnet
*)control
);
546 mutex_exit(softnet_lock
);
553 if (req
!= PRU_SEND
&& req
!= PRU_SENDOOB
&& control
)
554 panic("rip_usrreq: unexpected control mbuf");
556 if (inp
== NULL
&& req
!= PRU_ATTACH
) {
575 /* XXX: raw socket permissions are checked in socreate() */
577 if (so
->so_snd
.sb_hiwat
== 0 || so
->so_rcv
.sb_hiwat
== 0) {
578 error
= soreserve(so
, rip_sendspace
, rip_recvspace
);
582 error
= in_pcballoc(so
, &rawcbtable
);
586 inp
->inp_ip
.ip_p
= (long)nam
;
591 if (so
== ip_mrouter
)
598 error
= rip_bind(inp
, nam
);
606 error
= rip_connect(inp
, nam
);
617 soisdisconnected(so
);
622 * Mark the connection as being incapable of further input.
633 * Ship a packet out. The appropriate raw output
634 * routine handles any massaging necessary.
637 if (control
&& control
->m_len
) {
645 if ((so
->so_state
& SS_ISCONNECTED
) != 0) {
649 error
= rip_connect(inp
, nam
);
656 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
661 error
= rip_output(m
, inp
);
669 * stat: don't bother with a blocksize.
685 in_setsockaddr(inp
, nam
);
689 in_setpeeraddr(inp
, nam
);
702 sysctl_net_inet_raw_setup(struct sysctllog
**clog
)
705 sysctl_createv(clog
, 0, NULL
, NULL
,
707 CTLTYPE_NODE
, "net", NULL
,
710 sysctl_createv(clog
, 0, NULL
, NULL
,
712 CTLTYPE_NODE
, "inet", NULL
,
714 CTL_NET
, PF_INET
, CTL_EOL
);
715 sysctl_createv(clog
, 0, NULL
, NULL
,
718 SYSCTL_DESCR("Raw IPv4 settings"),
720 CTL_NET
, PF_INET
, IPPROTO_RAW
, CTL_EOL
);
722 sysctl_createv(clog
, 0, NULL
, NULL
,
724 CTLTYPE_STRUCT
, "pcblist",
725 SYSCTL_DESCR("Raw IPv4 control block list"),
726 sysctl_inpcblist
, 0, &rawcbtable
, 0,
727 CTL_NET
, PF_INET
, IPPROTO_RAW
,
728 CTL_CREATE
, CTL_EOL
);