2 * Copyright (c) 1982, 1986, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 2006 Pavel Fedin
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/protosw.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
48 #include <sys/queue.h>
49 #include <sys/synch.h>
51 #include <net/route.h>
53 #include <net/if_protos.h>
55 #include <netinet/in.h>
56 #include <netinet/in_systm.h>
57 #include <netinet/ip.h>
58 #include <netinet/in_pcb.h>
59 #include <netinet/in_var.h>
60 #include <netinet/ip_var.h>
/*
 * Table defined outside this file.  In this file it is indexed by a
 * ctlinput command code (in_pcbnotify) to obtain the error number that is
 * handed to the notify callback.
 */
62 extern u_char inetctlerrmap
[];
/*
 * File-scope address object with no initializer, hence all-zero.  It is
 * passed as the "foreign address" argument of the in_pcblookup() calls made
 * while a local address/port is being checked.
 */
64 struct in_addr zeroin_addr
;
/*
 * in_pcballoc: create an Internet protocol control block (struct inpcb)
 * and hook it up to socket `so`.
 *
 *   so      - socket that receives the new PCB through so->so_pcb.
 *   pcbinfo - per-protocol PCB bookkeeping; the new PCB stores it in
 *             inp_pcbinfo and is linked onto pcbinfo->listhead.
 *
 * NOTE(review): several lines of this definition are not present in this
 * view (return type, declaration of `so`, braces, return statements and
 * whatever sits between the visible statements).  In particular, confirm
 * against the complete source how a failed M_NOWAIT allocation is handled
 * before the bzero() below.
 */
67 in_pcballoc(so
, pcbinfo
)
69 struct inpcbinfo
*pcbinfo
;
71 register struct inpcb
*inp
;
/* Allocate the PCB (malloc type M_PCB, flag M_NOWAIT). */
74 MALLOC(inp
, struct inpcb
*, sizeof(*inp
), M_PCB
, M_NOWAIT
);
/* Start from an all-zero PCB, then remember which protocol owns it. */
77 bzero((caddr_t
)inp
, sizeof(*inp
));
78 inp
->inp_pcbinfo
= pcbinfo
;
/* Link the PCB at the head of the protocol's PCB list. */
81 LIST_INSERT_HEAD(pcbinfo
->listhead
, inp
, inp_list
);
/* Make the PCB reachable from the socket. */
84 so
->so_pcb
= (caddr_t
)inp
;
/*
 * NOTE(review): the line carrying this function's name is not visible in
 * this view.  Judging from the in_pcbbind(inp, ...) call made by
 * in_pcbconnect() and from the statements below, this is presumably
 * in_pcbbind(inp, nam) -- confirm against the complete source.
 *
 * Visible behaviour: choose and record the local address and local port of
 * PCB `inp`.
 *   - `nam`, when used, is an mbuf holding a struct sockaddr_in with the
 *     requested address/port.
 *   - A requested address that is neither multicast nor INADDR_ANY must
 *     belong to a local interface (ifa_ifwithaddr), else EADDRNOTAVAIL.
 *   - A requested port is checked against existing PCBs with
 *     in_pcblookup(); SO_REUSEADDR/SO_REUSEPORT influence that check.
 *   - When no port has been chosen, ports are tried starting from
 *     pcbinfo->lastport until in_pcblookup() finds no PCB using the
 *     candidate.
 * Several statements (braces, some conditions, some return statements, the
 * declarations of `nam`, `t`, `lport`) are not visible here; the comments
 * below only describe what can be seen.
 */
90 register struct inpcb
*inp
;
93 register struct socket
*so
= inp
->inp_socket
;
94 struct inpcbhead
*head
= inp
->inp_pcbinfo
->listhead
;
95 unsigned short *lastport
= &inp
->inp_pcbinfo
->lastport
;
96 struct sockaddr_in
*sin
;
97 // struct proc *p = curproc; /* XXX */
/* reuseport starts as the socket's own SO_REUSEPORT bit. */
99 int wild
= 0, reuseport
= (so
->so_options
& SO_REUSEPORT
);
103 return (EADDRNOTAVAIL
);
/*
 * Test: the PCB already has a local port or a non-wildcard local address.
 * The statement guarded by this test is not visible in this view.
 */
104 if (inp
->inp_lport
|| inp
->inp_laddr
.s_addr
!= INADDR_ANY
)
106 if ((so
->so_options
& (SO_REUSEADDR
|SO_REUSEPORT
)) == 0 &&
107 ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) == 0 ||
108 (so
->so_options
& SO_ACCEPTCONN
) == 0))
109 wild
= INPLOOKUP_WILDCARD
;
/* Interpret the mbuf payload as the requested sockaddr_in. */
111 sin
= mtod(nam
, struct sockaddr_in
*);
112 if (nam
->m_len
!= sizeof (*sin
))
116 * We should check the family, but old programs
117 * incorrectly fail to initialize it.
119 if (sin
->sin_family
!= AF_INET
)
120 return (EAFNOSUPPORT
);
/* Requested port, kept exactly as supplied in the sockaddr. */
122 lport
= sin
->sin_port
;
123 if (IN_MULTICAST(ntohl(sin
->sin_addr
.s_addr
))) {
125 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
126 * allow complete duplication of binding if
127 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
128 * and a multicast address is bound on both
129 * new and duplicated sockets.
131 if (so
->so_options
& SO_REUSEADDR
)
132 reuseport
= SO_REUSEADDR
|SO_REUSEPORT
;
133 } else if (sin
->sin_addr
.s_addr
!= INADDR_ANY
) {
134 sin
->sin_port
= 0; /* yech... */
135 if (ifa_ifwithaddr((struct sockaddr
*)sin
) == 0)
136 return (EADDRNOTAVAIL
);
/*
 * The reserved-port privilege check that follows is commented out in this
 * file.  After it, in_pcblookup() is asked whether another PCB already
 * uses this local address/port; a hit whose socket shares none of the
 * `reuseport` option bits is a conflict (the statement taken on conflict
 * is not visible in this view).
 */
142 /* if (ntohs(lport) < IPPORT_RESERVED &&
143 (error = suser(p->p_ucred, &p->p_acflag)))
145 t
= in_pcblookup(head
, zeroin_addr
, 0,
146 sin
->sin_addr
, lport
, wild
);
147 if (t
&& (reuseport
& t
->inp_socket
->so_options
) == 0)
150 inp
->inp_laddr
= sin
->sin_addr
;
/*
 * Port selection: keep *lastport inside
 * [IPPORT_RESERVED, IPPORT_USERRESERVED], convert it to network byte order
 * and repeat while in_pcblookup() still finds a PCB using the candidate.
 * NOTE(review): the statement that advances *lastport and the opening of
 * the do-loop are not visible here -- confirm against the complete source.
 */
155 if (*lastport
< IPPORT_RESERVED
||
156 *lastport
> IPPORT_USERRESERVED
)
157 *lastport
= IPPORT_RESERVED
;
158 lport
= htons(*lastport
);
159 } while (in_pcblookup(head
,
160 zeroin_addr
, 0, inp
->inp_laddr
, lport
, wild
));
/* Record the chosen local port in the PCB. */
161 inp
->inp_lport
= lport
;
167 * Transform old in_pcbconnect() into an inner subroutine for new
168 * in_pcbconnect(): Do some validity-checking on the remote
169 * address (in mbuf 'nam') and then determine local host address
170 * (i.e., which interface) to use to access that remote host.
172 * This preserves definition of in_pcbconnect(), while supporting a
173 * slightly different version for T/TCP. (This is more than
174 * a bit of a kludge, but cleaning up the internal interfaces would
175 * have forced minor changes in every protocol).
/*
 * in_pcbladdr: validate the destination held in mbuf `nam` and determine
 * which local interface address should be used to reach it.
 *
 *   inp        - PCB being connected; its cached route (inp_route), socket
 *                options and multicast options are consulted.
 *   nam        - mbuf whose data is a struct sockaddr_in (the destination).
 *                The sockaddr may be modified in place: an INADDR_ANY or
 *                INADDR_BROADCAST destination is replaced, see below.
 *   plocal_sin - out parameter; on the path visible at the end it receives
 *                a pointer to the chosen interface's ia_addr.
 *
 * Visible error returns: EAFNOSUPPORT (family is not AF_INET) and
 * EADDRNOTAVAIL (zero port, or no usable interface address).
 * NOTE(review): a number of lines (declaration of `nam`, braces, some
 * conditions and return statements, the route-allocation call) are not
 * present in this view; comments below describe only what is visible.
 */
179 in_pcbladdr(inp
, nam
, plocal_sin
)
180 register struct inpcb
*inp
;
182 struct sockaddr_in
**plocal_sin
;
184 struct in_ifaddr
*ia
;
185 register struct sockaddr_in
*sin
= mtod(nam
, struct sockaddr_in
*);
/* Sanity checks on the destination sockaddr: length, family, port. */
187 if (nam
->m_len
!= sizeof (*sin
))
189 if (sin
->sin_family
!= AF_INET
)
190 return (EAFNOSUPPORT
);
191 if (sin
->sin_port
== 0)
192 return (EADDRNOTAVAIL
);
195 * If the destination address is INADDR_ANY,
196 * use the primary local address.
197 * If the supplied address is INADDR_BROADCAST,
198 * and the primary interface supports broadcast,
199 * choose the broadcast address for that interface.
201 #define satosin(sa) ((struct sockaddr_in *)(sa))
202 #define sintosa(sin) ((struct sockaddr *)(sin))
203 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
204 if (sin
->sin_addr
.s_addr
== INADDR_ANY
)
205 sin
->sin_addr
= IA_SIN(in_ifaddr
)->sin_addr
;
206 else if (sin
->sin_addr
.s_addr
== (u_long
)INADDR_BROADCAST
&&
207 (in_ifaddr
->ia_ifp
->if_flags
& IFF_BROADCAST
))
208 sin
->sin_addr
= satosin(&in_ifaddr
->ia_broadaddr
)->sin_addr
;
/* Only when the PCB has no local address yet is one derived below. */
210 if (inp
->inp_laddr
.s_addr
== INADDR_ANY
) {
211 register struct route
*ro
;
212 struct sockaddr_in
*rodst_saddr
;
214 ia
= (struct in_ifaddr
*)0;
216 * If route is known or can be allocated now,
217 * our src addr is taken from the i/f, else punt.
219 ro
= &inp
->inp_route
;
220 rodst_saddr
= (struct sockaddr_in
*)&ro
->ro_dst
;
223 (rodst_saddr
->sin_addr
.s_addr
!=
224 sin
->sin_addr
.s_addr
||
225 inp
->inp_socket
->so_options
& SO_DONTROUTE
)) {
/*
 * The cached route pointer is reset when the cached destination differs
 * from the requested one or SO_DONTROUTE is set (the first part of the
 * enclosing condition is not visible in this view).
 */
227 ro
->ro_rt
= (struct rtentry
*)0;
229 if ((inp
->inp_socket
->so_options
& SO_DONTROUTE
) == 0 && /*XXX*/
230 (ro
->ro_rt
== (struct rtentry
*)0 ||
231 ro
->ro_rt
->rt_ifp
== (struct ifnet
*)0)) {
232 /* No route yet, so try to acquire one */
233 ro
->ro_dst
.sa_family
= AF_INET
;
234 ro
->ro_dst
.sa_len
= sizeof(struct sockaddr_in
);
235 rodst_saddr
->sin_addr
= sin
->sin_addr
;
239 * If we found a route, use the address
240 * corresponding to the outgoing interface
241 * unless it is the loopback (in case a route
242 * to our address on another net goes to loopback).
244 if (ro
->ro_rt
&& !(ro
->ro_rt
->rt_ifp
->if_flags
& IFF_LOOPBACK
))
245 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
/*
 * Fallback lookups by destination address and by network.  The port is
 * saved here and restored after the lookups.
 */
247 u_short fport
= sin
->sin_port
;
250 ia
= ifatoia(ifa_ifwithdstaddr(sintosa(sin
)));
252 ia
= ifatoia(ifa_ifwithnet(sintosa(sin
)));
253 sin
->sin_port
= fport
;
257 return (EADDRNOTAVAIL
);
259 #ifdef ENABLE_MULTICAST
261 * If the destination address is multicast and an outgoing
262 * interface has been set as a multicast option, use the
263 * address of that interface as our source address.
265 if (IN_MULTICAST(ntohl(sin
->sin_addr
.s_addr
)) &&
266 inp
->inp_moptions
!= NULL
) {
267 struct ip_moptions
*imo
;
270 imo
= inp
->inp_moptions
;
271 if (imo
->imo_multicast_ifp
!= NULL
) {
272 ifp
= imo
->imo_multicast_ifp
;
/* Walk the in_ifaddr chain looking for an address on that interface. */
273 for (ia
= in_ifaddr
; ia
; ia
= ia
->ia_next
)
274 if (ia
->ia_ifp
== ifp
)
277 return (EADDRNOTAVAIL
);
282 * Don't do pcblookup call here; return interface in plocal_sin
283 * and exit to caller, that will do the lookup.
285 *plocal_sin
= &ia
->ia_addr
;
293 * Connect from a socket to a specified address.
294 * Both address and port must be specified in argument sin.
295 * If we don't have a local address for this socket yet,
 * then pick one (the address of the interface chosen by in_pcbladdr()).
/*
 * in_pcbconnect: record a foreign address/port in PCB `inp`.
 *
 *   inp - PCB to connect.
 *   nam - mbuf whose data is the destination struct sockaddr_in.
 *
 * Visible steps:
 *   1. in_pcbladdr() validates `nam` and yields the interface address to
 *      use as source (`ifaddr`); its result is stored in `error`.
 *   2. in_pcblookuphash() is probed for an existing PCB with the same
 *      foreign address/port and local address/port.
 *   3. If the PCB has no local address, a local port is bound first when
 *      none is set, and the interface address becomes the local address.
 *   4. The foreign address and port are stored in the PCB.
 * NOTE(review): the declarations of `nam` and `error`, the statements taken
 * when steps 1 and 2 fail, and the function's tail are not visible in this
 * view -- confirm against the complete source.
 */
299 in_pcbconnect(inp
, nam
)
300 register struct inpcb
*inp
;
303 struct sockaddr_in
*ifaddr
;
304 register struct sockaddr_in
*sin
= mtod(nam
, struct sockaddr_in
*);
308 * Call inner routine, to assign local interface address.
/* Intentional assignment inside the condition: keep the result in `error`. */
310 if (error
= in_pcbladdr(inp
, nam
, &ifaddr
))
/*
 * Duplicate check.  The local address used for the probe is the PCB's own
 * when set, otherwise the interface address selected above.
 */
313 if (in_pcblookuphash(inp
->inp_pcbinfo
, sin
->sin_addr
, sin
->sin_port
,
314 inp
->inp_laddr
.s_addr
? inp
->inp_laddr
: ifaddr
->sin_addr
,
315 inp
->inp_lport
) != NULL
)
317 if (inp
->inp_laddr
.s_addr
== INADDR_ANY
) {
/* NOTE(review): the result of in_pcbbind() is deliberately discarded here. */
318 if (inp
->inp_lport
== 0)
319 (void)in_pcbbind(inp
, (struct mbuf
*)0);
320 inp
->inp_laddr
= ifaddr
->sin_addr
;
/* Record the peer. */
322 inp
->inp_faddr
= sin
->sin_addr
;
323 inp
->inp_fport
= sin
->sin_port
;
/*
 * in_pcbdisconnect: forget the peer of PCB `inp`.
 *
 * Visible behaviour: the foreign address is reset to INADDR_ANY, then the
 * socket state is tested for SS_NOFDREF.
 * NOTE(review): the declaration of `inp`, the braces, the statements
 * between the two visible ones and the statement guarded by the SS_NOFDREF
 * test are not present in this view.
 */
329 in_pcbdisconnect(inp
)
333 inp
->inp_faddr
.s_addr
= INADDR_ANY
;
336 if (inp
->inp_socket
->so_state
& SS_NOFDREF
)
/*
 * NOTE(review): the line carrying this function's name is not visible in
 * this view; from its statements this is presumably in_pcbdetach(inp) --
 * confirm against the complete source.
 *
 * Visible behaviour: release what PCB `inp` owns and unlink it.
 *   - inp_options (an mbuf) is released with m_free() when present;
 *   - the cached route is released with rtfree() when present;
 *   - multicast options are released with ip_freemoptions() when the
 *     stack is built with ENABLE_MULTICAST;
 *   - the PCB is removed from its hash chain and from the PCB list.
 * Statements before, between and after the visible ones (including the
 * use of `so`) are not present in this view.
 *
 * Fix: the preprocessor guard used to read ENABLE_MUPTICAST, a symbol used
 * nowhere else (in_pcbladdr() tests ENABLE_MULTICAST).  With the misspelt
 * guard ip_freemoptions() was never compiled in, so multicast options set
 * on a socket were never released when the PCB went away.
 */
344 struct socket
*so
= inp
->inp_socket
;
349 if (inp
->inp_options
)
350 (void)m_free(inp
->inp_options
);
351 if (inp
->inp_route
.ro_rt
)
352 rtfree(inp
->inp_route
.ro_rt
);
353 #ifdef ENABLE_MULTICAST
354 ip_freemoptions(inp
->inp_moptions
);
/* Unlink from the hash chain first, then from the per-protocol list. */
357 LIST_REMOVE(inp
, inp_hash
);
358 LIST_REMOVE(inp
, inp_list
);
/*
 * in_setsockaddr: write the LOCAL address of PCB `inp` into mbuf `nam`
 * as a struct sockaddr_in.
 *
 *   inp - PCB whose inp_lport / inp_laddr are reported.
 *   nam - mbuf that receives the sockaddr; its m_len is set to
 *         sizeof(struct sockaddr_in).
 *
 * NOTE(review): the return type, the declaration of `nam` and the braces
 * are not present in this view.
 */
364 in_setsockaddr(inp
, nam
)
365 register struct inpcb
*inp
;
368 register struct sockaddr_in
*sin
;
370 nam
->m_len
= sizeof (*sin
);
371 sin
= mtod(nam
, struct sockaddr_in
*);
/* Clear the whole sockaddr so fields not set below are zero. */
372 bzero((caddr_t
)sin
, sizeof (*sin
));
373 sin
->sin_family
= AF_INET
;
374 sin
->sin_len
= sizeof(*sin
);
/* Port and address are copied as stored in the PCB, with no conversion. */
375 sin
->sin_port
= inp
->inp_lport
;
376 sin
->sin_addr
= inp
->inp_laddr
;
/*
 * in_setpeeraddr: write the FOREIGN (peer) address of PCB `inp` into mbuf
 * `nam` as a struct sockaddr_in.  Same layout as in_setsockaddr(), but the
 * values come from inp_fport / inp_faddr.
 *
 *   inp - PCB whose peer is reported.
 *   nam - mbuf that receives the sockaddr; its m_len is set to
 *         sizeof(struct sockaddr_in).
 *
 * NOTE(review): the return type, the parameter declarations and the braces
 * are not present in this view.
 */
380 in_setpeeraddr(inp
, nam
)
384 register struct sockaddr_in
*sin
;
386 nam
->m_len
= sizeof (*sin
);
387 sin
= mtod(nam
, struct sockaddr_in
*);
/* Clear the whole sockaddr so fields not set below are zero. */
388 bzero((caddr_t
)sin
, sizeof (*sin
));
389 sin
->sin_family
= AF_INET
;
390 sin
->sin_len
= sizeof(*sin
);
/* Port and address are copied as stored in the PCB, with no conversion. */
391 sin
->sin_port
= inp
->inp_fport
;
392 sin
->sin_addr
= inp
->inp_faddr
;
396 * Pass some notification to all connections of a protocol
397 * associated with address dst. The local address and/or port numbers
398 * may be specified to limit the search. The "usual action" will be
399 * taken, depending on the ctlinput cmd. The caller must filter any
400 * cmds that are uninteresting (e.g., no error in the map).
401 * Call the protocol specific routine (if any) to report
402 * any errors for each matching socket.
404 * Must be called at splnet.
/*
 * in_pcbnotify: call `notify` for every PCB on `head` that matches the
 * given foreign address and, when non-zero, the given ports / local
 * address.
 *
 *   head      - list of PCBs to scan (linked through inp_list).
 *   dst       - foreign address; must be AF_INET and not INADDR_ANY.
 *   fport_arg - foreign port filter, 0 matches any.
 *   laddr     - local address filter, 0 matches any.
 *   lport_arg - local port filter, 0 matches any.
 *   cmd       - ctlinput command code; also the index into inetctlerrmap[]
 *               that yields the error number passed to `notify`.
 *   notify    - callback invoked as (*notify)(pcb, errno); replaced by
 *               in_rtchange for redirect commands.
 *
 * Fix: the range check on `cmd` used `>`, which let cmd == PRC_NCMDS
 * through and then used it as an index into inetctlerrmap[].
 * NOTE(review): the table is declared here without a size; it is assumed
 * to have PRC_NCMDS entries (valid indices 0 .. PRC_NCMDS-1), as the name
 * of the limit suggests -- confirm against its definition.  The check is
 * now `>=`, so the one-past-the-end command is rejected like any other
 * out-of-range value.
 *
 * Other lines of this definition (return type, declaration of `cmd` and
 * `_errno`, braces, the statements guarded by the early checks, the
 * assignment of `oinp`) are not present in this view.
 */
407 in_pcbnotify(head
, dst
, fport_arg
, laddr
, lport_arg
, cmd
, notify
)
408 struct inpcbhead
*head
;
409 struct sockaddr
*dst
;
410 u_int fport_arg
, lport_arg
;
411 struct in_addr laddr
;
413 void (*notify
) __P((struct inpcb
*, int));
415 register struct inpcb
*inp
, *oinp
;
416 struct in_addr faddr
;
417 u_short fport
= fport_arg
, lport
= lport_arg
;
/* Reject command codes that cannot index inetctlerrmap[] and non-IPv4 dst. */
420 if ((unsigned)cmd
>= PRC_NCMDS
|| dst
->sa_family
!= AF_INET
)
422 faddr
= ((struct sockaddr_in
*)dst
)->sin_addr
;
423 if (faddr
.s_addr
== INADDR_ANY
)
427 * Redirects go to all references to the destination,
428 * and use in_rtchange to invalidate the route cache.
429 * Dead host indications: notify all references to the destination.
430 * Otherwise, if we have knowledge of the local port and address,
431 * deliver only to that socket.
433 if (PRC_IS_REDIRECT(cmd
) || cmd
== PRC_HOSTDEAD
) {
437 if (cmd
!= PRC_HOSTDEAD
)
438 notify
= in_rtchange
;
/* Translate the command into the error number handed to the callback. */
440 _errno
= inetctlerrmap
[cmd
];
/*
 * Scan the list.  The iterator is advanced before the callback is invoked
 * on the matching entry (oinp).
 */
442 for (inp
= head
->lh_first
; inp
!= NULL
;) {
443 if (inp
->inp_faddr
.s_addr
!= faddr
.s_addr
||
444 inp
->inp_socket
== 0 ||
445 (lport
&& inp
->inp_lport
!= lport
) ||
446 (laddr
.s_addr
&& inp
->inp_laddr
.s_addr
!= laddr
.s_addr
) ||
447 (fport
&& inp
->inp_fport
!= fport
)) {
448 inp
= inp
->inp_list
.le_next
;
452 inp
= inp
->inp_list
.le_next
;
454 (*notify
)(oinp
, _errno
);
460 * Check for alternatives when higher level complains
461 * about service problems. For now, invalidate cached
462 * routing information. If the route was created dynamically
463 * (by a redirect), time to try a default gateway again.
/*
 * NOTE(review): the line carrying this function's name is not visible in
 * this view; from the RTM_LOSING message it sends this is presumably
 * in_losing(inp) -- confirm against the complete source.
 *
 * Visible behaviour: when PCB `inp` holds a cached route, detach it from
 * the PCB, report RTM_LOSING through rt_missmsg(), and, if the route
 * carries RTF_DYNAMIC, ask rtrequest() to delete it.
 * Statements after the visible ones are not present in this view.
 */
469 register struct rtentry
*rt
;
/* Take the cached route out of the PCB before working on it. */
471 if ((rt
= inp
->inp_route
.ro_rt
)) {
472 inp
->inp_route
.ro_rt
= 0;
473 rt_missmsg(RTM_LOSING
, &inp
->inp_route
.ro_dst
,
474 rt
->rt_gateway
, (struct sockaddr
*)rt_mask(rt
),
475 (struct sockaddr
*)0, rt
->rt_flags
, 0);
/* The result of the delete request is deliberately ignored. */
476 if (rt
->rt_flags
& RTF_DYNAMIC
)
477 (void) rtrequest(RTM_DELETE
, rt_key(rt
),
478 rt
->rt_gateway
, rt_mask(rt
), rt
->rt_flags
,
479 (struct rtentry
**)0);
482 * A new route can be allocated
483 * the next time output is attempted.
490 * After a routing change, flush old routing
491 * and allocate a (hopefully) better one.
/*
 * in_rtchange: drop the route cached in PCB `inp`.
 *
 *   inp    - PCB whose inp_route.ro_rt is released with rtfree() and then
 *            cleared.
 *   _errno - not referenced by any statement visible in this view; the
 *            two-argument signature matches the notify callback installed
 *            by in_pcbnotify().
 *
 * NOTE(review): the return type, the parameter declarations and the braces
 * are not present in this view.
 */
494 in_rtchange(inp
, _errno
)
498 if (inp
->inp_route
.ro_rt
) {
499 rtfree(inp
->inp_route
.ro_rt
);
500 inp
->inp_route
.ro_rt
= 0;
502 * A new route can be allocated the next time
503 * output is attempted.
/*
 * in_pcblookup: linear search of the PCB list `head` for the PCB that best
 * matches a foreign/local address pair.
 *
 *   head               - PCB list, walked through inp_list.
 *   faddr, fport_arg   - foreign address and port to match.
 *   laddr, lport_arg   - local address and port to match; entries whose
 *                        local port differs are tested first.
 *   flags              - INPLOOKUP_WILDCARD is tested together with
 *                        `wildcard` before a candidate is ranked.
 *
 * `match` holds the best candidate so far and `matchwild` its rank; a
 * candidate with a smaller `wildcard` value replaces the previous best, and
 * a rank of 0 is singled out by the innermost visible test.
 * NOTE(review): the statements guarded by most of the tests below (the
 * skips, the updates of `wildcard` and `match`, the loop exit and the
 * return) are not present in this view; the comments describe only the
 * visible tests.
 */
509 in_pcblookup(head
, faddr
, fport_arg
, laddr
, lport_arg
, flags
)
510 struct inpcbhead
*head
;
511 struct in_addr faddr
, laddr
;
512 u_int fport_arg
, lport_arg
;
515 register struct inpcb
*inp
, *match
= NULL
;
516 int matchwild
= 3, wildcard
;
517 u_short fport
= fport_arg
, lport
= lport_arg
;
522 for (inp
= head
->lh_first
; inp
!= NULL
; inp
= inp
->inp_list
.le_next
) {
/* Local port test comes first. */
523 if (inp
->inp_lport
!= lport
)
/* Foreign side: PCB has a peer vs. caller supplied one. */
526 if (inp
->inp_faddr
.s_addr
!= INADDR_ANY
) {
527 if (faddr
.s_addr
== INADDR_ANY
)
529 else if (inp
->inp_faddr
.s_addr
!= faddr
.s_addr
||
530 inp
->inp_fport
!= fport
)
533 if (faddr
.s_addr
!= INADDR_ANY
)
/* Local side: PCB is bound to an address vs. caller supplied one. */
536 if (inp
->inp_laddr
.s_addr
!= INADDR_ANY
) {
537 if (laddr
.s_addr
== INADDR_ANY
)
539 else if (inp
->inp_laddr
.s_addr
!= laddr
.s_addr
)
542 if (laddr
.s_addr
!= INADDR_ANY
)
/* Candidates that needed wildcards are tested against the caller's flags. */
545 if (wildcard
&& (flags
& INPLOOKUP_WILDCARD
) == 0)
/* Keep the candidate with the smallest wildcard count. */
547 if (wildcard
< matchwild
) {
549 matchwild
= wildcard
;
550 if (matchwild
== 0) {
560 * Lookup PCB in hash list.
/*
 * in_pcblookuphash: look for a PCB whose foreign address, foreign port,
 * local port and local address all equal the arguments, using the hash
 * table in `pcbinfo`.
 *
 *   pcbinfo            - supplies hashbase[] and hashsize.
 *   faddr, fport_arg   - foreign address / port.
 *   laddr, lport_arg   - local address / port.
 *
 * The bucket index is (faddr.s_addr + lport + fport) % hashsize; laddr is
 * not part of the hash.  The same formula is used where PCBs are inserted
 * into the hash chains later in this file.
 * A PCB that is found and is not already first in its chain is moved to the
 * head of the chain.
 * NOTE(review): the return type, braces, the statement guarded by the
 * mismatch test and the return statements are not present in this view.
 */
563 in_pcblookuphash(pcbinfo
, faddr
, fport_arg
, laddr
, lport_arg
)
564 struct inpcbinfo
*pcbinfo
;
565 struct in_addr faddr
, laddr
;
566 u_int fport_arg
, lport_arg
;
568 struct inpcbhead
*head
;
569 register struct inpcb
*inp
;
570 u_short fport
= fport_arg
, lport
= lport_arg
;
575 * First look for an exact match.
577 head
= &pcbinfo
->hashbase
[(faddr
.s_addr
+ lport
+ fport
) % pcbinfo
->hashsize
];
/* Walk the bucket through the inp_hash links. */
579 for (inp
= head
->lh_first
; inp
!= NULL
; inp
= inp
->inp_hash
.le_next
) {
/* Any differing field means this entry is not the one. */
580 if (inp
->inp_faddr
.s_addr
!= faddr
.s_addr
||
581 inp
->inp_fport
!= fport
||
582 inp
->inp_lport
!= lport
||
583 inp
->inp_laddr
.s_addr
!= laddr
.s_addr
)
586 * Move PCB to head of this hash chain so that it can be
587 * found more quickly in the future.
589 if (inp
!= head
->lh_first
) {
590 LIST_REMOVE(inp
, inp_hash
);
591 LIST_INSERT_HEAD(head
, inp
, inp_hash
);
600 * Insert PCB into hash chain. Must be called at splnet.
/*
 * NOTE(review): the line carrying this function's name is not visible in
 * this view; from its body this is presumably in_pcbinshash(inp) --
 * confirm against the complete source.
 *
 * Visible behaviour: compute the hash bucket of PCB `inp` from its current
 * foreign address, local port and foreign port (same formula as
 * in_pcblookuphash()) and insert the PCB at the head of that bucket.
 */
606 struct inpcbhead
*head
;
608 head
= &inp
->inp_pcbinfo
->hashbase
[(inp
->inp_faddr
.s_addr
+
609 inp
->inp_lport
+ inp
->inp_fport
) % inp
->inp_pcbinfo
->hashsize
];
611 LIST_INSERT_HEAD(head
, inp
, inp_hash
);
/*
 * NOTE(review): the line carrying this function's name is not visible in
 * this view; from its body this is presumably in_pcbrehash(inp) --
 * confirm against the complete source.
 *
 * Visible behaviour: take PCB `inp` off the hash chain it is on, recompute
 * its bucket from the current foreign address, local port and foreign port
 * (same formula as in_pcblookuphash()), and insert it at the head of that
 * bucket.  The PCB must already be on a hash chain, since it is removed
 * before being re-inserted.
 */
618 struct inpcbhead
*head
;
/* Unlink from the old bucket before the new one is computed. */
622 LIST_REMOVE(inp
, inp_hash
);
624 head
= &inp
->inp_pcbinfo
->hashbase
[(inp
->inp_faddr
.s_addr
+
625 inp
->inp_lport
+ inp
->inp_fport
) % inp
->inp_pcbinfo
->hashsize
];
627 LIST_INSERT_HEAD(head
, inp
, inp_hash
);