docs/how-to-build.md: use proper markup for directory names
[unleashed/tickless.git] / kernel / net / tcp / tcp_bind.c
blobb502faffa615cb7a7c3c31011c531d72e8fd1bab
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/strsun.h>
31 #include <sys/strsubr.h>
32 #include <sys/stropts.h>
33 #include <sys/strlog.h>
34 #define _SUN_TPI_VERSION 2
35 #include <sys/tihdr.h>
36 #include <sys/suntpi.h>
37 #include <sys/xti_inet.h>
38 #include <sys/policy.h>
39 #include <sys/squeue_impl.h>
40 #include <sys/squeue.h>
42 #include <rpc/pmap_prot.h>
44 #include <inet/common.h>
45 #include <inet/ip.h>
46 #include <inet/tcp.h>
47 #include <inet/tcp_impl.h>
48 #include <inet/proto_set.h>
49 #include <inet/ipsec_impl.h>
51 /* Setable in /etc/system */
52 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
53 static uint32_t tcp_random_anon_port = 1;
55 static int tcp_bind_select_lport(tcp_t *, in_port_t *, boolean_t,
56 cred_t *cr);
57 static in_port_t tcp_get_next_priv_port(const tcp_t *);
60 * Hash list insertion routine for tcp_t structures. Each hash bucket
61 * contains a list of tcp_t entries, and each entry is bound to a unique
62 * port. If there are multiple tcp_t's that are bound to the same port, then
63 * one of them will be linked into the hash bucket list, and the rest will
64 * hang off of that one entry. For each port, entries bound to a specific IP
65 * address will be inserted before those those bound to INADDR_ANY.
67 void
68 tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock)
70 tcp_t **tcpp;
71 tcp_t *tcpnext;
72 tcp_t *tcphash;
73 conn_t *connp = tcp->tcp_connp;
74 conn_t *connext;
76 if (tcp->tcp_ptpbhn != NULL) {
77 ASSERT(!caller_holds_lock);
78 tcp_bind_hash_remove(tcp);
80 tcpp = &tbf->tf_tcp;
81 if (!caller_holds_lock) {
82 mutex_enter(&tbf->tf_lock);
83 } else {
84 ASSERT(MUTEX_HELD(&tbf->tf_lock));
86 tcphash = tcpp[0];
87 tcpnext = NULL;
88 if (tcphash != NULL) {
89 /* Look for an entry using the same port */
90 while ((tcphash = tcpp[0]) != NULL &&
91 connp->conn_lport != tcphash->tcp_connp->conn_lport)
92 tcpp = &(tcphash->tcp_bind_hash);
94 /* The port was not found, just add to the end */
95 if (tcphash == NULL)
96 goto insert;
99 * OK, there already exists an entry bound to the
100 * same port.
102 * If the new tcp bound to the INADDR_ANY address
103 * and the first one in the list is not bound to
104 * INADDR_ANY we skip all entries until we find the
105 * first one bound to INADDR_ANY.
106 * This makes sure that applications binding to a
107 * specific address get preference over those binding to
108 * INADDR_ANY.
110 tcpnext = tcphash;
111 connext = tcpnext->tcp_connp;
112 tcphash = NULL;
113 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
114 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
115 while ((tcpnext = tcpp[0]) != NULL) {
116 connext = tcpnext->tcp_connp;
117 if (!V6_OR_V4_INADDR_ANY(
118 connext->conn_bound_addr_v6))
119 tcpp = &(tcpnext->tcp_bind_hash_port);
120 else
121 break;
123 if (tcpnext != NULL) {
124 tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
125 tcphash = tcpnext->tcp_bind_hash;
126 if (tcphash != NULL) {
127 tcphash->tcp_ptpbhn =
128 &(tcp->tcp_bind_hash);
129 tcpnext->tcp_bind_hash = NULL;
132 } else {
133 tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
134 tcphash = tcpnext->tcp_bind_hash;
135 if (tcphash != NULL) {
136 tcphash->tcp_ptpbhn =
137 &(tcp->tcp_bind_hash);
138 tcpnext->tcp_bind_hash = NULL;
142 insert:
143 tcp->tcp_bind_hash_port = tcpnext;
144 tcp->tcp_bind_hash = tcphash;
145 tcp->tcp_ptpbhn = tcpp;
146 tcpp[0] = tcp;
147 if (!caller_holds_lock)
148 mutex_exit(&tbf->tf_lock);
152 * Hash list removal routine for tcp_t structures.
154 void
155 tcp_bind_hash_remove(tcp_t *tcp)
157 tcp_t *tcpnext;
158 kmutex_t *lockp;
159 tcp_stack_t *tcps = tcp->tcp_tcps;
160 conn_t *connp = tcp->tcp_connp;
162 if (tcp->tcp_ptpbhn == NULL)
163 return;
166 * Extract the lock pointer in case there are concurrent
167 * hash_remove's for this instance.
169 ASSERT(connp->conn_lport != 0);
170 lockp = &tcps->tcps_bind_fanout[TCP_BIND_HASH(
171 connp->conn_lport)].tf_lock;
173 ASSERT(lockp != NULL);
174 mutex_enter(lockp);
175 if (tcp->tcp_ptpbhn) {
176 tcpnext = tcp->tcp_bind_hash_port;
177 if (tcpnext != NULL) {
178 tcp->tcp_bind_hash_port = NULL;
179 tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
180 tcpnext->tcp_bind_hash = tcp->tcp_bind_hash;
181 if (tcpnext->tcp_bind_hash != NULL) {
182 tcpnext->tcp_bind_hash->tcp_ptpbhn =
183 &(tcpnext->tcp_bind_hash);
184 tcp->tcp_bind_hash = NULL;
186 } else if ((tcpnext = tcp->tcp_bind_hash) != NULL) {
187 tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
188 tcp->tcp_bind_hash = NULL;
190 *tcp->tcp_ptpbhn = tcpnext;
191 tcp->tcp_ptpbhn = NULL;
193 mutex_exit(lockp);
197 * Don't let port fall into the privileged range.
198 * Since the extra privileged ports can be arbitrary we also
199 * ensure that we exclude those from consideration.
200 * tcp_g_epriv_ports is not sorted thus we loop over it until
201 * there are no changes.
203 * Note: No locks are held when inspecting tcp_g_*epriv_ports
204 * but instead the code relies on:
205 * - the fact that the address of the array and its size never changes
206 * - the atomic assignment of the elements of the array
208 * Returns 0 if there are no more ports available.
210 * TS note: skip multilevel ports.
212 in_port_t
213 tcp_update_next_port(in_port_t port, const tcp_t *tcp, boolean_t random)
215 int i, bump;
216 boolean_t restart = B_FALSE;
217 tcp_stack_t *tcps = tcp->tcp_tcps;
219 if (random && tcp_random_anon_port != 0) {
220 (void) random_get_pseudo_bytes((uint8_t *)&port,
221 sizeof (in_port_t));
223 * Unless changed by a sys admin, the smallest anon port
224 * is 32768 and the largest anon port is 65535. It is
225 * very likely (50%) for the random port to be smaller
226 * than the smallest anon port. When that happens,
227 * add port % (anon port range) to the smallest anon
228 * port to get the random port. It should fall into the
229 * valid anon port range.
231 if ((port < tcps->tcps_smallest_anon_port) ||
232 (port > tcps->tcps_largest_anon_port)) {
233 if (tcps->tcps_smallest_anon_port ==
234 tcps->tcps_largest_anon_port) {
235 bump = 0;
236 } else {
237 bump = port % (tcps->tcps_largest_anon_port -
238 tcps->tcps_smallest_anon_port);
240 port = tcps->tcps_smallest_anon_port + bump;
244 retry:
245 if (port < tcps->tcps_smallest_anon_port)
246 port = (in_port_t)tcps->tcps_smallest_anon_port;
248 if (port > tcps->tcps_largest_anon_port) {
249 if (restart)
250 return (0);
251 restart = B_TRUE;
252 port = (in_port_t)tcps->tcps_smallest_anon_port;
255 if (port < tcps->tcps_smallest_nonpriv_port)
256 port = (in_port_t)tcps->tcps_smallest_nonpriv_port;
258 for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
259 if (port == tcps->tcps_g_epriv_ports[i]) {
260 port++;
262 * Make sure whether the port is in the
263 * valid range.
265 goto retry;
268 return (port);
272 * Return the next anonymous port in the privileged port range for
273 * bind checking. It starts at IPPORT_RESERVED - 1 and goes
274 * downwards. This is the same behavior as documented in the userland
275 * library call rresvport(3N).
277 * TS note: skip multilevel ports.
279 static in_port_t
280 tcp_get_next_priv_port(const tcp_t *tcp)
282 static in_port_t next_priv_port = IPPORT_RESERVED - 1;
283 in_port_t nextport;
284 boolean_t restart = B_FALSE;
285 tcp_stack_t *tcps = tcp->tcp_tcps;
286 retry:
287 if (next_priv_port < tcps->tcps_min_anonpriv_port ||
288 next_priv_port >= IPPORT_RESERVED) {
289 next_priv_port = IPPORT_RESERVED - 1;
290 if (restart)
291 return (0);
292 restart = B_TRUE;
294 return (next_priv_port--);
297 static int
298 tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr,
299 boolean_t bind_to_req_port_only, cred_t *cr)
301 boolean_t user_specified;
302 in_port_t allocated_port;
303 in_port_t requested_port = *requested_port_ptr;
304 conn_t *connp = tcp->tcp_connp;
305 zone_t *zone;
306 tcp_stack_t *tcps = tcp->tcp_tcps;
307 in6_addr_t v6addr = connp->conn_laddr_v6;
310 * XXX It's up to the caller to specify bind_to_req_port_only or not.
312 ASSERT(cr != NULL);
315 * Get a valid port (within the anonymous range and should not
316 * be a privileged one) to use if the user has not given a port.
317 * If multiple threads are here, they may all start with
318 * with the same initial port. But, it should be fine as long as
319 * tcp_bindi will ensure that no two threads will be assigned
320 * the same port.
322 * NOTE: XXX If a privileged process asks for an anonymous port, we
323 * still check for ports only in the range > tcp_smallest_non_priv_port,
324 * unless TCP_ANONPRIVBIND option is set.
326 if (requested_port == 0) {
327 requested_port = connp->conn_anon_priv_bind ?
328 tcp_get_next_priv_port(tcp) :
329 tcp_update_next_port(tcps->tcps_next_port_to_try,
330 tcp, B_TRUE);
331 if (requested_port == 0) {
332 return (-TNOADDR);
334 user_specified = B_FALSE;
335 } else {
336 int i;
337 boolean_t priv = B_FALSE;
340 * If the requested_port is in the well-known privileged range,
341 * verify that the stream was opened by a privileged user.
342 * Note: No locks are held when inspecting tcp_g_*epriv_ports
343 * but instead the code relies on:
344 * - the fact that the address of the array and its size never
345 * changes
346 * - the atomic assignment of the elements of the array
348 if (requested_port < tcps->tcps_smallest_nonpriv_port) {
349 priv = B_TRUE;
350 } else {
351 for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
352 if (requested_port ==
353 tcps->tcps_g_epriv_ports[i]) {
354 priv = B_TRUE;
355 break;
359 if (priv) {
360 if (secpolicy_net_privaddr(cr, requested_port,
361 IPPROTO_TCP) != 0) {
362 if (connp->conn_debug) {
363 (void) strlog(TCP_MOD_ID, 0, 1,
364 SL_ERROR|SL_TRACE,
365 "tcp_bind: no priv for port %d",
366 requested_port);
368 return (-TACCES);
371 user_specified = B_TRUE;
373 connp = tcp->tcp_connp;
376 allocated_port = tcp_bindi(tcp, requested_port, &v6addr,
377 connp->conn_reuseaddr, B_FALSE, bind_to_req_port_only,
378 user_specified);
380 if (allocated_port == 0) {
381 if (bind_to_req_port_only) {
382 if (connp->conn_debug) {
383 (void) strlog(TCP_MOD_ID, 0, 1,
384 SL_ERROR|SL_TRACE,
385 "tcp_bind: requested addr busy");
387 return (-TADDRBUSY);
388 } else {
389 /* If we are out of ports, fail the bind. */
390 if (connp->conn_debug) {
391 (void) strlog(TCP_MOD_ID, 0, 1,
392 SL_ERROR|SL_TRACE,
393 "tcp_bind: out of ports?");
395 return (-TNOADDR);
399 /* Pass the allocated port back */
400 *requested_port_ptr = allocated_port;
401 return (0);
405 * Check the address and check/pick a local port number.
408 tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
409 boolean_t bind_to_req_port_only)
411 tcp_t *tcp = connp->conn_tcp;
412 sin_t *sin;
413 sin6_t *sin6;
414 in_port_t requested_port;
415 ipaddr_t v4addr;
416 in6_addr_t v6addr;
417 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
418 zoneid_t zoneid = IPCL_ZONEID(connp);
419 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
420 uint_t scopeid = 0;
421 int error = 0;
422 ip_xmit_attr_t *ixa = connp->conn_ixa;
424 ASSERT((uintptr_t)len <= (uintptr_t)INT_MAX);
426 if (tcp->tcp_state == TCPS_BOUND) {
427 return (0);
428 } else if (tcp->tcp_state > TCPS_BOUND) {
429 if (connp->conn_debug) {
430 (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
431 "tcp_bind: bad state, %d", tcp->tcp_state);
433 return (-TOUTSTATE);
436 ASSERT(sa != NULL && len != 0);
438 if (!OK_32PTR((char *)sa)) {
439 if (connp->conn_debug) {
440 (void) strlog(TCP_MOD_ID, 0, 1,
441 SL_ERROR|SL_TRACE,
442 "tcp_bind: bad address parameter, "
443 "address %p, len %d",
444 (void *)sa, len);
446 return (-TPROTO);
449 error = proto_verify_ip_addr(connp->conn_family, sa, len);
450 if (error != 0) {
451 return (error);
454 switch (len) {
455 case sizeof (sin_t): /* Complete IPv4 address */
456 sin = (sin_t *)sa;
457 requested_port = ntohs(sin->sin_port);
458 v4addr = sin->sin_addr.s_addr;
459 IN6_IPADDR_TO_V4MAPPED(v4addr, &v6addr);
460 if (v4addr != INADDR_ANY) {
461 laddr_type = ip_laddr_verify_v4(v4addr, zoneid, ipst,
462 B_FALSE);
464 break;
466 case sizeof (sin6_t): /* Complete IPv6 address */
467 sin6 = (sin6_t *)sa;
468 v6addr = sin6->sin6_addr;
469 requested_port = ntohs(sin6->sin6_port);
470 if (IN6_IS_ADDR_V4MAPPED(&v6addr)) {
471 if (connp->conn_ipv6_v6only)
472 return (EADDRNOTAVAIL);
474 IN6_V4MAPPED_TO_IPADDR(&v6addr, v4addr);
475 if (v4addr != INADDR_ANY) {
476 laddr_type = ip_laddr_verify_v4(v4addr,
477 zoneid, ipst, B_FALSE);
479 } else {
480 if (!IN6_IS_ADDR_UNSPECIFIED(&v6addr)) {
481 if (IN6_IS_ADDR_LINKSCOPE(&v6addr))
482 scopeid = sin6->sin6_scope_id;
483 laddr_type = ip_laddr_verify_v6(&v6addr,
484 zoneid, ipst, B_FALSE, scopeid);
487 break;
489 default:
490 if (connp->conn_debug) {
491 (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
492 "tcp_bind: bad address length, %d", len);
494 return (EAFNOSUPPORT);
495 /* return (-TBADADDR); */
498 /* Is the local address a valid unicast address? */
499 if (laddr_type == IPVL_BAD)
500 return (EADDRNOTAVAIL);
502 connp->conn_bound_addr_v6 = v6addr;
503 if (scopeid != 0) {
504 ixa->ixa_flags |= IXAF_SCOPEID_SET;
505 ixa->ixa_scopeid = scopeid;
506 connp->conn_incoming_ifindex = scopeid;
507 } else {
508 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
509 connp->conn_incoming_ifindex = connp->conn_bound_if;
512 connp->conn_laddr_v6 = v6addr;
513 connp->conn_saddr_v6 = v6addr;
515 bind_to_req_port_only = requested_port != 0 && bind_to_req_port_only;
517 error = tcp_bind_select_lport(tcp, &requested_port,
518 bind_to_req_port_only, cr);
519 if (error != 0) {
520 connp->conn_laddr_v6 = ipv6_all_zeros;
521 connp->conn_saddr_v6 = ipv6_all_zeros;
522 connp->conn_bound_addr_v6 = ipv6_all_zeros;
524 return (error);
528 * If the "bind_to_req_port_only" parameter is set, if the requested port
529 * number is available, return it, If not return 0
531 * If "bind_to_req_port_only" parameter is not set and
532 * If the requested port number is available, return it. If not, return
533 * the first anonymous port we happen across. If no anonymous ports are
534 * available, return 0. addr is the requested local address, if any.
536 * In either case, when succeeding update the tcp_t to record the port number
537 * and insert it in the bind hash table.
539 * Note that TCP over IPv4 and IPv6 sockets can use the same port number
540 * without setting SO_REUSEADDR. This is needed so that they
541 * can be viewed as two independent transport protocols.
543 in_port_t
544 tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
545 int reuseaddr, boolean_t quick_connect,
546 boolean_t bind_to_req_port_only, boolean_t user_specified)
548 /* number of times we have run around the loop */
549 int count = 0;
550 /* maximum number of times to run around the loop */
551 int loopmax;
552 conn_t *connp = tcp->tcp_connp;
553 tcp_stack_t *tcps = tcp->tcp_tcps;
556 * Lookup for free addresses is done in a loop and "loopmax"
557 * influences how long we spin in the loop
559 if (bind_to_req_port_only) {
561 * If the requested port is busy, don't bother to look
562 * for a new one. Setting loop maximum count to 1 has
563 * that effect.
565 loopmax = 1;
566 } else {
568 * If the requested port is busy, look for a free one
569 * in the anonymous port range.
570 * Set loopmax appropriately so that one does not look
571 * forever in the case all of the anonymous ports are in use.
573 if (connp->conn_anon_priv_bind) {
575 * loopmax =
576 * (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1
578 loopmax = IPPORT_RESERVED -
579 tcps->tcps_min_anonpriv_port;
580 } else {
581 loopmax = (tcps->tcps_largest_anon_port -
582 tcps->tcps_smallest_anon_port + 1);
585 do {
586 uint16_t lport;
587 tf_t *tbf;
588 tcp_t *ltcp;
589 conn_t *lconnp;
591 lport = htons(port);
594 * Ensure that the tcp_t is not currently in the bind hash.
595 * Hold the lock on the hash bucket to ensure that
596 * the duplicate check plus the insertion is an atomic
597 * operation.
599 * This function does an inline lookup on the bind hash list
600 * Make sure that we access only members of tcp_t
601 * and that we don't look at tcp_tcp, since we are not
602 * doing a CONN_INC_REF.
604 tcp_bind_hash_remove(tcp);
605 tbf = &tcps->tcps_bind_fanout[TCP_BIND_HASH(lport)];
606 mutex_enter(&tbf->tf_lock);
607 for (ltcp = tbf->tf_tcp; ltcp != NULL;
608 ltcp = ltcp->tcp_bind_hash) {
609 if (lport == ltcp->tcp_connp->conn_lport)
610 break;
613 for (; ltcp != NULL; ltcp = ltcp->tcp_bind_hash_port) {
614 boolean_t not_socket;
615 boolean_t exclbind;
617 lconnp = ltcp->tcp_connp;
619 if (!IPCL_BIND_ZONE_MATCH(lconnp, connp))
620 continue;
623 * If TCP_EXCLBIND is set for either the bound or
624 * binding endpoint, the semantics of bind
625 * is changed according to the following.
627 * spec = specified address (v4 or v6)
628 * unspec = unspecified address (v4 or v6)
629 * A = specified addresses are different for endpoints
631 * bound bind to allowed
632 * -------------------------------------
633 * unspec unspec no
634 * unspec spec no
635 * spec unspec no
636 * spec spec yes if A
638 * Note:
640 * 1. Because of TLI semantics, an endpoint can go
641 * back from, say TCP_ESTABLISHED to TCPS_LISTEN or
642 * TCPS_BOUND, depending on whether it is originally
643 * a listener or not. That is why we need to check
644 * for states greater than or equal to TCPS_BOUND
645 * here.
647 * 2. Ideally, we should only check for state equals
648 * to TCPS_LISTEN. And the following check should be
649 * added.
651 * if (ltcp->tcp_state == TCPS_LISTEN ||
652 * !reuseaddr || !lconnp->conn_reuseaddr) {
653 * ...
656 * The semantics will be changed to this. If the
657 * endpoint on the list is in state not equal to
658 * TCPS_LISTEN and both endpoints have SO_REUSEADDR
659 * set, let the bind succeed.
661 * Because of (1), we cannot do that for TLI
662 * endpoints. But we can do that for socket endpoints.
663 * If in future, we can change this going back
664 * semantics, we can use the above check for TLI also.
666 not_socket = !(TCP_IS_SOCKET(ltcp) &&
667 TCP_IS_SOCKET(tcp));
668 exclbind = lconnp->conn_exclbind ||
669 connp->conn_exclbind;
671 if ((exclbind && (not_socket ||
672 ltcp->tcp_state <= TCPS_ESTABLISHED))) {
673 if (V6_OR_V4_INADDR_ANY(
674 lconnp->conn_bound_addr_v6) ||
675 V6_OR_V4_INADDR_ANY(*laddr) ||
676 IN6_ARE_ADDR_EQUAL(laddr,
677 &lconnp->conn_bound_addr_v6)) {
678 break;
680 continue;
684 * Check ipversion to allow IPv4 and IPv6 sockets to
685 * have disjoint port number spaces, if *_EXCLBIND
686 * is not set and only if the application binds to a
687 * specific port. We use the same autoassigned port
688 * number space for IPv4 and IPv6 sockets.
690 if (connp->conn_ipversion != lconnp->conn_ipversion &&
691 bind_to_req_port_only)
692 continue;
695 * Ideally, we should make sure that the source
696 * address, remote address, and remote port in the
697 * four tuple for this tcp-connection is unique.
698 * However, trying to find out the local source
699 * address would require too much code duplication
700 * with IP, since IP needs needs to have that code
701 * to support userland TCP implementations.
703 if (quick_connect &&
704 (ltcp->tcp_state > TCPS_LISTEN) &&
705 ((connp->conn_fport != lconnp->conn_fport) ||
706 !IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
707 &lconnp->conn_faddr_v6)))
708 continue;
710 if (!reuseaddr) {
712 * No socket option SO_REUSEADDR.
713 * If existing port is bound to
714 * a non-wildcard IP address
715 * and the requesting stream is
716 * bound to a distinct
717 * different IP addresses
718 * (non-wildcard, also), keep
719 * going.
721 if (!V6_OR_V4_INADDR_ANY(*laddr) &&
722 !V6_OR_V4_INADDR_ANY(
723 lconnp->conn_bound_addr_v6) &&
724 !IN6_ARE_ADDR_EQUAL(laddr,
725 &lconnp->conn_bound_addr_v6))
726 continue;
727 if (ltcp->tcp_state >= TCPS_BOUND) {
729 * This port is being used and
730 * its state is >= TCPS_BOUND,
731 * so we can't bind to it.
733 break;
735 } else {
737 * socket option SO_REUSEADDR is set on the
738 * binding tcp_t.
740 * If two streams are bound to
741 * same IP address or both addr
742 * and bound source are wildcards
743 * (INADDR_ANY), we want to stop
744 * searching.
745 * We have found a match of IP source
746 * address and source port, which is
747 * refused regardless of the
748 * SO_REUSEADDR setting, so we break.
750 if (IN6_ARE_ADDR_EQUAL(laddr,
751 &lconnp->conn_bound_addr_v6) &&
752 (ltcp->tcp_state == TCPS_LISTEN ||
753 ltcp->tcp_state == TCPS_BOUND))
754 break;
757 if (ltcp != NULL) {
758 /* The port number is busy */
759 mutex_exit(&tbf->tf_lock);
760 } else {
762 * This port is ours. Insert in fanout and mark as
763 * bound to prevent others from getting the port
764 * number.
766 tcp->tcp_state = TCPS_BOUND;
767 DTRACE_TCP6(state__change, void, NULL,
768 ip_xmit_attr_t *, connp->conn_ixa,
769 void, NULL, tcp_t *, tcp, void, NULL,
770 int32_t, TCPS_IDLE);
772 connp->conn_lport = htons(port);
774 ASSERT(&tcps->tcps_bind_fanout[TCP_BIND_HASH(
775 connp->conn_lport)] == tbf);
776 tcp_bind_hash_insert(tbf, tcp, 1);
778 mutex_exit(&tbf->tf_lock);
781 * We don't want tcp_next_port_to_try to "inherit"
782 * a port number supplied by the user in a bind.
784 if (user_specified)
785 return (port);
788 * This is the only place where tcp_next_port_to_try
789 * is updated. After the update, it may or may not
790 * be in the valid range.
792 if (!connp->conn_anon_priv_bind)
793 tcps->tcps_next_port_to_try = port + 1;
794 return (port);
797 if (connp->conn_anon_priv_bind) {
798 port = tcp_get_next_priv_port(tcp);
799 } else {
800 if (count == 0 && user_specified) {
802 * We may have to return an anonymous port. So
803 * get one to start with.
805 port =
806 tcp_update_next_port(
807 tcps->tcps_next_port_to_try,
808 tcp, B_TRUE);
809 user_specified = B_FALSE;
810 } else {
811 port = tcp_update_next_port(port + 1, tcp,
812 B_FALSE);
815 if (port == 0)
816 break;
819 * Don't let this loop run forever in the case where
820 * all of the anonymous ports are in use.
822 } while (++count < loopmax);
823 return (0);