4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
31 #define _SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/stropts.h>
34 #include <sys/socket.h>
35 #include <sys/random.h>
36 #include <sys/policy.h>
37 #include <sys/tsol/tndb.h>
38 #include <sys/tsol/tnet.h>
40 #include <netinet/in.h>
41 #include <netinet/ip6.h>
43 #include <inet/common.h>
46 #include <inet/ipclassifier.h>
47 #include "sctp_impl.h"
48 #include "sctp_asconf.h"
49 #include "sctp_addr.h"
52 * Minimum number of associations which can be created per listener. Used
53 * when the listener association count is in effect.
/*
 * File-local floor for the per-listener association limit: sctp_listen()
 * raises slc_max to this value whenever the computed limit is smaller.
 */
55 static uint32_t sctp_min_assoc_listener
= 2;
58 * Returns 0 on success, EACCES on permission failure.
/*
 * sctp_select_port: choose or validate the local port for a bind.
 *
 *   sctp            endpoint being bound; its stack (sctp_sctps) and
 *                   conn (sctp_connp) supply the port tunables and the
 *                   credential used for the checks below.
 *   requested_port  in/out, host byte order as used here.  When it is 0 on
 *                   entry it is overwritten with the result of
 *                   sctp_update_next_port() seeded from
 *                   sctps_next_port_to_try and the zone of conn_cred.
 *   user_specified  out parameter.  NOTE(review): the stores to it are not
 *                   visible in this view; sctp_bind() compares it with 1,
 *                   so presumably 1 means the caller supplied the port and
 *                   0 means it was picked here.  Confirm.
 *
 * For a caller-supplied port the function decides whether the port is
 * privileged: either it is below sctps_smallest_nonpriv_port or it matches
 * an entry of sctps_g_epriv_ports[].  A privileged port is then checked
 * with secpolicy_net_privaddr() against conn_cred for IPPROTO_SCTP, and a
 * debug message is emitted when that check fails.
 *
 * NOTE(review): the return statements are not visible here.  The comment
 * preceding the function states 0 on success and EACCES on permission
 * failure; the action taken when sctp_update_next_port() yields 0 should
 * be confirmed against the full source.
 */
61 sctp_select_port(sctp_t
*sctp
, in_port_t
*requested_port
, int *user_specified
)
63 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
64 conn_t
*connp
= sctp
->sctp_connp
;
67 * Get a valid port (within the anonymous range and should not
68 * be a privileged one) to use if the user has not given a port.
69 * If multiple threads are here, they may all start with
70 * with the same initial port. But, it should be fine as long as
71 * sctp_bindi will ensure that no two threads will be assigned
/* No port supplied by the caller: ask for the next anonymous candidate. */
74 if (*requested_port
== 0) {
75 *requested_port
= sctp_update_next_port(
76 sctps
->sctps_next_port_to_try
,
77 crgetzone(connp
->conn_cred
), sctps
);
78 if (*requested_port
== 0)
/* Tracks whether the caller-supplied port needs a privilege check. */
83 boolean_t priv
= B_FALSE
;
86 * If the requested_port is in the well-known privileged range,
87 * verify that the stream was opened by a privileged user.
88 * Note: No locks are held when inspecting sctp_g_*epriv_ports
89 * but instead the code relies on:
90 * - the fact that the address of the array and its size never
92 * - the atomic assignment of the elements of the array
94 if (*requested_port
< sctps
->sctps_smallest_nonpriv_port
) {
/* Linear scan of the extra privileged port table. */
97 for (i
= 0; i
< sctps
->sctps_g_num_epriv_ports
; i
++) {
98 if (*requested_port
==
99 sctps
->sctps_g_epriv_ports
[i
]) {
107 * sctp_bind() should take a cred_t argument so that
108 * we can use it here.
/* Policy check using the credential cached on the conn. */
110 if (secpolicy_net_privaddr(connp
->conn_cred
,
111 *requested_port
, IPPROTO_SCTP
) != 0) {
113 ("sctp_bind(x): no prive for port %d",
/*
 * sctp_listen: move an SCTP endpoint into the SCTPS_LISTEN state.
 *
 *   sctp   endpoint to put into listening mode.
 *
 * Visible steps, in order:
 *  1. Test for a state beyond SCTPS_BOUND or a conn flagged CONN_CLOSING.
 *     NOTE(review): the action taken on that test is not visible in this
 *     view; presumably an error return.  Confirm.
 *  2. If no source address is bound yet (sctp_nsaddrs == 0), perform an
 *     anonymous bind by calling sctp_bind() with a zeroed sockaddr_storage
 *     whose family is conn_family.
 *  3. Cache conn_cred, conn_cpid and (on labeled systems) the credential
 *     label in conn_ixa without taking a reference; the ASSERT checks that
 *     IXA_FREE_CRED is clear so the ixa will not try to free the cred.
 *  4. Set the state to SCTPS_LISTEN, fill sctp_secret with pseudo random
 *     bytes, stamp sctp_last_secret_update and zero sctp_old_secret.
 *  5. If the stack has listener configuration entries and this endpoint has
 *     no sctp_listen_cnt yet, look up the ratio for the local port,
 *     allocate a counter struct with KM_SLEEP and derive slc_max from
 *     maxusers, the ratio and the combined receive plus send buffer size
 *     relative to MB, with sctp_min_assoc_listener as the floor.
 *  6. Insert the endpoint into the listen fanout bucket selected by
 *     SCTP_LISTEN_HASH of the host order local port.
 *
 * NOTE(review): locking calls, the declarations of tf, ret and ratio, and
 * the return statements are not visible in this view.
 */
125 sctp_listen(sctp_t
*sctp
)
128 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
129 conn_t
*connp
= sctp
->sctp_connp
;
133 * TCP handles listen() increasing the backlog, need to check
134 * if it should be handled here too
136 if (sctp
->sctp_state
> SCTPS_BOUND
||
137 (sctp
->sctp_connp
->conn_state_flags
& CONN_CLOSING
)) {
142 /* Do an anonymous bind for unbound socket doing listen(). */
143 if (sctp
->sctp_nsaddrs
== 0) {
144 struct sockaddr_storage ss
;
/* Zeroed address: only the family is set, so port and address are unspecified. */
147 bzero(&ss
, sizeof (ss
));
148 ss
.ss_family
= connp
->conn_family
;
151 if ((ret
= sctp_bind(sctp
, (struct sockaddr
*)&ss
,
157 /* Cache things in the ixa without any refhold */
158 ASSERT(!(connp
->conn_ixa
->ixa_free_flags
& IXA_FREE_CRED
));
159 connp
->conn_ixa
->ixa_cred
= connp
->conn_cred
;
160 connp
->conn_ixa
->ixa_cpid
= connp
->conn_cpid
;
161 if (is_system_labeled())
162 connp
->conn_ixa
->ixa_tsl
= crgetlabel(connp
->conn_cred
);
/* Enter LISTEN and start with a fresh secret and an empty old secret. */
164 sctp
->sctp_state
= SCTPS_LISTEN
;
165 (void) random_get_pseudo_bytes(sctp
->sctp_secret
, SCTP_SECRET_LEN
);
166 sctp
->sctp_last_secret_update
= ddi_get_lbolt64();
167 bzero(sctp
->sctp_old_secret
, SCTP_SECRET_LEN
);
170 * If there is an association limit, allocate and initialize
171 * the counter struct. Note that since listen can be called
172 * multiple times, the struct may have been allready allocated.
174 if (!list_is_empty(&sctps
->sctps_listener_conf
) &&
175 sctp
->sctp_listen_cnt
== NULL
) {
176 sctp_listen_cnt_t
*slc
;
/* Configured ratio for this listener port (host byte order lookup). */
179 ratio
= sctp_find_listener_conf(sctps
,
180 ntohs(connp
->conn_lport
));
182 uint32_t mem_ratio
, tot_buf
;
184 slc
= kmem_alloc(sizeof (sctp_listen_cnt_t
), KM_SLEEP
);
186 * Calculate the connection limit based on
187 * the configured ratio and maxusers. Maxusers
188 * are calculated based on memory size,
189 * ~ 1 user per MB. Note that the conn_rcvbuf
190 * and conn_sndbuf may change after a
191 * connection is accepted. So what we have
192 * is only an approximation.
/*
 * NOTE(review): ratio and tot_buf are used as divisors below.  The guard on
 * ratio is not visible in this view, and nothing visible rules out
 * conn_rcvbuf + conn_sndbuf being 0.  Confirm both cannot be zero.
 */
194 if ((tot_buf
= connp
->conn_rcvbuf
+
195 connp
->conn_sndbuf
) < MB
) {
196 mem_ratio
= MB
/ tot_buf
;
197 slc
->slc_max
= maxusers
/ ratio
* mem_ratio
;
199 mem_ratio
= tot_buf
/ MB
;
200 slc
->slc_max
= maxusers
/ ratio
/ mem_ratio
;
202 /* At least we should allow some associations! */
203 if (slc
->slc_max
< sctp_min_assoc_listener
)
204 slc
->slc_max
= sctp_min_assoc_listener
;
207 sctp
->sctp_listen_cnt
= slc
;
/* Publish the listener in the per-stack listen hash. */
212 tf
= &sctps
->sctps_listen_fanout
[SCTP_LISTEN_HASH(
213 ntohs(connp
->conn_lport
))];
214 sctp_listen_hash_insert(tf
, sctp
);
221 * Bind the sctp_t to a sockaddr, which includes an address and other
222 * information, such as port or flowinfo.
/*
 * sctp_bind: bind an endpoint to one socket address and a local port.
 *
 *   sctp   endpoint to bind; asserted non-NULL.
 *   sa     address to bind to; its sa_family selects the parsing branch.
 *   len    length of the buffer behind sa.
 *
 * Visible steps, in order:
 *  1. Reject the request when the endpoint is already in SCTPS_BOUND or a
 *     later state, when the conn is flagged CONN_CLOSING, or when sa is
 *     NULL or len is 0.  NOTE(review): the error value returned is not
 *     visible in this view.
 *  2. Switch on sa_family.  One branch requires len to cover a
 *     struct sockaddr_in and the conn family not to be AF_INET6, then reads
 *     the port from sin_port.  The other requires len to cover a
 *     struct sockaddr_in6 and the conn family not to be AF_INET, reads the
 *     port from sin6_port, stores sin6_flowinfo masked with
 *     ~IPV6_VERS_AND_FLOW_MASK in conn_flowinfo, and handles the scope id:
 *     a non-zero scope id on a link scope address sets IXAF_SCOPEID_SET,
 *     ixa_scopeid and conn_incoming_ifindex; otherwise the flag is cleared
 *     and conn_incoming_ifindex falls back to conn_bound_if.
 *     NOTE(review): the case labels and the assignments of sin and sin6 are
 *     not visible; presumably AF_INET and AF_INET6.
 *  3. bind_to_req_port_only is B_TRUE exactly when a non-zero port was
 *     requested.
 *  4. sctp_select_port() validates or picks the port.
 *  5. sctp_bind_add() records the address with caller_hold_lock set to
 *     B_TRUE, passing the port in network byte order only when
 *     user_specified is 1 and 0 otherwise.
 *  6. sctp_bindi() claims the port; sctp_free_saddrs() is called on what
 *     appears to be the failure path, and the success path asserts the
 *     state is SCTPS_BOUND.  NOTE(review): the branch structure around
 *     these two lines is not visible.  Confirm.
 */
225 sctp_bind(sctp_t
*sctp
, struct sockaddr
*sa
, socklen_t len
)
228 boolean_t bind_to_req_port_only
;
229 in_port_t requested_port
;
230 in_port_t allocated_port
;
232 conn_t
*connp
= sctp
->sctp_connp
;
237 ASSERT(sctp
!= NULL
);
241 if ((sctp
->sctp_state
>= SCTPS_BOUND
) ||
242 (sctp
->sctp_connp
->conn_state_flags
& CONN_CLOSING
) ||
243 (sa
== NULL
|| len
== 0)) {
245 * Multiple binds not allowed for any SCTP socket
246 * Also binding with null address is not supported.
252 switch (sa
->sa_family
) {
/* IPv4 style address: needs a full sockaddr_in and a non-IPv6 socket. */
255 if (len
< sizeof (struct sockaddr_in
) ||
256 connp
->conn_family
== AF_INET6
) {
260 requested_port
= ntohs(sin
->sin_port
);
/* IPv6 style address: needs a full sockaddr_in6 and a non-IPv4 socket. */
264 if (len
< sizeof (struct sockaddr_in6
) ||
265 connp
->conn_family
== AF_INET
) {
269 requested_port
= ntohs(sin6
->sin6_port
);
270 /* Set the flowinfo. */
271 connp
->conn_flowinfo
=
272 sin6
->sin6_flowinfo
& ~IPV6_VERS_AND_FLOW_MASK
;
/* A scope id is honoured only for link scope addresses. */
274 scope_id
= sin6
->sin6_scope_id
;
275 if (scope_id
!= 0 && IN6_IS_ADDR_LINKSCOPE(&sin6
->sin6_addr
)) {
276 connp
->conn_ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
277 connp
->conn_ixa
->ixa_scopeid
= scope_id
;
278 connp
->conn_incoming_ifindex
= scope_id
;
280 connp
->conn_ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
281 connp
->conn_incoming_ifindex
= connp
->conn_bound_if
;
/* An explicit port must be honoured exactly; port 0 lets sctp_bindi search. */
288 bind_to_req_port_only
= requested_port
== 0 ? B_FALSE
: B_TRUE
;
290 err
= sctp_select_port(sctp
, &requested_port
, &user_specified
);
294 if ((err
= sctp_bind_add(sctp
, sa
, 1, B_TRUE
,
295 user_specified
== 1 ? htons(requested_port
) : 0)) != 0) {
298 err
= sctp_bindi(sctp
, requested_port
, bind_to_req_port_only
,
299 user_specified
, &allocated_port
);
301 sctp_free_saddrs(sctp
);
303 ASSERT(sctp
->sctp_state
== SCTPS_BOUND
);
311 * Perform bind/unbind operation of a list of addresses on a sctp_t
/*
 * sctp_bindx: add or remove a list of bound addresses on an endpoint.
 *
 *   sctp     endpoint to modify; asserted non-NULL.
 *   addrs    packed list of addresses; asserted non-NULL.
 *   addrcnt  number of addresses in addrs.
 *   bindop   SCTP_BINDX_ADD_ADDR forwards to sctp_bind_add() using the
 *            current conn_lport; SCTP_BINDX_REM_ADDR forwards to
 *            sctp_bind_del().  Both are called with caller_hold_lock set
 *            to B_FALSE.
 *
 * Returns whatever the selected helper returns.  NOTE(review): the switch
 * statement and its handling of any other bindop value are not visible in
 * this view.
 */
314 sctp_bindx(sctp_t
*sctp
, const void *addrs
, int addrcnt
, int bindop
)
316 ASSERT(sctp
!= NULL
);
317 ASSERT(addrs
!= NULL
);
321 case SCTP_BINDX_ADD_ADDR
:
322 return (sctp_bind_add(sctp
, addrs
, addrcnt
, B_FALSE
,
323 sctp
->sctp_connp
->conn_lport
));
324 case SCTP_BINDX_REM_ADDR
:
325 return (sctp_bind_del(sctp
, addrs
, addrcnt
, B_FALSE
));
332 * Add a list of addresses to a sctp_t.
/*
 * sctp_bind_add: add a list of addresses to the bound set of an endpoint.
 *
 *   sctp              endpoint to modify.
 *   addrs             packed list of addresses to add.
 *   addrcnt           number of addresses in addrs.
 *   caller_hold_lock  when B_FALSE this function performs an extra action at
 *                     entry and on every exit path.  NOTE(review): those
 *                     statements are not visible in this view; presumably
 *                     they acquire and release the endpoint lock.  Confirm.
 *   port              local port handed to the clustering address check.
 *
 * Visible steps, in order:
 *  1. Fail when the state is beyond SCTPS_ESTABLISHED or the conn is flagged
 *     CONN_CLOSING.
 *  2. When the state is beyond SCTPS_LISTEN, require that the stack has
 *     addip enabled and that the peer understands both asconf and addip.
 *     NOTE(review): the assignment that turns do_asconf on is not visible.
 *  3. If the clustering hook cl_sctp_check_addrs is registered: build a flat
 *     address list with sctp_get_addrlist(), let the hook validate it for
 *     the port, and free the list when the hook leaves no addresses.  Then
 *     allocate llist sized for addrcnt in6_addr_t entries, validate through
 *     sctp_valid_addr_list(), and on success with do_listen set hand llist
 *     to cl_sctp_listen, which takes ownership of it.  On failure llist is
 *     freed here.  addrlist is always freed here.
 *  4. Without the clustering hook, validate addrs directly through
 *     sctp_valid_addr_list() with no output list.
 *  5. When ASCONF is needed, call sctp_add_ip(); sctp_del_saddr_list() is
 *     called on what appears to be its failure path to undo the add.
 *
 * NOTE(review): return statements are not visible in this view.
 */
335 sctp_bind_add(sctp_t
*sctp
, const void *addrs
, uint32_t addrcnt
,
336 boolean_t caller_hold_lock
, in_port_t port
)
339 boolean_t do_asconf
= B_FALSE
;
340 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
341 conn_t
*connp
= sctp
->sctp_connp
;
343 if (!caller_hold_lock
)
346 if (sctp
->sctp_state
> SCTPS_ESTABLISHED
||
347 (sctp
->sctp_connp
->conn_state_flags
& CONN_CLOSING
)) {
348 if (!caller_hold_lock
)
353 if (sctp
->sctp_state
> SCTPS_LISTEN
) {
355 * Let's do some checking here rather than undoing the
356 * add later (for these reasons).
358 if (!sctps
->sctps_addip_enabled
||
359 !sctp
->sctp_understands_asconf
||
360 !sctp
->sctp_understands_addip
) {
361 if (!caller_hold_lock
)
368 * On a clustered node, for an inaddr_any bind, we will pass the list
369 * of all the addresses in the global list, minus any address on the
370 * loopback interface, and expect the clustering susbsystem to give us
371 * the correct list for the 'port'. For explicit binds we give the
372 * list of addresses and the clustering module validates it for the
375 * On a non-clustered node, cl_sctp_check_addrs will be NULL and
376 * we proceed as usual.
378 if (cl_sctp_check_addrs
!= NULL
) {
379 uchar_t
*addrlist
= NULL
;
383 uchar_t
*llist
= NULL
;
387 * If we are adding addresses after listening, but before
388 * an association is established, we need to update the
389 * clustering module with this info.
391 do_listen
= !do_asconf
&& sctp
->sctp_state
> SCTPS_BOUND
&&
392 cl_sctp_listen
!= NULL
;
394 err
= sctp_get_addrlist(sctp
, addrs
, &addrcnt
, &addrlist
,
397 ASSERT(addrlist
== NULL
);
398 ASSERT(addrcnt
== 0);
400 if (!caller_hold_lock
)
402 SCTP_KSTAT(sctps
, sctp_cl_check_addrs
);
405 ASSERT(addrlist
!= NULL
);
406 (*cl_sctp_check_addrs
)(connp
->conn_family
, port
, &addrlist
,
407 size
, &addrcnt
, unspec
== 1);
409 /* We free the list */
410 kmem_free(addrlist
, size
);
411 if (!caller_hold_lock
)
416 lsize
= sizeof (in6_addr_t
) * addrcnt
;
417 llist
= kmem_alloc(lsize
, KM_SLEEP
);
419 err
= sctp_valid_addr_list(sctp
, addrlist
, addrcnt
, llist
,
421 if (err
== 0 && do_listen
) {
422 (*cl_sctp_listen
)(connp
->conn_family
, llist
,
423 addrcnt
, connp
->conn_lport
);
424 /* list will be freed by the clustering module */
425 } else if (err
!= 0 && llist
!= NULL
) {
426 kmem_free(llist
, lsize
);
428 /* free the list we allocated */
429 kmem_free(addrlist
, size
);
431 err
= sctp_valid_addr_list(sctp
, addrs
, addrcnt
, NULL
, 0);
434 if (!caller_hold_lock
)
438 /* Need to send ASCONF messages */
440 err
= sctp_add_ip(sctp
, addrs
, addrcnt
);
442 sctp_del_saddr_list(sctp
, addrs
, addrcnt
, B_FALSE
);
443 if (!caller_hold_lock
)
448 if (!caller_hold_lock
)
454 * Remove one or more addresses bound to the sctp_t.
/*
 * sctp_bind_del: remove one or more addresses from the bound set.
 *
 *   sctp              endpoint to modify.
 *   addrs             packed list of addresses to remove.
 *   addrcnt           number of addresses in addrs.
 *   caller_hold_lock  when B_FALSE this function performs an extra action at
 *                     entry and on every exit path.  NOTE(review): those
 *                     statements are not visible in this view; presumably
 *                     they acquire and release the endpoint lock.  Confirm.
 *
 * Visible steps, in order:
 *  1. Fail when the state is beyond SCTPS_ESTABLISHED or the conn is flagged
 *     CONN_CLOSING.
 *  2. When the state is beyond SCTPS_LISTEN, require that the stack has
 *     addip enabled and that the peer understands both asconf and addip.
 *     NOTE(review): the assignment that turns do_asconf on is not visible.
 *  3. Refuse to remove the only bound address, and refuse any request whose
 *     addrcnt is at least sctp_nsaddrs.
 *  4. If the clustering hook cl_sctp_unlisten is registered, ASCONF is not in
 *     use and the state is beyond SCTPS_BOUND, allocate ulist sized for
 *     addrcnt in6_addr_t entries (KM_SLEEP).
 *  5. sctp_del_ip() performs the removal and is handed ulist and its size.
 *     On what appears to be the failure path ulist is freed here.
 *  6. Otherwise a non-NULL ulist is passed to cl_sctp_unlisten, which takes
 *     ownership of it.
 *
 * NOTE(review): the declarations of usize and error and the return
 * statements are not visible in this view.
 */
457 sctp_bind_del(sctp_t
*sctp
, const void *addrs
, uint32_t addrcnt
,
458 boolean_t caller_hold_lock
)
461 boolean_t do_asconf
= B_FALSE
;
462 uchar_t
*ulist
= NULL
;
464 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
465 conn_t
*connp
= sctp
->sctp_connp
;
467 if (!caller_hold_lock
)
470 if (sctp
->sctp_state
> SCTPS_ESTABLISHED
||
471 (sctp
->sctp_connp
->conn_state_flags
& CONN_CLOSING
)) {
472 if (!caller_hold_lock
)
477 * Fail the remove if we are beyond listen, but can't send this
480 if (sctp
->sctp_state
> SCTPS_LISTEN
) {
481 if (!sctps
->sctps_addip_enabled
||
482 !sctp
->sctp_understands_asconf
||
483 !sctp
->sctp_understands_addip
) {
484 if (!caller_hold_lock
)
491 /* Can't delete the last address nor all of the addresses */
492 if (sctp
->sctp_nsaddrs
== 1 || addrcnt
>= sctp
->sctp_nsaddrs
) {
493 if (!caller_hold_lock
)
498 if (cl_sctp_unlisten
!= NULL
&& !do_asconf
&&
499 sctp
->sctp_state
> SCTPS_BOUND
) {
500 usize
= sizeof (in6_addr_t
) * addrcnt
;
501 ulist
= kmem_alloc(usize
, KM_SLEEP
);
504 error
= sctp_del_ip(sctp
, addrs
, addrcnt
, ulist
, usize
);
507 kmem_free(ulist
, usize
);
508 if (!caller_hold_lock
)
512 /* ulist will be non-NULL only if cl_sctp_unlisten is non-NULL */
514 ASSERT(cl_sctp_unlisten
!= NULL
);
515 (*cl_sctp_unlisten
)(connp
->conn_family
, ulist
, addrcnt
,
517 /* ulist will be freed by the clustering module */
519 if (!caller_hold_lock
)
525 * Returns 0 for success, errno value otherwise.
527 * If the "bind_to_req_port_only" parameter is set and the requested port
528 * number is available, then set allocated_port to it. If not available,
531 * If the "bind_to_req_port_only" parameter is not set and the requested port
532 * number is available, then set allocated_port to it. If not available,
533 * find the first anonymous port we can and set allocated_port to that. If no
534 * anonymous ports are available, return an error.
536 * In either case, when succeeding, update the sctp_t to record the port number
537 * and insert it in the bind hash table.
/*
 * sctp_bindi: claim a local port for an endpoint and insert the endpoint
 * into the bind hash.
 *
 *   sctp                   endpoint being bound.
 *   port                   first candidate port; used in host byte order
 *                          for SCTP_BIND_HASH().
 *   bind_to_req_port_only  when set, only the requested port is tried and a
 *                          busy port yields EADDRINUSE; when clear, the
 *                          search may move through the anonymous range and
 *                          exhaustion yields EADDRNOTAVAIL.
 *   user_specified         non-zero when the caller supplied the port; in
 *                          that case sctps_next_port_to_try is not updated.
 *   allocated_port         out: receives the port that was claimed.
 *
 * Visible steps of each loop iteration:
 *  1. Remove the endpoint from the bind hash, select the bucket for port and
 *     take its tf_lock so the duplicate check and the insert are atomic.
 *  2. Walk the bucket chain.  Entries with a different conn_lport or a state
 *     below SCTPS_BOUND are passed over, as are entries in another zone when
 *     both conns use CONN_MAC_DEFAULT.  For the rest, sctp_compare_saddrs()
 *     decides whether the address sets overlap; an overlap is a conflict
 *     unless conn_reuseaddr is set, and even then an entry in SCTPS_BOUND or
 *     SCTPS_LISTEN conflicts.
 *  3. On a conflict the bucket lock is dropped and another port is chosen
 *     with sctp_update_next_port().
 *  4. Otherwise, on labeled systems, tsol_mlp_addr_type() and
 *     tsol_mlp_port_type() classify the binding.  An address type of
 *     mlptSingle returns EADDRNOTAVAIL; a multilevel port requires
 *     secpolicy_net_bindmlp(); a shared MLP must be owned by the zone of
 *     this conn.  Each failure path drops the bucket lock first.  The
 *     resulting type is stored in conn_mlp_type.
 *  5. The state becomes SCTPS_BOUND, conn_lport is set from lport, the
 *     endpoint is inserted in the bucket and the lock is dropped.  If the
 *     port was not user specified, sctps_next_port_to_try becomes port + 1.
 *     *allocated_port is set to port.
 *
 * The loop ends once count reaches loopmax.  For the non-exclusive case
 * loopmax is the size of the anonymous port range.
 *
 * NOTE(review): the declarations of count, loopmax, lport, tbf, lsctp and
 * addrcmp, the assignment of lport, and the success return are not visible
 * in this view.  lport is compared against conn_lport, so it is presumably
 * the network byte order form of port.  Confirm.
 */
540 sctp_bindi(sctp_t
*sctp
, in_port_t port
, boolean_t bind_to_req_port_only
,
541 int user_specified
, in_port_t
*allocated_port
)
543 /* number of times we have run around the loop */
545 /* maximum number of times to run around the loop */
547 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
548 conn_t
*connp
= sctp
->sctp_connp
;
549 zone_t
*zone
= crgetzone(connp
->conn_cred
);
550 zoneid_t zoneid
= connp
->conn_zoneid
;
553 * Lookup for free addresses is done in a loop and "loopmax"
554 * influences how long we spin in the loop
556 if (bind_to_req_port_only
) {
558 * If the requested port is busy, don't bother to look
559 * for a new one. Setting loop maximum count to 1 has
565 * If the requested port is busy, look for a free one
566 * in the anonymous port range.
567 * Set loopmax appropriately so that one does not look
568 * forever in the case all of the anonymous ports are in use.
570 loopmax
= (sctps
->sctps_largest_anon_port
-
571 sctps
->sctps_smallest_anon_port
+ 1);
582 * Ensure that the sctp_t is not currently in the bind hash.
583 * Hold the lock on the hash bucket to ensure that
584 * the duplicate check plus the insertion is an atomic
587 * This function does an inline lookup on the bind hash list
588 * Make sure that we access only members of sctp_t
589 * and that we don't look at sctp_sctp, since we are not
590 * doing a SCTPB_REFHOLD. For more details please see the notes
593 sctp_bind_hash_remove(sctp
);
594 tbf
= &sctps
->sctps_bind_fanout
[SCTP_BIND_HASH(port
)];
595 mutex_enter(&tbf
->tf_lock
);
596 for (lsctp
= tbf
->tf_sctp
; lsctp
!= NULL
;
597 lsctp
= lsctp
->sctp_bind_hash
) {
598 conn_t
*lconnp
= lsctp
->sctp_connp
;
600 if (lport
!= lconnp
->conn_lport
||
601 lsctp
->sctp_state
< SCTPS_BOUND
)
605 * On a labeled system, we must treat bindings to ports
606 * on shared IP addresses by sockets with MAC exemption
607 * privilege as being in all zones, as there's
608 * otherwise no way to identify the right receiver.
610 if (lconnp
->conn_zoneid
!= zoneid
&&
611 lconnp
->conn_mac_mode
== CONN_MAC_DEFAULT
&&
612 connp
->conn_mac_mode
== CONN_MAC_DEFAULT
)
615 addrcmp
= sctp_compare_saddrs(sctp
, lsctp
);
616 if (addrcmp
!= SCTP_ADDR_DISJOINT
) {
617 if (!connp
->conn_reuseaddr
) {
620 } else if (lsctp
->sctp_state
== SCTPS_BOUND
||
621 lsctp
->sctp_state
== SCTPS_LISTEN
) {
623 * socket option SO_REUSEADDR is set
624 * on the binding sctp_t.
626 * We have found a match of IP source
627 * address and source port, which is
628 * refused regardless of the
629 * SO_REUSEADDR setting, so we break.
636 /* The port number is busy */
637 mutex_exit(&tbf
->tf_lock
);
639 if (is_system_labeled()) {
640 mlp_type_t addrtype
, mlptype
;
644 * On a labeled system we must check the type
645 * of the binding requested by the user (either
646 * MLP or SLP on shared and private addresses),
647 * and that the user's requested binding
650 if (connp
->conn_family
== AF_INET
)
651 ipversion
= IPV4_VERSION
;
653 ipversion
= IPV6_VERSION
;
655 addrtype
= tsol_mlp_addr_type(
656 connp
->conn_allzones
? ALL_ZONES
:
659 connp
->conn_family
== AF_INET
?
660 (void *)&sctp
->sctp_ipha
->ipha_src
:
661 (void *)&sctp
->sctp_ip6h
->ip6_src
,
662 sctps
->sctps_netstack
->netstack_ip
);
665 * tsol_mlp_addr_type returns the possibilities
666 * for the selected address. Since all local
667 * addresses are either private or shared, the
668 * return value mlptSingle means "local address
669 * not valid (interface not present)."
671 if (addrtype
== mlptSingle
) {
672 mutex_exit(&tbf
->tf_lock
);
673 return (EADDRNOTAVAIL
);
675 mlptype
= tsol_mlp_port_type(zone
, IPPROTO_SCTP
,
677 if (mlptype
!= mlptSingle
) {
678 if (secpolicy_net_bindmlp(connp
->
680 mutex_exit(&tbf
->tf_lock
);
684 * If we're binding a shared MLP, then
685 * make sure that this zone is the one
686 * that owns that MLP. Shared MLPs can
687 * be owned by at most one zone.
689 * No need to handle exclusive-stack
690 * zones since ALL_ZONES only applies
691 * to the shared stack.
694 if (mlptype
== mlptShared
&&
695 addrtype
== mlptShared
&&
696 connp
->conn_zoneid
!=
697 tsol_mlp_findzone(IPPROTO_SCTP
,
699 mutex_exit(&tbf
->tf_lock
);
702 connp
->conn_mlp_type
= mlptype
;
706 * This port is ours. Insert in fanout and mark as
707 * bound to prevent others from getting the port
710 sctp
->sctp_state
= SCTPS_BOUND
;
711 connp
->conn_lport
= lport
;
713 ASSERT(&sctps
->sctps_bind_fanout
[
714 SCTP_BIND_HASH(port
)] == tbf
);
715 sctp_bind_hash_insert(tbf
, sctp
, 1);
717 mutex_exit(&tbf
->tf_lock
);
720 * We don't want sctp_next_port_to_try to "inherit"
721 * a port number supplied by the user in a bind.
723 * This is the only place where sctp_next_port_to_try
724 * is updated. After the update, it may or may not
725 * be in the valid range.
727 if (user_specified
== 0)
728 sctps
->sctps_next_port_to_try
= port
+ 1;
730 *allocated_port
= port
;
735 if ((count
== 0) && (user_specified
)) {
737 * We may have to return an anonymous port. So
738 * get one to start with.
740 port
= sctp_update_next_port(
741 sctps
->sctps_next_port_to_try
,
745 port
= sctp_update_next_port(port
+ 1, zone
, sctps
);
751 * Don't let this loop run forever in the case where
752 * all of the anonymous ports are in use.
754 } while (++count
< loopmax
);
756 return (bind_to_req_port_only
? EADDRINUSE
: EADDRNOTAVAIL
);
760 * Don't let port fall into the privileged range.
761 * Since the extra privileged ports can be arbitrary we also
762 * ensure that we exclude those from consideration.
763 * sctp_g_epriv_ports is not sorted thus we loop over it until
764 * there are no changes.
766 * Note: No locks are held when inspecting sctp_g_*epriv_ports
767 * but instead the code relies on:
768 * - the fact that the address of the array and its size never changes
769 * - the atomic assignment of the elements of the array
772 sctp_update_next_port(in_port_t port
, zone_t
*zone
, sctp_stack_t
*sctps
)
775 boolean_t restart
= B_FALSE
;
778 if (port
< sctps
->sctps_smallest_anon_port
)
779 port
= sctps
->sctps_smallest_anon_port
;
781 if (port
> sctps
->sctps_largest_anon_port
) {
785 port
= sctps
->sctps_smallest_anon_port
;
788 if (port
< sctps
->sctps_smallest_nonpriv_port
)
789 port
= sctps
->sctps_smallest_nonpriv_port
;
791 for (i
= 0; i
< sctps
->sctps_g_num_epriv_ports
; i
++) {
792 if (port
== sctps
->sctps_g_epriv_ports
[i
]) {
795 * Make sure whether the port is in the
798 * XXX Note that if sctp_g_epriv_ports contains
799 * all the anonymous ports this will be an
806 if (is_system_labeled() &&
807 (i
= tsol_next_port(zone
, port
, IPPROTO_SCTP
, B_TRUE
)) != 0) {