4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/sysmacros.h>
27 #include <sys/socket.h>
29 #include <sys/sunddi.h>
31 #include <netinet/in.h>
32 #include <netinet/ip6.h>
34 #include <inet/common.h>
37 #include <inet/ipclassifier.h>
38 #include <inet/ipsec_impl.h>
39 #include <inet/ipp_common.h>
40 #include <inet/sctp_ip.h>
42 #include <inet/sctp/sctp_impl.h>
43 #include <inet/sctp/sctp_addr.h>
45 /* Default association hash size. The size must be a power of 2. */
46 #define SCTP_CONN_HASH_SIZE 8192
48 uint_t sctp_conn_hash_size
= SCTP_CONN_HASH_SIZE
; /* /etc/system */
51 sctp_hash_init(sctp_stack_t
*sctps
)
55 /* Start with /etc/system value */
56 sctps
->sctps_conn_hash_size
= sctp_conn_hash_size
;
58 if (!ISP2(sctps
->sctps_conn_hash_size
)) {
59 /* Not a power of two. Round up to nearest power of two */
60 for (i
= 0; i
< 31; i
++) {
61 if (sctps
->sctps_conn_hash_size
< (1 << i
))
64 sctps
->sctps_conn_hash_size
= 1 << i
;
66 if (sctps
->sctps_conn_hash_size
< SCTP_CONN_HASH_SIZE
) {
67 sctps
->sctps_conn_hash_size
= SCTP_CONN_HASH_SIZE
;
68 cmn_err(CE_CONT
, "using sctp_conn_hash_size = %u\n",
69 sctps
->sctps_conn_hash_size
);
71 sctps
->sctps_conn_fanout
=
72 (sctp_tf_t
*)kmem_zalloc(sctps
->sctps_conn_hash_size
*
73 sizeof (sctp_tf_t
), KM_SLEEP
);
74 for (i
= 0; i
< sctps
->sctps_conn_hash_size
; i
++) {
75 mutex_init(&sctps
->sctps_conn_fanout
[i
].tf_lock
, NULL
,
78 sctps
->sctps_listen_fanout
= kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE
*
79 sizeof (sctp_tf_t
), KM_SLEEP
);
80 for (i
= 0; i
< SCTP_LISTEN_FANOUT_SIZE
; i
++) {
81 mutex_init(&sctps
->sctps_listen_fanout
[i
].tf_lock
, NULL
,
84 sctps
->sctps_bind_fanout
= kmem_zalloc(SCTP_BIND_FANOUT_SIZE
*
85 sizeof (sctp_tf_t
), KM_SLEEP
);
86 for (i
= 0; i
< SCTP_BIND_FANOUT_SIZE
; i
++) {
87 mutex_init(&sctps
->sctps_bind_fanout
[i
].tf_lock
, NULL
,
93 sctp_hash_destroy(sctp_stack_t
*sctps
)
97 for (i
= 0; i
< sctps
->sctps_conn_hash_size
; i
++) {
98 mutex_destroy(&sctps
->sctps_conn_fanout
[i
].tf_lock
);
100 kmem_free(sctps
->sctps_conn_fanout
, sctps
->sctps_conn_hash_size
*
102 sctps
->sctps_conn_fanout
= NULL
;
104 for (i
= 0; i
< SCTP_LISTEN_FANOUT_SIZE
; i
++) {
105 mutex_destroy(&sctps
->sctps_listen_fanout
[i
].tf_lock
);
107 kmem_free(sctps
->sctps_listen_fanout
, SCTP_LISTEN_FANOUT_SIZE
*
109 sctps
->sctps_listen_fanout
= NULL
;
111 for (i
= 0; i
< SCTP_BIND_FANOUT_SIZE
; i
++) {
112 mutex_destroy(&sctps
->sctps_bind_fanout
[i
].tf_lock
);
114 kmem_free(sctps
->sctps_bind_fanout
, SCTP_BIND_FANOUT_SIZE
*
116 sctps
->sctps_bind_fanout
= NULL
;
120 sctp_conn_match(in6_addr_t
**faddrpp
, uint32_t nfaddr
, in6_addr_t
*laddr
,
121 uint32_t ports
, zoneid_t zoneid
, iaflags_t iraflags
, sctp_stack_t
*sctps
)
127 in6_addr_t
**faddrs
, **endaddrs
= &faddrpp
[nfaddr
];
129 tf
= &(sctps
->sctps_conn_fanout
[SCTP_CONN_HASH(sctps
, ports
)]);
130 mutex_enter(&tf
->tf_lock
);
132 for (sctp
= tf
->tf_sctp
; sctp
!= NULL
; sctp
=
133 sctp
->sctp_conn_hash_next
) {
134 connp
= sctp
->sctp_connp
;
135 if (ports
!= connp
->conn_ports
)
137 if (!(connp
->conn_zoneid
== zoneid
||
138 connp
->conn_allzones
))
141 /* check for faddr match */
142 for (fp
= sctp
->sctp_faddrs
; fp
!= NULL
; fp
= fp
->sf_next
) {
143 for (faddrs
= faddrpp
; faddrs
< endaddrs
; faddrs
++) {
144 if (IN6_ARE_ADDR_EQUAL(*faddrs
,
146 /* check for laddr match */
147 if (sctp_saddr_lookup(sctp
, laddr
, 0)
150 mutex_exit(&tf
->tf_lock
);
157 /* no match; continue to the next in the chain */
160 mutex_exit(&tf
->tf_lock
);
165 listen_match(in6_addr_t
*laddr
, uint32_t ports
, zoneid_t zoneid
,
166 iaflags_t iraflags
, sctp_stack_t
*sctps
)
173 lport
= ((uint16_t *)&ports
)[1];
175 tf
= &(sctps
->sctps_listen_fanout
[SCTP_LISTEN_HASH(ntohs(lport
))]);
176 mutex_enter(&tf
->tf_lock
);
178 for (sctp
= tf
->tf_sctp
; sctp
; sctp
= sctp
->sctp_listen_hash_next
) {
179 connp
= sctp
->sctp_connp
;
180 if (lport
!= connp
->conn_lport
)
183 if (!(connp
->conn_zoneid
== zoneid
||
184 connp
->conn_allzones
))
187 if (sctp_saddr_lookup(sctp
, laddr
, 0) != NULL
) {
191 /* no match; continue to the next in the chain */
195 mutex_exit(&tf
->tf_lock
);
199 /* called by ipsec_sctp_pol */
201 sctp_find_conn(in6_addr_t
*src
, in6_addr_t
*dst
, uint32_t ports
,
202 zoneid_t zoneid
, iaflags_t iraflags
, sctp_stack_t
*sctps
)
206 sctp
= sctp_conn_match(&src
, 1, dst
, ports
, zoneid
, iraflags
, sctps
);
208 /* Not in conn fanout; check listen fanout */
209 sctp
= listen_match(dst
, ports
, zoneid
, iraflags
, sctps
);
213 return (sctp
->sctp_connp
);
217 * This is called from sctp_fanout() with IP header src & dst addresses.
218 * First call sctp_conn_match() to get a match by passing in src & dst
219 * addresses from IP header.
220 * However sctp_conn_match() can return no match under condition such as :
221 * A host can send an INIT ACK from a different address than the INIT was sent
222 * to (in a multi-homed env).
223 * According to RFC4960, a host can send additional addresses in an INIT
225 * Therefore extract all addresses from the INIT ACK chunk, pass to
226 * sctp_conn_match() to get a match.
229 sctp_lookup_by_faddrs(mblk_t
*mp
, sctp_hdr_t
*sctph
, in6_addr_t
*srcp
,
230 in6_addr_t
*dstp
, uint32_t ports
, zoneid_t zoneid
, sctp_stack_t
*sctps
,
234 sctp_chunk_hdr_t
*ich
;
235 sctp_init_chunk_t
*iack
;
237 ssize_t mlen
, remaining
;
238 uint16_t param_type
, addr_len
= PARM_ADDR4_LEN
;
240 in6_addr_t
**addrbuf
= NULL
, **faddrpp
= NULL
;
242 uint32_t totaddr
, nfaddr
= 0;
245 * If we get a match with the passed-in IP header src & dst addresses,
246 * quickly return the matched sctp.
248 if ((sctp
= sctp_conn_match(&srcp
, 1, dstp
, ports
, zoneid
, iraflags
,
254 * Currently sctph is set to NULL in icmp error fanout case
255 * (ip_fanout_sctp()).
256 * The above sctp_conn_match() should handle that, otherwise
257 * return no match found.
263 * Do a pullup again in case the previous one was partially successful,
264 * so try to complete the pullup here and have a single contiguous
265 * chunk for processing of entire INIT ACK chunk below.
267 if (mp
->b_cont
!= NULL
) {
268 if (pullupmsg(mp
, -1) == 0) {
273 mlen
= mp
->b_wptr
- (uchar_t
*)(sctph
+ 1);
274 if ((ich
= sctp_first_chunk((uchar_t
*)(sctph
+ 1), mlen
)) == NULL
) {
278 if (ich
->sch_id
== CHUNK_INIT_ACK
) {
279 remaining
= ntohs(ich
->sch_len
) - sizeof (*ich
) -
281 if (remaining
< sizeof (*ph
)) {
285 isv4
= (iraflags
& IRAF_IS_IPV4
) ? B_TRUE
: B_FALSE
;
287 addr_len
= PARM_ADDR6_LEN
;
288 totaddr
= remaining
/addr_len
;
290 iack
= (sctp_init_chunk_t
*)(ich
+ 1);
291 ph
= (sctp_parm_hdr_t
*)(iack
+ 1);
293 addrbuf
= (in6_addr_t
**)
294 kmem_zalloc(totaddr
* sizeof (in6_addr_t
*), KM_NOSLEEP
);
301 * According to RFC4960 :
302 * All integer fields in an SCTP packet MUST be
303 * transmitted in network byte order,
304 * unless otherwise stated.
305 * Therefore convert the param type to host byte order.
306 * Also do not add src address present in IP header
307 * as it has already been thru sctp_conn_match() above.
309 param_type
= ntohs(ph
->sph_type
);
310 switch (param_type
) {
312 IN6_INADDR_TO_V4MAPPED((struct in_addr
*)
314 if (IN6_ARE_ADDR_EQUAL(&src
, srcp
))
316 *faddrpp
= (in6_addr_t
*)
317 kmem_zalloc(sizeof (in6_addr_t
),
319 if (*faddrpp
== NULL
)
321 IN6_INADDR_TO_V4MAPPED((struct in_addr
*)
327 *faddrpp
= (in6_addr_t
*)(ph
+ 1);
328 if (IN6_ARE_ADDR_EQUAL(*faddrpp
, srcp
))
336 ph
= sctp_next_parm(ph
, &remaining
);
339 ASSERT(nfaddr
< totaddr
);
342 sctp
= sctp_conn_match(addrbuf
, nfaddr
, dstp
, ports
,
343 zoneid
, iraflags
, sctps
);
346 for (faddrpp
= addrbuf
; nfaddr
> 0;
347 faddrpp
++, nfaddr
--) {
348 if (IN6_IS_ADDR_V4MAPPED(*faddrpp
)) {
350 sizeof (in6_addr_t
));
355 kmem_free(addrbuf
, totaddr
* sizeof (in6_addr_t
*));
361 * Fanout to a sctp instance.
364 sctp_fanout(in6_addr_t
*src
, in6_addr_t
*dst
, uint32_t ports
,
365 ip_recv_attr_t
*ira
, mblk_t
*mp
, sctp_stack_t
*sctps
, sctp_hdr_t
*sctph
)
367 zoneid_t zoneid
= ira
->ira_zoneid
;
368 iaflags_t iraflags
= ira
->ira_flags
;
371 sctp
= sctp_lookup_by_faddrs(mp
, sctph
, src
, dst
, ports
, zoneid
,
374 /* Not in conn fanout; check listen fanout */
375 sctp
= listen_match(dst
, ports
, zoneid
, iraflags
, sctps
);
380 * For labeled systems, there's no need to check the
381 * label here. It's known to be good as we checked
382 * before allowing the connection to become bound.
384 return (sctp
->sctp_connp
);
388 * Fanout for ICMP errors for SCTP
389 * The caller puts <fport, lport> in the ports parameter.
392 ip_fanout_sctp(mblk_t
*mp
, ipha_t
*ipha
, ip6_t
*ip6h
, uint32_t ports
,
397 in6_addr_t map_src
, map_dst
;
398 in6_addr_t
*src
, *dst
;
400 ill_t
*ill
= ira
->ira_ill
;
401 ip_stack_t
*ipst
= ill
->ill_ipst
;
402 netstack_t
*ns
= ipst
->ips_netstack
;
403 ipsec_stack_t
*ipss
= ns
->netstack_ipsec
;
404 sctp_stack_t
*sctps
= ns
->netstack_sctp
;
405 iaflags_t iraflags
= ira
->ira_flags
;
406 ill_t
*rill
= ira
->ira_rill
;
408 ASSERT(iraflags
& IRAF_ICMP_ERROR
);
410 secure
= iraflags
& IRAF_IPSEC_SECURE
;
412 /* Assume IP provides aligned packets - otherwise toss */
413 if (!OK_32PTR(mp
->b_rptr
)) {
414 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
415 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
420 if (!(iraflags
& IRAF_IS_IPV4
)) {
421 src
= &ip6h
->ip6_src
;
422 dst
= &ip6h
->ip6_dst
;
424 IN6_IPADDR_TO_V4MAPPED(ipha
->ipha_src
, &map_src
);
425 IN6_IPADDR_TO_V4MAPPED(ipha
->ipha_dst
, &map_dst
);
429 connp
= sctp_fanout(src
, dst
, ports
, ira
, mp
, sctps
, NULL
);
431 ip_fanout_sctp_raw(mp
, ipha
, ip6h
, ports
, ira
);
434 sctp
= CONN2SCTP(connp
);
437 * We check some fields in conn_t without holding a lock.
438 * This should be fine.
440 if (((iraflags
& IRAF_IS_IPV4
) ?
441 CONN_INBOUND_POLICY_PRESENT(connp
, ipss
) :
442 CONN_INBOUND_POLICY_PRESENT_V6(connp
, ipss
)) ||
444 mp
= ipsec_check_inbound_policy(mp
, connp
, ipha
,
452 ira
->ira_ill
= ira
->ira_rill
= NULL
;
454 mutex_enter(&sctp
->sctp_lock
);
455 if (sctp
->sctp_running
) {
456 sctp_add_recvq(sctp
, mp
, B_FALSE
, ira
);
457 mutex_exit(&sctp
->sctp_lock
);
459 sctp
->sctp_running
= B_TRUE
;
460 mutex_exit(&sctp
->sctp_lock
);
462 mutex_enter(&sctp
->sctp_recvq_lock
);
463 if (sctp
->sctp_recvq
!= NULL
) {
464 sctp_add_recvq(sctp
, mp
, B_TRUE
, ira
);
465 mutex_exit(&sctp
->sctp_recvq_lock
);
468 mutex_exit(&sctp
->sctp_recvq_lock
);
469 if (ira
->ira_flags
& IRAF_ICMP_ERROR
) {
470 sctp_icmp_error(sctp
, mp
);
472 sctp_input_data(sctp
, mp
, ira
);
479 ira
->ira_rill
= rill
;
483 sctp_conn_hash_remove(sctp_t
*sctp
)
485 sctp_tf_t
*tf
= sctp
->sctp_conn_tfp
;
491 mutex_enter(&tf
->tf_lock
);
493 if (tf
->tf_sctp
== sctp
) {
494 tf
->tf_sctp
= sctp
->sctp_conn_hash_next
;
495 if (sctp
->sctp_conn_hash_next
) {
496 ASSERT(tf
->tf_sctp
->sctp_conn_hash_prev
== sctp
);
497 tf
->tf_sctp
->sctp_conn_hash_prev
= NULL
;
500 ASSERT(sctp
->sctp_conn_hash_prev
);
501 ASSERT(sctp
->sctp_conn_hash_prev
->sctp_conn_hash_next
== sctp
);
502 sctp
->sctp_conn_hash_prev
->sctp_conn_hash_next
=
503 sctp
->sctp_conn_hash_next
;
505 if (sctp
->sctp_conn_hash_next
) {
506 ASSERT(sctp
->sctp_conn_hash_next
->sctp_conn_hash_prev
508 sctp
->sctp_conn_hash_next
->sctp_conn_hash_prev
=
509 sctp
->sctp_conn_hash_prev
;
512 sctp
->sctp_conn_hash_next
= NULL
;
513 sctp
->sctp_conn_hash_prev
= NULL
;
514 sctp
->sctp_conn_tfp
= NULL
;
515 mutex_exit(&tf
->tf_lock
);
519 sctp_conn_hash_insert(sctp_tf_t
*tf
, sctp_t
*sctp
, int caller_holds_lock
)
521 if (sctp
->sctp_conn_tfp
) {
522 sctp_conn_hash_remove(sctp
);
525 if (!caller_holds_lock
) {
526 mutex_enter(&tf
->tf_lock
);
528 ASSERT(MUTEX_HELD(&tf
->tf_lock
));
531 sctp
->sctp_conn_hash_next
= tf
->tf_sctp
;
533 tf
->tf_sctp
->sctp_conn_hash_prev
= sctp
;
535 sctp
->sctp_conn_hash_prev
= NULL
;
537 sctp
->sctp_conn_tfp
= tf
;
538 if (!caller_holds_lock
) {
539 mutex_exit(&tf
->tf_lock
);
544 sctp_listen_hash_remove(sctp_t
*sctp
)
546 sctp_tf_t
*tf
= sctp
->sctp_listen_tfp
;
552 mutex_enter(&tf
->tf_lock
);
554 if (tf
->tf_sctp
== sctp
) {
555 tf
->tf_sctp
= sctp
->sctp_listen_hash_next
;
556 if (sctp
->sctp_listen_hash_next
!= NULL
) {
557 ASSERT(tf
->tf_sctp
->sctp_listen_hash_prev
== sctp
);
558 tf
->tf_sctp
->sctp_listen_hash_prev
= NULL
;
561 ASSERT(sctp
->sctp_listen_hash_prev
);
562 ASSERT(sctp
->sctp_listen_hash_prev
->sctp_listen_hash_next
==
564 ASSERT(sctp
->sctp_listen_hash_next
== NULL
||
565 sctp
->sctp_listen_hash_next
->sctp_listen_hash_prev
== sctp
);
567 sctp
->sctp_listen_hash_prev
->sctp_listen_hash_next
=
568 sctp
->sctp_listen_hash_next
;
570 if (sctp
->sctp_listen_hash_next
!= NULL
) {
571 sctp_t
*next
= sctp
->sctp_listen_hash_next
;
573 ASSERT(next
->sctp_listen_hash_prev
== sctp
);
574 next
->sctp_listen_hash_prev
=
575 sctp
->sctp_listen_hash_prev
;
578 sctp
->sctp_listen_hash_next
= NULL
;
579 sctp
->sctp_listen_hash_prev
= NULL
;
580 sctp
->sctp_listen_tfp
= NULL
;
581 mutex_exit(&tf
->tf_lock
);
585 sctp_listen_hash_insert(sctp_tf_t
*tf
, sctp_t
*sctp
)
587 if (sctp
->sctp_listen_tfp
) {
588 sctp_listen_hash_remove(sctp
);
591 mutex_enter(&tf
->tf_lock
);
592 sctp
->sctp_listen_hash_next
= tf
->tf_sctp
;
594 tf
->tf_sctp
->sctp_listen_hash_prev
= sctp
;
596 sctp
->sctp_listen_hash_prev
= NULL
;
598 sctp
->sctp_listen_tfp
= tf
;
599 mutex_exit(&tf
->tf_lock
);
603 * Hash list insertion routine for sctp_t structures.
604 * Inserts entries with the ones bound to a specific IP address first
605 * followed by those bound to INADDR_ANY.
608 sctp_bind_hash_insert(sctp_tf_t
*tbf
, sctp_t
*sctp
, int caller_holds_lock
)
613 if (sctp
->sctp_ptpbhn
!= NULL
) {
614 ASSERT(!caller_holds_lock
);
615 sctp_bind_hash_remove(sctp
);
617 sctpp
= &tbf
->tf_sctp
;
618 if (!caller_holds_lock
) {
619 mutex_enter(&tbf
->tf_lock
);
621 ASSERT(MUTEX_HELD(&tbf
->tf_lock
));
625 sctpnext
->sctp_ptpbhn
= &sctp
->sctp_bind_hash
;
627 sctp
->sctp_bind_hash
= sctpnext
;
628 sctp
->sctp_ptpbhn
= sctpp
;
630 /* For sctp_*_hash_remove */
631 sctp
->sctp_bind_lockp
= &tbf
->tf_lock
;
632 if (!caller_holds_lock
)
633 mutex_exit(&tbf
->tf_lock
);
637 * Hash list removal routine for sctp_t structures.
640 sctp_bind_hash_remove(sctp_t
*sctp
)
645 lockp
= sctp
->sctp_bind_lockp
;
647 if (sctp
->sctp_ptpbhn
== NULL
)
650 ASSERT(lockp
!= NULL
);
652 if (sctp
->sctp_ptpbhn
) {
653 sctpnext
= sctp
->sctp_bind_hash
;
655 sctpnext
->sctp_ptpbhn
= sctp
->sctp_ptpbhn
;
656 sctp
->sctp_bind_hash
= NULL
;
658 *sctp
->sctp_ptpbhn
= sctpnext
;
659 sctp
->sctp_ptpbhn
= NULL
;
662 sctp
->sctp_bind_lockp
= NULL
;
666 * Similar to but different from sctp_conn_match().
668 * Matches sets of addresses as follows: if the argument addr set is
669 * a complete subset of the corresponding addr set in the sctp_t, it
672 * Caller must hold tf->tf_lock.
674 * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
677 sctp_lookup(sctp_t
*sctp1
, in6_addr_t
*faddr
, sctp_tf_t
*tf
, uint32_t *ports
,
683 ASSERT(MUTEX_HELD(&tf
->tf_lock
));
685 for (sctp
= tf
->tf_sctp
; sctp
!= NULL
;
686 sctp
= sctp
->sctp_conn_hash_next
) {
687 if (*ports
!= sctp
->sctp_connp
->conn_ports
||
688 sctp
->sctp_state
< min_state
) {
692 /* check for faddr match */
693 for (fp
= sctp
->sctp_faddrs
; fp
!= NULL
; fp
= fp
->sf_next
) {
694 if (IN6_ARE_ADDR_EQUAL(faddr
, &fp
->sf_faddr
)) {
700 /* no faddr match; keep looking */
705 * There is an existing association with the same peer
706 * address. So now we need to check if our local address
707 * set overlaps with the one of the existing association.
708 * If they overlap, we should return it.
710 if (sctp_compare_saddrs(sctp1
, sctp
) <= SCTP_ADDR_OVERLAP
) {
714 /* no match; continue searching */