4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/strsubr.h>
31 #include <sys/sunddi.h>
33 #include <sys/socket.h>
34 #include <sys/random.h>
35 #include <sys/tsol/tndb.h>
36 #include <sys/tsol/tnet.h>
38 #include <netinet/in.h>
39 #include <netinet/ip6.h>
40 #include <netinet/sctp.h>
42 #include <inet/common.h>
45 #include <inet/ip_ire.h>
46 #include <inet/ip_if.h>
47 #include <inet/ip_ndp.h>
48 #include <inet/mib2.h>
50 #include <inet/optcom.h>
51 #include <inet/sctp_ip.h>
52 #include <inet/ipclassifier.h>
54 #include "sctp_impl.h"
55 #include "sctp_addr.h"
56 #include "sctp_asconf.h"
/*
 * Kmem cache handle that the functions in this file pass to
 * kmem_cache_alloc() and kmem_cache_free() for sctp_faddr_t structures.
 */
58 static struct kmem_cache
*sctp_kmem_faddr_cache
;
/*
 * Forward declaration.  sctp_add_faddr() calls this on a freshly allocated
 * and zeroed faddr, passing the peer address and the timer mblk.
 */
59 static void sctp_init_faddr(sctp_t
*, sctp_faddr_t
*, in6_addr_t
*, mblk_t
*);
61 /* Set the source address. Refer to comments in sctp_get_dest(). */
63 sctp_set_saddr(sctp_t
*sctp
, sctp_faddr_t
*fp
)
65 boolean_t v6
= !fp
->sf_isv4
;
68 fp
->sf_saddr
= sctp_get_valid_addr(sctp
, v6
, &addr_set
);
70 * If there is no source address avaialble, mark this peer address
71 * as unreachable for now. When the heartbeat timer fires, it will
72 * call sctp_get_dest() to re-check if there is any source address
76 fp
->sf_state
= SCTP_FADDRS_UNREACH
;
80 * Call this function to get information about a peer addr fp.
82 * Uses ip_attr_connect to avoid explicit use of ire and source address
86 sctp_get_dest(sctp_t
*sctp
, sctp_faddr_t
*fp
)
90 sctp_saddr_ipif_t
*sp
;
92 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
93 conn_t
*connp
= sctp
->sctp_connp
;
97 uint32_t flags
= IPDF_VERIFY_DST
| IPDF_IPSEC
|
98 IPDF_SELECT_SRC
| IPDF_UNIQUE_DCE
;
101 * Tell sctp_make_mp it needs to call us again should we not
102 * complete and set the saddr.
104 fp
->sf_saddr
= ipv6_all_zeros
;
107 * If this addr is not reachable, mark it as unconfirmed for now, the
108 * state will be changed back to unreachable later in this function
109 * if it is still the case.
111 if (fp
->sf_state
== SCTP_FADDRS_UNREACH
) {
112 fp
->sf_state
= SCTP_FADDRS_UNCONFIRMED
;
116 * Socket is connected - enable PMTU discovery.
118 if (!sctps
->sctps_ignore_path_mtu
)
119 fp
->sf_ixa
->ixa_flags
|= IXAF_PMTU_DISCOVERY
;
121 ip_attr_nexthop(&connp
->conn_xmit_ipp
, fp
->sf_ixa
, &fp
->sf_faddr
,
124 laddr
= fp
->sf_saddr
;
125 error
= ip_attr_connect(connp
, fp
->sf_ixa
, &laddr
, &fp
->sf_faddr
,
126 &nexthop
, connp
->conn_fport
, &laddr
, &uinfo
, flags
);
129 dprint(3, ("sctp_get_dest: no ire for %x:%x:%x:%x\n",
130 SCTP_PRINTADDR(fp
->sf_faddr
)));
132 * It is tempting to just leave the src addr
133 * unspecified and let IP figure it out, but we
134 * *cannot* do this, since IP may choose a src addr
135 * that is not part of this association... unless
136 * this sctp has bound to all addrs. So if the dest
137 * lookup fails, try to find one in our src addr
138 * list, unless the sctp has bound to all addrs, in
139 * which case we change the src addr to unspec.
141 * Note that if this is a v6 endpoint but it does
142 * not have any v4 address at this point (e.g. may
143 * have been deleted), sctp_get_valid_addr() will
144 * return mapped INADDR_ANY. In this case, this
145 * address should be marked not reachable so that
146 * it won't be used to send data.
148 sctp_set_saddr(sctp
, fp
);
149 if (fp
->sf_state
== SCTP_FADDRS_UNREACH
)
153 ASSERT(fp
->sf_ixa
->ixa_ire
!= NULL
);
154 ASSERT(!(fp
->sf_ixa
->ixa_ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)));
156 if (!sctp
->sctp_loopback
)
157 sctp
->sctp_loopback
= uinfo
.iulp_loopback
;
159 /* Make sure the laddr is part of this association */
160 if ((sp
= sctp_saddr_lookup(sctp
, &laddr
, 0)) != NULL
&&
161 !sp
->saddr_ipif_dontsrc
) {
162 if (sp
->saddr_ipif_unconfirmed
== 1)
163 sp
->saddr_ipif_unconfirmed
= 0;
164 /* We did IPsec policy lookup for laddr already */
165 fp
->sf_saddr
= laddr
;
167 dprint(2, ("sctp_get_dest: src addr is not part of assoc "
168 "%x:%x:%x:%x\n", SCTP_PRINTADDR(laddr
)));
171 * Set the src to the first saddr and hope for the best.
172 * Note that this case should very seldomly
173 * happen. One scenario this can happen is an app
174 * explicitly bind() to an address. But that address is
175 * not the preferred source address to send to the peer.
177 sctp_set_saddr(sctp
, fp
);
178 if (fp
->sf_state
== SCTP_FADDRS_UNREACH
) {
184 * Pull out RTO information for this faddr and use it if we don't
187 if (fp
->sf_srtt
== -1 && uinfo
.iulp_rtt
!= 0) {
188 /* The cached value is in ms. */
189 fp
->sf_srtt
= MSEC_TO_TICK(uinfo
.iulp_rtt
);
190 fp
->sf_rttvar
= MSEC_TO_TICK(uinfo
.iulp_rtt_sd
);
191 fp
->sf_rto
= 3 * fp
->sf_srtt
;
193 /* Bound the RTO by configured min and max values */
194 if (fp
->sf_rto
< sctp
->sctp_rto_min
) {
195 fp
->sf_rto
= sctp
->sctp_rto_min
;
197 if (fp
->sf_rto
> sctp
->sctp_rto_max
) {
198 fp
->sf_rto
= sctp
->sctp_rto_max
;
200 SCTP_MAX_RTO(sctp
, fp
);
202 pmtu
= uinfo
.iulp_mtu
;
205 * Record the MTU for this faddr. If the MTU for this faddr has
206 * changed, check if the assc MTU will also change.
209 hdrlen
= sctp
->sctp_hdr_len
;
211 hdrlen
= sctp
->sctp_hdr6_len
;
213 if ((fp
->sf_pmss
+ hdrlen
) != pmtu
) {
214 /* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */
215 fp
->sf_pmss
= (pmtu
- hdrlen
) & ~(SCTP_ALIGN
- 1);
216 if (fp
->sf_cwnd
< (fp
->sf_pmss
* 2)) {
217 SET_CWND(fp
, fp
->sf_pmss
,
218 sctps
->sctps_slow_start_initial
);
223 if (fp
== sctp
->sctp_current
)
224 sctp_set_faddr_current(sctp
, fp
);
228 sctp_update_dce(sctp_t
*sctp
)
231 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
233 ip_stack_t
*ipst
= sctps
->sctps_netstack
->netstack_ip
;
236 for (fp
= sctp
->sctp_faddrs
; fp
!= NULL
; fp
= fp
->sf_next
) {
237 bzero(&uinfo
, sizeof (uinfo
));
239 * Only record the PMTU for this faddr if we actually have
240 * done discovery. This prevents initialized default from
241 * clobbering any real info that IP may have.
243 if (fp
->sf_pmtu_discovered
) {
245 uinfo
.iulp_mtu
= fp
->sf_pmss
+
248 uinfo
.iulp_mtu
= fp
->sf_pmss
+
252 if (sctps
->sctps_rtt_updates
!= 0 &&
253 fp
->sf_rtt_updates
>= sctps
->sctps_rtt_updates
) {
255 * dce_update_uinfo() merges these values with the
258 uinfo
.iulp_rtt
= TICK_TO_MSEC(fp
->sf_srtt
);
259 uinfo
.iulp_rtt_sd
= TICK_TO_MSEC(fp
->sf_rttvar
);
260 fp
->sf_rtt_updates
= 0;
263 if (IN6_IS_ADDR_LINKSCOPE(&fp
->sf_faddr
)) {
265 * If we are going to create a DCE we'd better have
268 if (fp
->sf_ixa
->ixa_nce
!= NULL
) {
269 ifindex
= fp
->sf_ixa
->ixa_nce
->nce_common
->
270 ncec_ill
->ill_phyint
->phyint_ifindex
;
276 (void) dce_update_uinfo(&fp
->sf_faddr
, ifindex
, &uinfo
, ipst
);
281 * The sender must later set the total length in the IP header.
284 sctp_make_mp(sctp_t
*sctp
, sctp_faddr_t
*fp
, int trailer
)
289 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
290 boolean_t src_changed
= B_FALSE
;
295 if (SCTP_IS_ADDR_UNSPEC(isv4
, fp
->sf_saddr
) ||
296 (fp
->sf_ixa
->ixa_ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
))) {
297 /* Need to pick a source */
298 sctp_get_dest(sctp
, fp
);
300 * Although we still may not get an IRE, the source address
301 * may be changed in sctp_get_ire(). Set src_changed to
302 * true so that the source address is copied again.
304 src_changed
= B_TRUE
;
307 /* There is no suitable source address to use, return. */
308 if (fp
->sf_state
== SCTP_FADDRS_UNREACH
)
311 ASSERT(fp
->sf_ixa
->ixa_ire
!= NULL
);
312 ASSERT(!SCTP_IS_ADDR_UNSPEC(isv4
, fp
->sf_saddr
));
315 ipsctplen
= sctp
->sctp_hdr_len
;
317 ipsctplen
= sctp
->sctp_hdr6_len
;
320 mp
= allocb(ipsctplen
+ sctps
->sctps_wroff_xtra
+ trailer
, BPRI_MED
);
322 ip1dbg(("sctp_make_mp: error making mp..\n"));
325 mp
->b_rptr
+= sctps
->sctps_wroff_xtra
;
326 mp
->b_wptr
= mp
->b_rptr
+ ipsctplen
;
328 ASSERT(OK_32PTR(mp
->b_wptr
));
331 ipha_t
*iph
= (ipha_t
*)mp
->b_rptr
;
333 bcopy(sctp
->sctp_iphc
, mp
->b_rptr
, ipsctplen
);
334 if (fp
!= sctp
->sctp_current
|| src_changed
) {
335 /* Fix the source and destination addresses. */
336 IN6_V4MAPPED_TO_IPADDR(&fp
->sf_faddr
, iph
->ipha_dst
);
337 IN6_V4MAPPED_TO_IPADDR(&fp
->sf_saddr
, iph
->ipha_src
);
339 /* set or clear the don't fragment bit */
341 iph
->ipha_fragment_offset_and_flags
= htons(IPH_DF
);
343 iph
->ipha_fragment_offset_and_flags
= 0;
346 bcopy(sctp
->sctp_iphc6
, mp
->b_rptr
, ipsctplen
);
347 if (fp
!= sctp
->sctp_current
|| src_changed
) {
348 /* Fix the source and destination addresses. */
349 ((ip6_t
*)(mp
->b_rptr
))->ip6_dst
= fp
->sf_faddr
;
350 ((ip6_t
*)(mp
->b_rptr
))->ip6_src
= fp
->sf_saddr
;
353 ASSERT(sctp
->sctp_connp
!= NULL
);
358 * Notify upper layers about preferred write offset, write size.
361 sctp_set_ulp_prop(sctp_t
*sctp
)
364 struct sock_proto_props sopp
;
366 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
368 if (sctp
->sctp_current
->sf_isv4
) {
369 hdrlen
= sctp
->sctp_hdr_len
;
371 hdrlen
= sctp
->sctp_hdr6_len
;
373 ASSERT(sctp
->sctp_ulpd
);
375 sctp
->sctp_connp
->conn_wroff
= sctps
->sctps_wroff_xtra
+ hdrlen
+
376 sizeof (sctp_data_hdr_t
);
378 ASSERT(sctp
->sctp_current
->sf_pmss
== sctp
->sctp_mss
);
379 bzero(&sopp
, sizeof (sopp
));
380 sopp
.sopp_flags
= SOCKOPT_MAXBLK
|SOCKOPT_WROFF
;
381 sopp
.sopp_wroff
= sctp
->sctp_connp
->conn_wroff
;
382 sopp
.sopp_maxblk
= sctp
->sctp_mss
- sizeof (sctp_data_hdr_t
);
383 sctp
->sctp_ulp_prop(sctp
->sctp_ulpd
, &sopp
);
387 * Set the lengths in the packet and the transmit attributes.
390 sctp_set_iplen(sctp_t
*sctp
, mblk_t
*mp
, ip_xmit_attr_t
*ixa
)
398 isv4
= (IPH_HDR_VERSION(mp
->b_rptr
) == IPV4_VERSION
);
399 for (; pmp
; pmp
= pmp
->b_cont
)
400 sum
+= pmp
->b_wptr
- pmp
->b_rptr
;
402 ixa
->ixa_pktlen
= sum
;
404 iph
= (ipha_t
*)mp
->b_rptr
;
405 iph
->ipha_length
= htons(sum
);
406 ixa
->ixa_ip_hdr_length
= sctp
->sctp_ip_hdr_len
;
408 ip6h
= (ip6_t
*)mp
->b_rptr
;
409 ip6h
->ip6_plen
= htons(sum
- IPV6_HDR_LEN
);
410 ixa
->ixa_ip_hdr_length
= sctp
->sctp_ip_hdr6_len
;
/*
 * Compare two peer-address lists.  For every entry fp1 of a1 the list a2
 * is scanned for an entry with an equal sf_faddr.  The result is one of
 * SCTP_ADDR_EQUAL, SCTP_ADDR_SUBSET (returned when overlap == na1),
 * SCTP_ADDR_OVERLAP or SCTP_ADDR_DISJOINT.
 *
 * NOTE(review): the statements that maintain `overlap', `na1' and the
 * equality flag are not visible in this excerpt - confirm against the
 * complete file before relying on the exact EQUAL/SUBSET distinction.
 */
415 sctp_compare_faddrsets(sctp_faddr_t
*a1
, sctp_faddr_t
*a2
)
421 sctp_faddr_t
*fp1
, *fp2
;
/* Outer walk over the first list. */
423 for (fp1
= a1
; fp1
; fp1
= fp1
->sf_next
) {
/* Inner walk: look for fp1's address anywhere in the second list. */
425 for (fp2
= a2
; fp2
; fp2
= fp2
->sf_next
) {
426 if (IN6_ARE_ADDR_EQUAL(&fp1
->sf_faddr
,
440 return (SCTP_ADDR_EQUAL
);
/* Every address of a1 was matched in a2. */
442 if (overlap
== na1
) {
443 return (SCTP_ADDR_SUBSET
);
446 return (SCTP_ADDR_OVERLAP
);
448 return (SCTP_ADDR_DISJOINT
);
/*
 * Allocate a new peer-address (faddr) structure for `addr', give it a
 * retransmit timer mblk and its own transmit attributes, initialise it via
 * sctp_init_faddr() and link it into sctp->sctp_faddrs (head or tail,
 * depending on `first').  On labeled systems the destination is first
 * checked with tsol_check_dest().
 */
452 * Returns 0 on success, ENOMEM on memory allocation failure, EHOSTUNREACH
453 * if the connection credentials fail remote host accreditation or
454 * if the new destination does not support the previously established
455 * connection security label. If sleep is true, this function should
456 * never fail for a memory allocation failure. The boolean parameter
457 * "first" decides whether the newly created faddr structure should be
458 * added at the beginning of the list or at the end.
460 * Note: caller must hold conn fanout lock.
463 sctp_add_faddr(sctp_t
*sctp
, in6_addr_t
*addr
, int sleep
, boolean_t first
)
468 conn_t
*connp
= sctp
->sctp_connp
;
/* Trusted Extensions: validate the destination against the conn's label. */
470 if (is_system_labeled()) {
471 ip_xmit_attr_t
*ixa
= connp
->conn_ixa
;
472 ts_label_t
*effective_tsl
= NULL
;
474 ASSERT(ixa
->ixa_tsl
!= NULL
);
477 * Verify the destination is allowed to receive packets
478 * at the security label of the connection we are initiating.
480 * tsol_check_dest() will create a new effective label for
481 * this connection with a modified label or label flags only
482 * if there are changes from the original label.
484 * Accept whatever label we get if this is the first
485 * destination address for this connection. The security
486 * label and label flags must match any previuous settings
487 * for all subsequent destination addresses.
489 if (IN6_IS_ADDR_V4MAPPED(addr
)) {
491 IN6_V4MAPPED_TO_IPADDR(addr
, dst
);
492 err
= tsol_check_dest(ixa
->ixa_tsl
,
493 &dst
, IPV4_VERSION
, connp
->conn_mac_mode
,
494 connp
->conn_zone_is_global
, &effective_tsl
);
496 err
= tsol_check_dest(ixa
->ixa_tsl
,
497 addr
, IPV6_VERSION
, connp
->conn_mac_mode
,
498 connp
->conn_zone_is_global
, &effective_tsl
);
/*
 * A new effective label is only adopted for the very first peer address;
 * for any later address it is released and the add fails.
 */
503 if (sctp
->sctp_faddrs
== NULL
&& effective_tsl
!= NULL
) {
504 ip_xmit_attr_replace_tsl(ixa
, effective_tsl
);
505 } else if (effective_tsl
!= NULL
) {
506 label_rele(effective_tsl
);
507 return (EHOSTUNREACH
);
511 if ((faddr
= kmem_cache_alloc(sctp_kmem_faddr_cache
, sleep
)) == NULL
)
513 bzero(faddr
, sizeof (*faddr
));
514 timer_mp
= sctp_timer_alloc((sctp
), sctp_rexmit_timer
, sleep
);
/* Undo the faddr allocation if no timer mblk could be obtained. */
515 if (timer_mp
== NULL
) {
516 kmem_cache_free(sctp_kmem_faddr_cache
, faddr
);
/* Point the timer's private data back at the faddr it belongs to. */
519 ((sctpt_t
*)(timer_mp
->b_rptr
))->sctpt_faddr
= faddr
;
521 /* Start with any options set on the conn */
522 faddr
->sf_ixa
= conn_get_ixa_exclusive(connp
);
523 if (faddr
->sf_ixa
== NULL
) {
525 kmem_cache_free(sctp_kmem_faddr_cache
, faddr
);
528 faddr
->sf_ixa
->ixa_notify_cookie
= connp
->conn_sctp
;
530 sctp_init_faddr(sctp
, faddr
, addr
, timer_mp
);
531 ASSERT(faddr
->sf_ixa
->ixa_cred
!= NULL
);
533 /* ip_attr_connect didn't allow broadcats/multicast dest */
534 ASSERT(faddr
->sf_next
== NULL
);
/* Link the new entry in: empty list, head insert, or tail append. */
536 if (sctp
->sctp_faddrs
== NULL
) {
537 ASSERT(sctp
->sctp_lastfaddr
== NULL
);
538 /* only element on list; first and last are same */
539 sctp
->sctp_faddrs
= sctp
->sctp_lastfaddr
= faddr
;
541 ASSERT(sctp
->sctp_lastfaddr
!= NULL
);
542 faddr
->sf_next
= sctp
->sctp_faddrs
;
543 sctp
->sctp_faddrs
= faddr
;
545 sctp
->sctp_lastfaddr
->sf_next
= faddr
;
546 sctp
->sctp_lastfaddr
= faddr
;
548 sctp
->sctp_nfaddrs
++;
554 sctp_lookup_faddr(sctp_t
*sctp
, in6_addr_t
*addr
)
558 for (fp
= sctp
->sctp_faddrs
; fp
!= NULL
; fp
= fp
->sf_next
) {
559 if (IN6_ARE_ADDR_EQUAL(&fp
->sf_faddr
, addr
))
567 sctp_lookup_faddr_nosctp(sctp_faddr_t
*fp
, in6_addr_t
*addr
)
569 for (; fp
; fp
= fp
->sf_next
) {
570 if (IN6_ARE_ADDR_EQUAL(&fp
->sf_faddr
, addr
)) {
579 * To change the currently used peer address to the specified one.
582 sctp_set_faddr_current(sctp_t
*sctp
, sctp_faddr_t
*fp
)
584 /* Now setup the composite header. */
586 IN6_V4MAPPED_TO_IPADDR(&fp
->sf_faddr
,
587 sctp
->sctp_ipha
->ipha_dst
);
588 IN6_V4MAPPED_TO_IPADDR(&fp
->sf_saddr
,
589 sctp
->sctp_ipha
->ipha_src
);
590 /* update don't fragment bit */
592 sctp
->sctp_ipha
->ipha_fragment_offset_and_flags
=
595 sctp
->sctp_ipha
->ipha_fragment_offset_and_flags
= 0;
598 sctp
->sctp_ip6h
->ip6_dst
= fp
->sf_faddr
;
599 sctp
->sctp_ip6h
->ip6_src
= fp
->sf_saddr
;
602 sctp
->sctp_current
= fp
;
603 sctp
->sctp_mss
= fp
->sf_pmss
;
605 /* Update the uppper layer for the change. */
606 if (!SCTP_IS_DETACHED(sctp
))
607 sctp_set_ulp_prop(sctp
);
611 sctp_redo_faddr_srcs(sctp_t
*sctp
)
615 for (fp
= sctp
->sctp_faddrs
; fp
!= NULL
; fp
= fp
->sf_next
) {
616 sctp_get_dest(sctp
, fp
);
621 sctp_faddr_alive(sctp_t
*sctp
, sctp_faddr_t
*fp
)
623 int64_t now
= LBOLT_FASTPATH64
;
626 * If we are under memory pressure, we abort association waiting
627 * in zero window probing state for too long. We do this by not
628 * resetting sctp_strikes. So if sctp_zero_win_probe continues
629 * while under memory pressure, this association will eventually
632 if (!sctp
->sctp_zero_win_probe
|| !sctp
->sctp_sctps
->sctps_reclaim
) {
633 sctp
->sctp_strikes
= 0;
636 fp
->sf_lastactive
= now
;
637 fp
->sf_hb_expiry
= now
+ SET_HB_INTVL(fp
);
638 fp
->sf_hb_pending
= B_FALSE
;
639 if (fp
->sf_state
!= SCTP_FADDRS_ALIVE
) {
640 fp
->sf_state
= SCTP_FADDRS_ALIVE
;
641 sctp_intf_event(sctp
, fp
->sf_faddr
, SCTP_ADDR_AVAILABLE
, 0);
642 /* Should have a full IRE now */
643 sctp_get_dest(sctp
, fp
);
646 * If this is the primary, switch back to it now. And
647 * we probably want to reset the source addr used to reach
649 * Note that if we didn't find a source in sctp_get_dest
650 * then we'd be unreachable at this point in time.
652 if (fp
== sctp
->sctp_primary
&&
653 fp
->sf_state
!= SCTP_FADDRS_UNREACH
) {
654 sctp_set_faddr_current(sctp
, fp
);
/*
 * Scan the association's peer-address list for an entry that is in
 * state SCTP_FADDRS_ALIVE and has a strike count of zero.
 * NOTE(review): the return statements are not visible in this excerpt;
 * the result values described below are taken from the original comment.
 */
661 * Return B_TRUE if there is still an active peer address with zero strikes;
662 * otherwise rturn B_FALSE.
665 sctp_is_a_faddr_clean(sctp_t
*sctp
)
/* Walk the whole list; the first clean, alive entry decides the result. */
669 for (fp
= sctp
->sctp_faddrs
; fp
; fp
= fp
->sf_next
) {
670 if (fp
->sf_state
== SCTP_FADDRS_ALIVE
&& fp
->sf_strikes
== 0) {
679 * Returns 0 if there is at leave one other active faddr, -1 if there
680 * are none. If there are none left, faddr_dead() will start killing the
682 * If the downed faddr was the current faddr, a new current faddr
686 sctp_faddr_dead(sctp_t
*sctp
, sctp_faddr_t
*fp
, int newstate
)
689 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
691 if (fp
->sf_state
== SCTP_FADDRS_ALIVE
) {
692 sctp_intf_event(sctp
, fp
->sf_faddr
, SCTP_ADDR_UNREACHABLE
, 0);
694 fp
->sf_state
= newstate
;
696 dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n",
697 SCTP_PRINTADDR(fp
->sf_faddr
), newstate
));
699 if (fp
== sctp
->sctp_current
) {
700 /* Current faddr down; need to switch it */
701 sctp
->sctp_current
= NULL
;
704 /* Find next alive faddr */
706 for (fp
= fp
->sf_next
; fp
!= NULL
; fp
= fp
->sf_next
) {
707 if (fp
->sf_state
== SCTP_FADDRS_ALIVE
) {
713 /* Continue from beginning of list */
714 for (fp
= sctp
->sctp_faddrs
; fp
!= ofp
; fp
= fp
->sf_next
) {
715 if (fp
->sf_state
== SCTP_FADDRS_ALIVE
) {
722 * Find a new fp, so if the current faddr is dead, use the new fp
723 * as the current one.
726 if (sctp
->sctp_current
== NULL
) {
727 dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n",
728 SCTP_PRINTADDR(fp
->sf_faddr
)));
730 * Note that we don't need to reset the source addr
733 sctp_set_faddr_current(sctp
, fp
);
739 /* All faddrs are down; kill the association */
740 dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
741 SCTPS_BUMP_MIB(sctps
, sctpAborted
);
742 sctp_assoc_event(sctp
, sctp
->sctp_state
< SCTPS_ESTABLISHED
?
743 SCTP_CANT_STR_ASSOC
: SCTP_COMM_LOST
, 0, NULL
);
744 sctp_clean_death(sctp
, sctp
->sctp_client_errno
?
745 sctp
->sctp_client_errno
: ETIMEDOUT
);
751 sctp_rotate_faddr(sctp_t
*sctp
, sctp_faddr_t
*ofp
)
753 sctp_faddr_t
*nfp
= NULL
;
754 sctp_faddr_t
*saved_fp
= NULL
;
758 ofp
= sctp
->sctp_current
;
761 if (sctp
->sctp_nfaddrs
< 2)
765 * Find the next live peer address with zero strikes. In case
766 * there is none, find the one with the lowest number of strikes.
768 min_strikes
= ofp
->sf_strikes
;
771 /* If reached end of list, continue scan from the head */
773 nfp
= sctp
->sctp_faddrs
;
776 if (nfp
->sf_state
== SCTP_FADDRS_ALIVE
) {
777 if (nfp
->sf_strikes
== 0)
779 if (nfp
->sf_strikes
< min_strikes
) {
780 min_strikes
= nfp
->sf_strikes
;
786 /* If reached the old address, there is no zero strike path */
791 * If there is a peer address with zero strikes we use that, if not
792 * return a peer address with fewer strikes than the one last used,
793 * if neither exist we may as well stay with the old one.
797 if (saved_fp
!= NULL
)
803 sctp_unlink_faddr(sctp_t
*sctp
, sctp_faddr_t
*fp
)
807 if (!sctp
->sctp_faddrs
) {
811 if (fp
->sf_timer_mp
!= NULL
) {
812 sctp_timer_free(fp
->sf_timer_mp
);
813 fp
->sf_timer_mp
= NULL
;
814 fp
->sf_timer_running
= 0;
816 if (fp
->sf_rc_timer_mp
!= NULL
) {
817 sctp_timer_free(fp
->sf_rc_timer_mp
);
818 fp
->sf_rc_timer_mp
= NULL
;
819 fp
->sf_rc_timer_running
= 0;
821 if (fp
->sf_ixa
!= NULL
) {
822 ixa_refrele(fp
->sf_ixa
);
826 if (fp
== sctp
->sctp_faddrs
) {
830 for (fpp
= sctp
->sctp_faddrs
; fpp
->sf_next
!= fp
; fpp
= fpp
->sf_next
)
834 ASSERT(sctp
->sctp_conn_tfp
!= NULL
);
835 mutex_enter(&sctp
->sctp_conn_tfp
->tf_lock
);
836 if (fp
== sctp
->sctp_faddrs
) {
837 sctp
->sctp_faddrs
= fp
->sf_next
;
839 fpp
->sf_next
= fp
->sf_next
;
841 mutex_exit(&sctp
->sctp_conn_tfp
->tf_lock
);
842 kmem_cache_free(sctp_kmem_faddr_cache
, fp
);
843 sctp
->sctp_nfaddrs
--;
847 sctp_zap_faddrs(sctp_t
*sctp
, int caller_holds_lock
)
849 sctp_faddr_t
*fp
, *fpn
;
851 if (sctp
->sctp_faddrs
== NULL
) {
852 ASSERT(sctp
->sctp_lastfaddr
== NULL
);
856 ASSERT(sctp
->sctp_lastfaddr
!= NULL
);
857 sctp
->sctp_lastfaddr
= NULL
;
858 sctp
->sctp_current
= NULL
;
859 sctp
->sctp_primary
= NULL
;
861 sctp_free_faddr_timers(sctp
);
863 if (sctp
->sctp_conn_tfp
!= NULL
&& !caller_holds_lock
) {
864 /* in conn fanout; need to hold lock */
865 mutex_enter(&sctp
->sctp_conn_tfp
->tf_lock
);
868 for (fp
= sctp
->sctp_faddrs
; fp
; fp
= fpn
) {
870 if (fp
->sf_ixa
!= NULL
) {
871 ixa_refrele(fp
->sf_ixa
);
874 kmem_cache_free(sctp_kmem_faddr_cache
, fp
);
875 sctp
->sctp_nfaddrs
--;
878 sctp
->sctp_faddrs
= NULL
;
879 ASSERT(sctp
->sctp_nfaddrs
== 0);
880 if (sctp
->sctp_conn_tfp
!= NULL
&& !caller_holds_lock
) {
881 mutex_exit(&sctp
->sctp_conn_tfp
->tf_lock
);
887 sctp_zap_addrs(sctp_t
*sctp
)
889 sctp_zap_faddrs(sctp
, 0);
890 sctp_free_saddrs(sctp
);
894 * Build two SCTP header templates; one for IPv4 and one for IPv6.
895 * Store them in sctp_iphc and sctp_iphc6 respectively (and related fields).
896 * There are no IP addresses in the templates, but the port numbers and
897 * verifier are field in from the conn_t and sctp_t.
899 * Returns failure if can't allocate memory, or if there is a problem
900 * with a routing header/option.
902 * We allocate space for the minimum sctp header (sctp_hdr_t).
904 * We massage an routing option/header. There is no checksum implication
905 * for a routing header for sctp.
907 * Caller needs to update conn_wroff if desired.
909 * TSol notes: This assumes that a SCTP association has a single peer label
910 * since we only track a single pair of ipp_label_v4/v6 and not a separate one
914 sctp_build_hdrs(sctp_t
*sctp
, int sleep
)
916 conn_t
*connp
= sctp
->sctp_connp
;
917 ip_pkt_t
*ipp
= &connp
->conn_xmit_ipp
;
918 uint_t ip_hdr_length
;
921 uint_t ulp_hdr_length
= sizeof (sctp_hdr_t
);
925 in6_addr_t v6src
, v6dst
;
926 ipaddr_t v4src
, v4dst
;
928 v4src
= connp
->conn_saddr_v4
;
929 v4dst
= connp
->conn_faddr_v4
;
930 v6src
= connp
->conn_saddr_v6
;
931 v6dst
= connp
->conn_faddr_v6
;
933 /* First do IPv4 header */
934 ip_hdr_length
= ip_total_hdrs_len_v4(ipp
);
936 /* In case of TX label and IP options it can be too much */
937 if (ip_hdr_length
> IP_MAX_HDR_LENGTH
) {
938 /* Preserves existing TX errno for this */
939 return (EHOSTUNREACH
);
941 hdrs_len
= ip_hdr_length
+ ulp_hdr_length
;
942 ASSERT(hdrs_len
!= 0);
944 if (hdrs_len
!= sctp
->sctp_iphc_len
) {
945 /* Allocate new before we free any old */
946 hdrs
= kmem_alloc(hdrs_len
, sleep
);
950 if (sctp
->sctp_iphc
!= NULL
)
951 kmem_free(sctp
->sctp_iphc
, sctp
->sctp_iphc_len
);
952 sctp
->sctp_iphc
= hdrs
;
953 sctp
->sctp_iphc_len
= hdrs_len
;
955 hdrs
= sctp
->sctp_iphc
;
957 sctp
->sctp_hdr_len
= sctp
->sctp_iphc_len
;
958 sctp
->sctp_ip_hdr_len
= ip_hdr_length
;
960 sctph
= (sctp_hdr_t
*)(hdrs
+ ip_hdr_length
);
961 sctp
->sctp_sctph
= sctph
;
962 sctph
->sh_sport
= connp
->conn_lport
;
963 sctph
->sh_dport
= connp
->conn_fport
;
964 sctph
->sh_verf
= sctp
->sctp_fvtag
;
965 sctph
->sh_chksum
= 0;
967 ipha
= (ipha_t
*)hdrs
;
968 sctp
->sctp_ipha
= ipha
;
970 ipha
->ipha_src
= v4src
;
971 ipha
->ipha_dst
= v4dst
;
972 ip_build_hdrs_v4(hdrs
, ip_hdr_length
, ipp
, connp
->conn_proto
);
973 ipha
->ipha_length
= htons(hdrs_len
);
974 ipha
->ipha_fragment_offset_and_flags
= 0;
976 if (ipp
->ipp_fields
& IPPF_IPV4_OPTIONS
)
977 (void) ip_massage_options(ipha
, connp
->conn_netstack
);
980 ip_hdr_length
= ip_total_hdrs_len_v6(ipp
);
981 hdrs_len
= ip_hdr_length
+ ulp_hdr_length
;
982 ASSERT(hdrs_len
!= 0);
984 if (hdrs_len
!= sctp
->sctp_iphc6_len
) {
985 /* Allocate new before we free any old */
986 hdrs
= kmem_alloc(hdrs_len
, sleep
);
990 if (sctp
->sctp_iphc6
!= NULL
)
991 kmem_free(sctp
->sctp_iphc6
, sctp
->sctp_iphc6_len
);
992 sctp
->sctp_iphc6
= hdrs
;
993 sctp
->sctp_iphc6_len
= hdrs_len
;
995 hdrs
= sctp
->sctp_iphc6
;
997 sctp
->sctp_hdr6_len
= sctp
->sctp_iphc6_len
;
998 sctp
->sctp_ip_hdr6_len
= ip_hdr_length
;
1000 sctph
= (sctp_hdr_t
*)(hdrs
+ ip_hdr_length
);
1001 sctp
->sctp_sctph6
= sctph
;
1002 sctph
->sh_sport
= connp
->conn_lport
;
1003 sctph
->sh_dport
= connp
->conn_fport
;
1004 sctph
->sh_verf
= sctp
->sctp_fvtag
;
1005 sctph
->sh_chksum
= 0;
1007 ip6h
= (ip6_t
*)hdrs
;
1008 sctp
->sctp_ip6h
= ip6h
;
1010 ip6h
->ip6_src
= v6src
;
1011 ip6h
->ip6_dst
= v6dst
;
1012 ip_build_hdrs_v6(hdrs
, ip_hdr_length
, ipp
, connp
->conn_proto
,
1013 connp
->conn_flowinfo
);
1014 ip6h
->ip6_plen
= htons(hdrs_len
- IPV6_HDR_LEN
);
1016 if (ipp
->ipp_fields
& IPPF_RTHDR
) {
1020 end
= (uint8_t *)ip6h
+ ip_hdr_length
;
1021 rth
= ip_find_rthdr_v6(ip6h
, end
);
1023 (void) ip_massage_options_v6(ip6h
, rth
,
1024 connp
->conn_netstack
);
1028 * Verify that the first hop isn't a mapped address.
1029 * Routers along the path need to do this verification
1030 * for subsequent hops.
1032 if (IN6_IS_ADDR_V4MAPPED(&ip6h
->ip6_dst
))
1033 return (EADDRNOTAVAIL
);
1039 sctp_v4_label(sctp_t
*sctp
, sctp_faddr_t
*fp
)
1041 conn_t
*connp
= sctp
->sctp_connp
;
1043 ASSERT(fp
->sf_ixa
->ixa_flags
& IXAF_IS_IPV4
);
1044 return (conn_update_label(connp
, fp
->sf_ixa
, &fp
->sf_faddr
,
1045 &connp
->conn_xmit_ipp
));
1049 sctp_v6_label(sctp_t
*sctp
, sctp_faddr_t
*fp
)
1051 conn_t
*connp
= sctp
->sctp_connp
;
1053 ASSERT(!(fp
->sf_ixa
->ixa_flags
& IXAF_IS_IPV4
));
1054 return (conn_update_label(connp
, fp
->sf_ixa
, &fp
->sf_faddr
,
1055 &connp
->conn_xmit_ipp
));
1059 * XXX implement more sophisticated logic
1061 * Tsol note: We have already verified the addresses using tsol_check_dest
1062 * in sctp_add_faddr, thus no need to redo that here.
1063 * We do setup ipp_label_v4 and ipp_label_v6 based on which addresses
1067 sctp_set_hdraddrs(sctp_t
*sctp
)
1072 conn_t
*connp
= sctp
->sctp_connp
;
1074 ASSERT(sctp
->sctp_faddrs
!= NULL
);
1075 ASSERT(sctp
->sctp_nsaddrs
> 0);
1077 /* Set up using the primary first */
1078 connp
->conn_faddr_v6
= sctp
->sctp_primary
->sf_faddr
;
1079 /* saddr may be unspec; make_mp() will handle this */
1080 connp
->conn_saddr_v6
= sctp
->sctp_primary
->sf_saddr
;
1081 connp
->conn_laddr_v6
= connp
->conn_saddr_v6
;
1082 if (IN6_IS_ADDR_V4MAPPED(&sctp
->sctp_primary
->sf_faddr
)) {
1083 if (!is_system_labeled() ||
1084 sctp_v4_label(sctp
, sctp
->sctp_primary
) == 0) {
1086 if (connp
->conn_family
== AF_INET
) {
1091 if (!is_system_labeled() ||
1092 sctp_v6_label(sctp
, sctp
->sctp_primary
) == 0) {
1097 for (fp
= sctp
->sctp_faddrs
; fp
; fp
= fp
->sf_next
) {
1098 if (!gotv4
&& IN6_IS_ADDR_V4MAPPED(&fp
->sf_faddr
)) {
1099 if (!is_system_labeled() ||
1100 sctp_v4_label(sctp
, fp
) == 0) {
1102 if (connp
->conn_family
== AF_INET
|| gotv6
) {
1106 } else if (!gotv6
&& !IN6_IS_ADDR_V4MAPPED(&fp
->sf_faddr
)) {
1107 if (!is_system_labeled() ||
1108 sctp_v6_label(sctp
, fp
) == 0) {
1117 if (!gotv4
&& !gotv6
)
1124 * got_errchunk is set B_TRUE only if called from validate_init_params(), when
1125 * an ERROR chunk is already prepended the size of which needs updating for
1126 * additional unrecognized parameters. Other callers either prepend the ERROR
1127 * chunk with the correct size after calling this function, or they are calling
1128 * to add an invalid parameter to an INIT_ACK chunk, in that case no ERROR chunk
1129 * exists, the CAUSE blocks go into the INIT_ACK directly.
1131 * *errmp will be non-NULL both when adding an additional CAUSE block to an
1132 * existing prepended COOKIE ERROR chunk (processing params of an INIT_ACK),
1133 * and when adding unrecognized parameters after the first, to an INIT_ACK
1134 * (processing params of an INIT chunk).
1137 sctp_add_unrec_parm(sctp_parm_hdr_t
*uph
, mblk_t
**errmp
,
1138 boolean_t got_errchunk
)
1141 sctp_parm_hdr_t
*ph
;
1144 sctp_chunk_hdr_t
*ecp
;
1146 len
= sizeof (*ph
) + ntohs(uph
->sph_len
);
1147 if ((pad
= len
% SCTP_ALIGN
) != 0) {
1148 pad
= SCTP_ALIGN
- pad
;
1151 mp
= allocb(len
, BPRI_MED
);
1156 ph
= (sctp_parm_hdr_t
*)(mp
->b_rptr
);
1157 ph
->sph_type
= htons(PARM_UNRECOGNIZED
);
1158 ph
->sph_len
= htons(len
- pad
);
1160 /* copy in the unrecognized parameter */
1161 bcopy(uph
, ph
+ 1, ntohs(uph
->sph_len
));
1164 bzero((mp
->b_rptr
+ len
- pad
), pad
);
1166 mp
->b_wptr
= mp
->b_rptr
+ len
;
1167 if (*errmp
!= NULL
) {
1169 * Update total length if an ERROR chunk, then link
1170 * this CAUSE block to the possible chain of CAUSE
1171 * blocks attached to the ERROR chunk or INIT_ACK
1175 /* ERROR chunk already prepended */
1176 ecp
= (sctp_chunk_hdr_t
*)((*errmp
)->b_rptr
);
1177 ecp
->sch_len
= htons(ntohs(ecp
->sch_len
) + len
);
1187 * o Updates remaining
1188 * o Checks alignment
1191 sctp_next_parm(sctp_parm_hdr_t
*current
, ssize_t
*remaining
)
1196 len
= ntohs(current
->sph_len
);
1198 if (*remaining
< sizeof (*current
) || len
< sizeof (*current
)) {
1201 if ((pad
= len
& (SCTP_ALIGN
- 1)) != 0) {
1202 pad
= SCTP_ALIGN
- pad
;
1205 /*LINTED pointer cast may result in improper alignment*/
1206 current
= (sctp_parm_hdr_t
*)((char *)current
+ len
+ pad
);
1211 * Sets the address parameters given in the INIT chunk into sctp's
1212 * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are
1213 * no address parameters in the INIT chunk, a single faddr is created
1214 * from the ip hdr at the beginning of pkt.
1215 * If there already are existing addresses hanging from sctp, merge
1216 * them in, if the old info contains addresses which are not present
1217 * in this new info, get rid of them, and clean the pointers if there's
1218 * messages which have this as their target address.
1220 * We also re-adjust the source address list here since the list may
1221 * contain more than what is actually part of the association. If
1222 * we get here from sctp_send_cookie_echo(), we are on the active
1223 * side and psctp will be NULL and ich will be the INIT-ACK chunk.
1224 * If we get here from sctp_accept_comm(), ich will be the INIT chunk
1225 * and psctp will be the listening endpoint.
1227 * INIT processing: When processing the INIT we inherit the src address
1228 * list from the listener. For a loopback or linklocal association, we
1229 * delete the list and just take the address from the IP header (since
1230 * that's how we created the INIT-ACK). Additionally, for loopback we
1231 * ignore the address params in the INIT. For determining which address
1232 * types were sent in the INIT-ACK we follow the same logic as in
1233 * creating the INIT-ACK. We delete addresses of the type that are not
1234 * supported by the peer.
1236 * INIT-ACK processing: When processing the INIT-ACK since we had not
1237 * included addr params for loopback or linklocal addresses when creating
1238 * the INIT, we just use the address from the IP header. Further, for
1239 * loopback we ignore the addr param list. We mark addresses of the
1240 * type not supported by the peer as unconfirmed.
1242 * In case of INIT processing we look for supported address types in the
1243 * supported address param, if present. In both cases the address type in
1244 * the IP header is supported as well as types for addresses in the param
1247 * Once we have the supported address types sctp_check_saddr() runs through
1248 * the source address list and deletes or marks as unconfirmed address of
1249 * types not supported by the peer.
1251 * Returns 0 on success, sys errno on failure
/*
 * sctp_get_addrparams: build the peer address list of sctp from the IP
 * header of pkt and from the address parameters carried in the INIT or
 * INIT-ACK chunk ich, then let sctp_check_saddr() reconcile the source
 * address list with the address families collected in supp_af.
 *
 *   sctp         - association being populated
 *   psctp        - listening endpoint whose source addresses are
 *                  duplicated into sctp; NULL on the active side
 *   pkt          - message whose b_rptr points at the IPv4/IPv6 header
 *   ich          - chunk header of the INIT or INIT-ACK being processed
 *   sctp_options - optional; when non-NULL, SCTP_PRSCTP_OPTION is OR-ed
 *                  in if a PARM_FORWARD_TSN parameter is present
 *
 * NOTE(review): this excerpt omits a number of the original lines
 * (some declarations, braces and return statements), so the comments
 * here describe only the statements that are visible.
 */
1254 sctp_get_addrparams(sctp_t
*sctp
, sctp_t
*psctp
, mblk_t
*pkt
,
1255 sctp_chunk_hdr_t
*ich
, uint_t
*sctp_options
)
1257 sctp_init_chunk_t
*init
;
1260 in6_addr_t hdrsaddr
[1];
1261 in6_addr_t hdrdaddr
[1];
1262 sctp_parm_hdr_t
*ph
;
1268 boolean_t check_saddr
= B_TRUE
;
1270 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
1271 conn_t
*connp
= sctp
->sctp_connp
;
1273 if (sctp_options
!= NULL
)
1276 /* extract the address from the IP header */
1277 isv4
= (IPH_HDR_VERSION(pkt
->b_rptr
) == IPV4_VERSION
);
1279 iph
= (ipha_t
*)pkt
->b_rptr
;
1280 IN6_IPADDR_TO_V4MAPPED(iph
->ipha_src
, hdrsaddr
);
1281 IN6_IPADDR_TO_V4MAPPED(iph
->ipha_dst
, hdrdaddr
);
1282 supp_af
|= PARM_SUPP_V4
;
1284 ip6h
= (ip6_t
*)pkt
->b_rptr
;
1285 hdrsaddr
[0] = ip6h
->ip6_src
;
1286 hdrdaddr
[0] = ip6h
->ip6_dst
;
1287 supp_af
|= PARM_SUPP_V6
;
1291 * Unfortunately, we can't delay this because adding an faddr
1292 * looks for the presence of the source address (from the ire
1293 * for the faddr) in the source address list. We could have
1294 * delayed this if, say, this was a loopback/linklocal connection.
1295 * Now, we just end up nuking this list and taking the addr from
1296 * the IP header for loopback/linklocal.
1298 if (psctp
!= NULL
&& psctp
->sctp_nsaddrs
> 0) {
1299 ASSERT(sctp
->sctp_nsaddrs
== 0);
1301 err
= sctp_dup_saddrs(psctp
, sctp
, KM_NOSLEEP
);
1306 * We will add the faddr before parsing the address list as this
1307 * might be a loopback connection and we would not have to
1308 * go through the list.
1310 * Make sure the header's addr is in the list
1312 fp
= sctp_lookup_faddr(sctp
, hdrsaddr
);
1314 /* not included; add it now */
1315 err
= sctp_add_faddr(sctp
, hdrsaddr
, KM_NOSLEEP
, B_TRUE
);
1319 /* sctp_faddrs will be the hdr addr */
1320 fp
= sctp
->sctp_faddrs
;
1322 /* make the header addr the primary */
/*
 * Active side (psctp == NULL) with the clustering hook installed:
 * save the address currently in use before sctp_current is
 * overwritten just below. It is later copied into the delete list
 * handed to cl_sctp_assoc_change.
 */
1324 if (cl_sctp_assoc_change
!= NULL
&& psctp
== NULL
)
1325 curaddr
= sctp
->sctp_current
->sf_faddr
;
1327 sctp
->sctp_primary
= fp
;
1328 sctp
->sctp_current
= fp
;
1329 sctp
->sctp_mss
= fp
->sf_pmss
;
1331 /* For loopback connections & linklocal get address from the header */
1332 if (sctp
->sctp_loopback
|| sctp
->sctp_linklocal
) {
1333 if (sctp
->sctp_nsaddrs
!= 0)
1334 sctp_free_saddrs(sctp
);
1335 if ((err
= sctp_saddr_add_addr(sctp
, hdrdaddr
, 0)) != 0)
1337 /* For loopback ignore address list */
1338 if (sctp
->sctp_loopback
)
1340 check_saddr
= B_FALSE
;
1343 /* Walk the params in the INIT [ACK], pulling out addr params */
/*
 * remaining = chunk length minus the chunk header and the fixed-size
 * INIT body, i.e. the bytes occupied by variable-length parameters.
 */
1344 remaining
= ntohs(ich
->sch_len
) - sizeof (*ich
) -
1345 sizeof (sctp_init_chunk_t
);
1346 if (remaining
< sizeof (*ph
)) {
1348 sctp_check_saddr(sctp
, supp_af
, psctp
== NULL
?
1349 B_FALSE
: B_TRUE
, hdrdaddr
);
1351 ASSERT(sctp_saddr_lookup(sctp
, hdrdaddr
, 0) != NULL
);
/*
 * The fixed INIT body follows the chunk header, and the first
 * parameter header follows the fixed INIT body.
 */
1355 init
= (sctp_init_chunk_t
*)(ich
+ 1);
1356 ph
= (sctp_parm_hdr_t
*)(init
+ 1);
1358 /* params will have already been byteordered when validating */
1359 while (ph
!= NULL
) {
1360 if (ph
->sph_type
== htons(PARM_SUPP_ADDRS
)) {
/*
 * A supported-address-types parameter is only expected while
 * processing an INIT, where a listening endpoint exists.
 */
1365 ASSERT(psctp
!= NULL
);
1366 plen
= ntohs(ph
->sph_len
);
1367 p
= (uint16_t *)(ph
+ 1);
1369 addrtype
= ntohs(*p
);
1372 supp_af
|= PARM_SUPP_V6
;
1375 supp_af
|= PARM_SUPP_V4
;
1381 plen
-= sizeof (*p
);
1383 } else if (ph
->sph_type
== htons(PARM_ADDR4
)) {
1384 if (remaining
>= PARM_ADDR4_LEN
) {
1388 supp_af
|= PARM_SUPP_V4
;
1390 * Screen out broad/multicasts & loopback.
1391 * If the endpoint only accepts v6 address,
1392 * go to the next one.
1394 * Subnet broadcast check is done in
1395 * sctp_add_faddr(). If the address is
1396 * a broadcast address, it won't be added.
/* Copy the address that follows the parameter header into ta. */
1398 bcopy(ph
+ 1, &ta
, sizeof (ta
));
1400 ta
== INADDR_BROADCAST
||
1401 ta
== htonl(INADDR_LOOPBACK
) ||
1402 CLASSD(ta
) || connp
->conn_ipv6_v6only
) {
1405 IN6_INADDR_TO_V4MAPPED((struct in_addr
*)
1408 /* Check for duplicate. */
1409 if (sctp_lookup_faddr(sctp
, &addr
) != NULL
)
1412 /* OK, add it to the faddr set */
1413 err
= sctp_add_faddr(sctp
, &addr
, KM_NOSLEEP
,
1415 /* Something is wrong... Try the next one. */
1419 } else if (ph
->sph_type
== htons(PARM_ADDR6
) &&
1420 connp
->conn_family
== AF_INET6
) {
1421 /* A v4 socket should not take v6 addresses. */
1422 if (remaining
>= PARM_ADDR6_LEN
) {
1425 supp_af
|= PARM_SUPP_V6
;
1426 addr6
= (in6_addr_t
*)(ph
+ 1);
1428 * Screen out link locals, mcast, loopback
1429 * and bogus v6 address.
1431 if (IN6_IS_ADDR_LINKLOCAL(addr6
) ||
1432 IN6_IS_ADDR_MULTICAST(addr6
) ||
1433 IN6_IS_ADDR_LOOPBACK(addr6
) ||
1434 IN6_IS_ADDR_V4MAPPED(addr6
)) {
1437 /* Check for duplicate. */
1438 if (sctp_lookup_faddr(sctp
, addr6
) != NULL
)
1441 err
= sctp_add_faddr(sctp
,
1442 (in6_addr_t
*)(ph
+ 1), KM_NOSLEEP
,
1444 /* Something is wrong... Try the next one. */
1448 } else if (ph
->sph_type
== htons(PARM_FORWARD_TSN
)) {
1449 if (sctp_options
!= NULL
)
1450 *sctp_options
|= SCTP_PRSCTP_OPTION
;
/*
 * Step to the next parameter; the walk ends once sctp_next_parm()
 * yields NULL.
 */
1454 ph
= sctp_next_parm(ph
, &remaining
);
1457 sctp_check_saddr(sctp
, supp_af
, psctp
== NULL
? B_FALSE
:
1460 ASSERT(sctp_saddr_lookup(sctp
, hdrdaddr
, 0) != NULL
);
1462 * We have the right address list now, update clustering's
1463 * knowledge because when we sent the INIT we had just added
1464 * the address the INIT was sent to.
1466 if (psctp
== NULL
&& cl_sctp_assoc_change
!= NULL
) {
/* alist: room for one in6_addr_t per peer address (sctp_nfaddrs). */
1472 asize
= sizeof (in6_addr_t
) * sctp
->sctp_nfaddrs
;
1473 alist
= kmem_alloc(asize
, KM_NOSLEEP
);
1474 if (alist
== NULL
) {
1475 SCTP_KSTAT(sctps
, sctp_cl_assoc_change
);
1479 * Just include the address the INIT was sent to in the
1480 * delete list and send the entire faddr list. We could
1481 * do it differently (i.e include all the addresses in the
1482 * add list even if it contains the original address OR
1483 * remove the original address from the add list etc.), but
1484 * this seems reasonable enough.
/* dlist: exactly one in6_addr_t, filled from curaddr below. */
1486 dsize
= sizeof (in6_addr_t
);
1487 dlist
= kmem_alloc(dsize
, KM_NOSLEEP
);
1488 if (dlist
== NULL
) {
1489 kmem_free(alist
, asize
);
1490 SCTP_KSTAT(sctps
, sctp_cl_assoc_change
);
1493 bcopy(&curaddr
, dlist
, sizeof (curaddr
));
1494 sctp_get_faddr_list(sctp
, alist
, asize
);
1495 (*cl_sctp_assoc_change
)(connp
->conn_family
, alist
, asize
,
1496 sctp
->sctp_nfaddrs
, dlist
, dsize
, 1, SCTP_CL_PADDR
,
1497 (cl_sctp_handle_t
)sctp
);
1498 /* alist and dlist will be freed by the clustering module */
1504 * Returns 0 if the check failed and the restart should be refused,
1505 * 1 if the check succeeded.
/*
 * sctp_secure_restart_check: collect the addresses advertised by an
 * incoming INIT into a temporary sctp_faddr_t list and compare that
 * list against the peer address set of every association hashed
 * under the same ports.
 *
 *   pkt   - message whose b_rptr points at the IPv4/IPv6 header
 *   ich   - chunk header of the INIT being examined
 *   ports - selects the connection fanout bucket and is compared
 *           against conn_ports of each association in that bucket
 *   sleep - allocation flag passed to kmem_cache_alloc() for the entry
 *           that holds the IP header address
 *   sctps - SCTP stack instance that owns the fanout table
 *   ira   - receive attributes; not referenced by any line visible in
 *           this excerpt
 *
 * NOTE(review): this excerpt omits a number of the original lines
 * (some declarations, braces and return statements), so the comments
 * here describe only the statements that are visible.
 */
1508 sctp_secure_restart_check(mblk_t
*pkt
, sctp_chunk_hdr_t
*ich
, uint32_t ports
,
1509 int sleep
, sctp_stack_t
*sctps
, ip_recv_attr_t
*ira
)
1511 sctp_faddr_t
*fp
, *fphead
= NULL
;
1512 sctp_parm_hdr_t
*ph
;
1517 in6_addr_t hdraddr
[1];
1522 sctp_init_chunk_t
*init
;
1525 /* extract the address from the IP header */
1526 isv4
= (IPH_HDR_VERSION(pkt
->b_rptr
) == IPV4_VERSION
);
1528 iph
= (ipha_t
*)pkt
->b_rptr
;
1529 IN6_IPADDR_TO_V4MAPPED(iph
->ipha_src
, hdraddr
);
1531 ip6h
= (ip6_t
*)pkt
->b_rptr
;
1532 hdraddr
[0] = ip6h
->ip6_src
;
1535 /* Walk the params in the INIT [ACK], pulling out addr params */
1536 remaining
= ntohs(ich
->sch_len
) - sizeof (*ich
) -
1537 sizeof (sctp_init_chunk_t
);
1538 if (remaining
< sizeof (*ph
)) {
1539 /* no parameters; restart OK */
1542 init
= (sctp_init_chunk_t
*)(ich
+ 1);
1543 ph
= (sctp_parm_hdr_t
*)(init
+ 1);
1545 while (ph
!= NULL
) {
/* fpa stays NULL unless this parameter is an address parameter. */
1546 sctp_faddr_t
*fpa
= NULL
;
1548 /* params will have already been byteordered when validating */
1549 if (ph
->sph_type
== htons(PARM_ADDR4
)) {
1550 if (remaining
>= PARM_ADDR4_LEN
) {
1552 IN6_INADDR_TO_V4MAPPED((struct in_addr
*)
1554 fpa
= kmem_cache_alloc(sctp_kmem_faddr_cache
,
1559 bzero(fpa
, sizeof (*fpa
));
1560 fpa
->sf_faddr
= addr
;
1561 fpa
->sf_next
= NULL
;
1563 } else if (ph
->sph_type
== htons(PARM_ADDR6
)) {
1564 if (remaining
>= PARM_ADDR6_LEN
) {
1565 fpa
= kmem_cache_alloc(sctp_kmem_faddr_cache
,
1570 bzero(fpa
, sizeof (*fpa
));
1571 bcopy(ph
+ 1, &fpa
->sf_faddr
,
1572 sizeof (fpa
->sf_faddr
));
1573 fpa
->sf_next
= NULL
;
1576 /* link in the new addr, if it was an addr param */
1578 if (fphead
== NULL
) {
1581 fpa
->sf_next
= fphead
;
1586 ph
= sctp_next_parm(ph
, &remaining
);
1589 if (fphead
== NULL
) {
1590 /* no addr parameters; restart OK */
1595 * got at least one; make sure the header's addr is
1598 fp
= sctp_lookup_faddr_nosctp(fphead
, hdraddr
);
1600 /* not included; add it now */
1601 fp
= kmem_cache_alloc(sctp_kmem_faddr_cache
, sleep
);
1605 bzero(fp
, sizeof (*fp
));
1606 fp
->sf_faddr
= *hdraddr
;
1607 fp
->sf_next
= fphead
;
1612 * Now, we can finally do the check: For each sctp instance
1613 * on the hash line for ports, compare its faddr set against
1614 * the new one. If the new one is a strict subset of any
1615 * existing sctp's faddrs, the restart is OK. However, if there
1616 * is an overlap, this could be an attack, so return failure.
1617 * If all sctp's faddrs are disjoint, this is a legitimate new
/*
 * Select the fanout bucket for these ports and hold its lock for
 * the duration of the scan.
 */
1620 tf
= &(sctps
->sctps_conn_fanout
[SCTP_CONN_HASH(sctps
, ports
)]);
1621 mutex_enter(&tf
->tf_lock
);
1623 for (sctp
= tf
->tf_sctp
; sctp
; sctp
= sctp
->sctp_conn_hash_next
) {
1624 if (ports
!= sctp
->sctp_connp
->conn_ports
) {
1627 compres
= sctp_compare_faddrsets(fphead
, sctp
->sctp_faddrs
);
1628 if (compres
<= SCTP_ADDR_SUBSET
) {
1630 mutex_exit(&tf
->tf_lock
);
1633 if (compres
== SCTP_ADDR_OVERLAP
) {
1635 ("new assoc from %x:%x:%x:%x overlaps with %p\n",
1636 SCTP_PRINTADDR(*hdraddr
), (void *)sctp
));
1638 * While we still hold the lock, we need to
1639 * figure out which addresses have been
1640 * added so we can include them in the abort
1641 * we will send back. Since these faddrs will
1642 * never be used, we overload the rto field
1643 * here, setting it to 0 if the address was
1644 * not added, 1 if it was added.
1646 for (fp
= fphead
; fp
; fp
= fp
->sf_next
) {
1647 if (sctp_lookup_faddr(sctp
, &fp
->sf_faddr
)) {
1654 mutex_exit(&tf
->tf_lock
);
1658 mutex_exit(&tf
->tf_lock
);
1660 /* All faddrs are disjoint; legit new association */
1664 /* If there are attempted adds, send back an abort listing the addrs */
/*
 * Worst-case sizing: every one of the nadded entries is budgeted
 * PARM_ADDR6_LEN bytes, although a v4-mapped entry only adds
 * PARM_ADDR4_LEN to dlen below.
 */
1669 dtail
= kmem_alloc(PARM_ADDR6_LEN
* nadded
, KM_NOSLEEP
);
1670 if (dtail
== NULL
) {
1676 for (fp
= fphead
; fp
; fp
= fp
->sf_next
) {
1677 if (fp
->sf_rto
== 0) {
1680 if (IN6_IS_ADDR_V4MAPPED(&fp
->sf_faddr
)) {
1683 ph
->sph_type
= htons(PARM_ADDR4
);
1684 ph
->sph_len
= htons(PARM_ADDR4_LEN
);
1685 IN6_V4MAPPED_TO_IPADDR(&fp
->sf_faddr
, addr4
);
1687 bcopy(&addr4
, ph
, sizeof (addr4
));
1688 ph
= (sctp_parm_hdr_t
*)
1689 ((char *)ph
+ sizeof (addr4
));
1690 dlen
+= PARM_ADDR4_LEN
;
1692 ph
->sph_type
= htons(PARM_ADDR6
);
1693 ph
->sph_len
= htons(PARM_ADDR6_LEN
);
1695 bcopy(&fp
->sf_faddr
, ph
, sizeof (fp
->sf_faddr
));
1696 ph
= (sctp_parm_hdr_t
*)
1697 ((char *)ph
+ sizeof (fp
->sf_faddr
));
1698 dlen
+= PARM_ADDR6_LEN
;
1702 /* Send off the abort */
1703 sctp_send_abort(sctp
, sctp_init2vtag(ich
),
1704 SCTP_ERR_RESTART_NEW_ADDRS
, dtail
, dlen
, pkt
, 0, B_TRUE
,
/* Free with the same size expression that was used to allocate. */
1707 kmem_free(dtail
, PARM_ADDR6_LEN
* nadded
);
/*
 * Tear down the temporary address list, dropping any sf_ixa
 * reference an entry holds before returning it to the cache.
 */
1714 for (fp
= fphead
; fp
; fp
= fpn
) {
1716 if (fp
->sf_ixa
!= NULL
) {
1717 ixa_refrele(fp
->sf_ixa
);
1720 kmem_cache_free(sctp_kmem_faddr_cache
, fp
);
1728 * Reset any state related to transmitted chunks.
/*
 * sctp_congest_reset: for every peer address of sctp, reset ssthresh
 * to sctps_initial_mtu and recompute cwnd through SET_CWND(); then
 * drain the transmit list, reporting each removed message through
 * sctp_sendfail_event(), and clear the transmit pointers, the unacked
 * count, the pending control-chunk list and the retransmission and
 * zero-window-probe state.
 *
 * NOTE(review): this excerpt omits some of the original lines, so the
 * comments here describe only the statements that are visible.
 */
1731 sctp_congest_reset(sctp_t
*sctp
)
1734 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
/* Reset ssthresh and cwnd on every peer address. */
1737 for (fp
= sctp
->sctp_faddrs
; fp
!= NULL
; fp
= fp
->sf_next
) {
1738 fp
->sf_ssthresh
= sctps
->sctps_initial_mtu
;
1739 SET_CWND(fp
, fp
->sf_pmss
, sctps
->sctps_slow_start_initial
);
1744 * Clean up the transmit list as well since we have reset accounting
1745 * on all the fps. Send event upstream, if required.
1747 while ((mp
= sctp
->sctp_xmit_head
) != NULL
) {
1748 sctp
->sctp_xmit_head
= mp
->b_next
;
/* The next message is now the head, so it must not point back at mp. */
1750 if (sctp
->sctp_xmit_head
!= NULL
)
1751 sctp
->sctp_xmit_head
->b_prev
= NULL
;
1752 sctp_sendfail_event(sctp
, mp
, 0, B_TRUE
);
1754 sctp
->sctp_xmit_head
= NULL
;
1755 sctp
->sctp_xmit_tail
= NULL
;
1756 sctp
->sctp_xmit_unacked
= NULL
;
1758 sctp
->sctp_unacked
= 0;
1760 * Any control message as well. We will clean-up this list as well.
1761 * This contains any pending ASCONF request that we have queued/sent.
1762 * If we do get an ACK we will just drop it. However, given that
1763 * we are restarting chances are we aren't going to get any.
1765 if (sctp
->sctp_cxmit_list
!= NULL
)
1766 sctp_asconf_free_cxmit(sctp
, NULL
);
1767 sctp
->sctp_cxmit_list
= NULL
;
1768 sctp
->sctp_cchunk_pend
= 0;
/* Clear retransmission bookkeeping and the zero-window-probe flag. */
1770 sctp
->sctp_rexmitting
= B_FALSE
;
1771 sctp
->sctp_rxt_nxttsn
= 0;
1772 sctp
->sctp_rxt_maxtsn
= 0;
1774 sctp
->sctp_zero_win_probe
= B_FALSE
;
/*
 * sctp_init_faddr: fill in a peer address entry fp for association
 * sctp. Copies addr into sf_faddr, sets the IPv4 flag on sf_ixa to
 * match the address family, and seeds congestion, RTO, heartbeat and
 * timer fields from the association and stack defaults before calling
 * sctp_get_dest() on the entry.
 *
 *   sctp - owning association
 *   fp   - entry to initialise; sf_ixa must already be set
 *   addr - peer address (v4-mapped for IPv4 peers)
 *
 * The final parameter is declared on a line missing from this
 * excerpt; the body stores timer_mp in sf_timer_mp.
 *
 * NOTE(review): this excerpt omits some of the original lines, so the
 * comments here describe only the statements that are visible.
 */
1778 sctp_init_faddr(sctp_t
*sctp
, sctp_faddr_t
*fp
, in6_addr_t
*addr
,
1781 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
/* The caller must already have attached sf_ixa; its flags are set below. */
1783 ASSERT(fp
->sf_ixa
!= NULL
);
1785 bcopy(addr
, &fp
->sf_faddr
, sizeof (*addr
));
/*
 * V4-mapped peers use sctp_hdr_len and set IXAF_IS_IPV4; all other
 * peers use sctp_hdr6_len and clear it.
 */
1786 if (IN6_IS_ADDR_V4MAPPED(addr
)) {
1788 /* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */
1790 (sctps
->sctps_initial_mtu
- sctp
->sctp_hdr_len
) &
1792 fp
->sf_ixa
->ixa_flags
|= IXAF_IS_IPV4
;
1796 (sctps
->sctps_initial_mtu
- sctp
->sctp_hdr6_len
) &
1798 fp
->sf_ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
/* Initial cwnd is sctps_slow_start_initial times the path MSS. */
1800 fp
->sf_cwnd
= sctps
->sctps_slow_start_initial
* fp
->sf_pmss
;
/* Initial RTO: the smaller of sctp_rto_initial and sctp_rto_max_init. */
1801 fp
->sf_rto
= MIN(sctp
->sctp_rto_initial
, sctp
->sctp_rto_max_init
);
1802 SCTP_MAX_RTO(sctp
, fp
);
1804 fp
->sf_rtt_updates
= 0;
1806 fp
->sf_max_retr
= sctp
->sctp_pp_max_rxt
;
1807 /* Mark it as not confirmed. */
1808 fp
->sf_state
= SCTP_FADDRS_UNCONFIRMED
;
1809 fp
->sf_hb_interval
= sctp
->sctp_hb_interval
;
1810 fp
->sf_ssthresh
= sctps
->sctps_initial_ssthresh
;
/* Stamp last-activity and heartbeat-expiry with the current lbolt. */
1814 fp
->sf_lastactive
= fp
->sf_hb_expiry
= ddi_get_lbolt64();
1815 fp
->sf_timer_mp
= timer_mp
;
1816 fp
->sf_hb_pending
= B_FALSE
;
1817 fp
->sf_hb_enabled
= B_TRUE
;
1819 fp
->sf_pmtu_discovered
= 0;
1821 fp
->sf_T3expire
= 0;
/* Fresh pseudo-random value for sf_hb_secret; the result is ignored. */
1822 (void) random_get_pseudo_bytes((uint8_t *)&fp
->sf_hb_secret
,
1823 sizeof (fp
->sf_hb_secret
));
1824 fp
->sf_rxt_unacked
= 0;
/* Last step: hand the initialised entry to sctp_get_dest(). */
1826 sctp_get_dest(sctp
, fp
);
/*
 * faddr_constructor: constructor registered with the sctp_faddr_t kmem
 * cache in sctp_faddr_init(). Clears both timer message pointers and
 * both timer-running flags of the object in buf. arg and flags are not
 * referenced by any visible line.
 */
1831 faddr_constructor(void *buf
, void *arg
, int flags
)
1833 sctp_faddr_t
*fp
= buf
;
1835 fp
->sf_timer_mp
= NULL
;
1836 fp
->sf_timer_running
= 0;
1838 fp
->sf_rc_timer_mp
= NULL
;
1839 fp
->sf_rc_timer_running
= 0;
/*
 * faddr_destructor: destructor registered with the sctp_faddr_t kmem
 * cache in sctp_faddr_init(). Only asserts that the object in buf has
 * no timer message attached and no timer marked as running, i.e. that
 * it is back in the state faddr_constructor() leaves it in. arg is not
 * referenced by any visible line.
 */
1846 faddr_destructor(void *buf
, void *arg
)
1848 sctp_faddr_t
*fp
= buf
;
1850 ASSERT(fp
->sf_timer_mp
== NULL
);
1851 ASSERT(fp
->sf_timer_running
== 0);
1853 ASSERT(fp
->sf_rc_timer_mp
== NULL
);
1854 ASSERT(fp
->sf_rc_timer_running
== 0);
/*
 * sctp_faddr_init: create the kmem cache named sctp_faddr_cache that
 * backs sctp_faddr_t allocations, with faddr_constructor and
 * faddr_destructor as its object hooks, and store it in
 * sctp_kmem_faddr_cache.
 */
1858 sctp_faddr_init(void)
1860 sctp_kmem_faddr_cache
= kmem_cache_create("sctp_faddr_cache",
1861 sizeof (sctp_faddr_t
), 0, faddr_constructor
, faddr_destructor
,
1862 NULL
, NULL
, NULL
, 0);
/*
 * sctp_faddr_fini: destroy the kmem cache held in
 * sctp_kmem_faddr_cache.
 */
1866 sctp_faddr_fini(void)
1868 kmem_cache_destroy(sctp_kmem_faddr_cache
);