4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright (c) 2013 by Delphix. All rights reserved.
27 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
34 #include <sys/sunddi.h>
35 #include <sys/modctl.h>
37 #include <sys/cpuvar.h>
38 #include <sys/socket.h>
39 #include <sys/strsubr.h>
40 #include <sys/sysmacros.h>
42 #include <netinet/tcp.h>
44 #include <sys/socketvar.h>
45 #include <sys/pathname.h>
46 #include <sys/fs/snode.h>
47 #include <sys/fs/dv_node.h>
48 #include <sys/vnode.h>
49 #include <netinet/in.h>
51 #include <sys/sockio.h>
52 #include <sys/ksocket.h>
53 #include <sys/filio.h> /* FIONBIO */
54 #include <sys/iscsi_protocol.h>
55 #include <sys/idm/idm.h>
56 #include <sys/idm/idm_so.h>
57 #include <sys/idm/idm_text.h>
59 #define IN_PROGRESS_DELAY 1
62 * in6addr_any is currently all zeroes, but use the macro in case this
65 static const struct in6_addr in6addr_any
= IN6ADDR_ANY_INIT
;
67 static void idm_sorx_cache_pdu_cb(idm_pdu_t
*pdu
, idm_status_t status
);
68 static void idm_sorx_addl_pdu_cb(idm_pdu_t
*pdu
, idm_status_t status
);
69 static void idm_sotx_cache_pdu_cb(idm_pdu_t
*pdu
, idm_status_t status
);
71 static idm_status_t
idm_so_conn_create_common(idm_conn_t
*ic
, ksocket_t new_so
);
72 static void idm_so_conn_destroy_common(idm_conn_t
*ic
);
73 static void idm_so_conn_connect_common(idm_conn_t
*ic
);
75 static void idm_set_ini_preconnect_options(idm_so_conn_t
*sc
,
77 static void idm_set_postconnect_options(ksocket_t so
);
78 static idm_status_t
idm_i_so_tx(idm_pdu_t
*pdu
);
80 static idm_status_t
idm_sorecvdata(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
81 static void idm_so_send_rtt_data(idm_conn_t
*ic
, idm_task_t
*idt
,
82 idm_buf_t
*idb
, uint32_t offset
, uint32_t length
);
83 static void idm_so_send_rtt_data_done(idm_task_t
*idt
, idm_buf_t
*idb
);
84 static idm_status_t
idm_so_send_buf_region(idm_task_t
*idt
,
85 idm_buf_t
*idb
, uint32_t buf_region_offset
, uint32_t buf_region_length
);
87 static uint32_t idm_fill_iov(idm_pdu_t
*pdu
, idm_buf_t
*idb
,
88 uint32_t ro
, uint32_t dlength
);
90 static idm_status_t
idm_so_handle_digest(idm_conn_t
*it
,
91 nvpair_t
*digest_choice
, const idm_kv_xlate_t
*ikvx
);
93 static void idm_so_socket_set_nonblock(struct sonode
*node
);
94 static void idm_so_socket_set_block(struct sonode
*node
);
97 * Transport ops prototypes
99 static void idm_so_tx(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
100 static idm_status_t
idm_so_buf_tx_to_ini(idm_task_t
*idt
, idm_buf_t
*idb
);
101 static idm_status_t
idm_so_buf_rx_from_ini(idm_task_t
*idt
, idm_buf_t
*idb
);
102 static void idm_so_rx_datain(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
103 static void idm_so_rx_rtt(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
104 static void idm_so_rx_dataout(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
105 static idm_status_t
idm_so_free_task_rsrc(idm_task_t
*idt
);
106 static kv_status_t
idm_so_negotiate_key_values(idm_conn_t
*it
,
107 nvlist_t
*request_nvl
, nvlist_t
*response_nvl
, nvlist_t
*negotiated_nvl
);
108 static void idm_so_notice_key_values(idm_conn_t
*it
,
109 nvlist_t
*negotiated_nvl
);
110 static kv_status_t
idm_so_declare_key_values(idm_conn_t
*it
,
111 nvlist_t
*config_nvl
, nvlist_t
*outgoing_nvl
);
112 static boolean_t
idm_so_conn_is_capable(idm_conn_req_t
*ic
,
113 idm_transport_caps_t
*caps
);
114 static idm_status_t
idm_so_buf_alloc(idm_buf_t
*idb
, uint64_t buflen
);
115 static void idm_so_buf_free(idm_buf_t
*idb
);
116 static idm_status_t
idm_so_buf_setup(idm_buf_t
*idb
);
117 static void idm_so_buf_teardown(idm_buf_t
*idb
);
118 static idm_status_t
idm_so_tgt_svc_create(idm_svc_req_t
*sr
, idm_svc_t
*is
);
119 static void idm_so_tgt_svc_destroy(idm_svc_t
*is
);
120 static idm_status_t
idm_so_tgt_svc_online(idm_svc_t
*is
);
121 static void idm_so_tgt_svc_offline(idm_svc_t
*is
);
122 static void idm_so_tgt_conn_destroy(idm_conn_t
*ic
);
123 static idm_status_t
idm_so_tgt_conn_connect(idm_conn_t
*ic
);
124 static void idm_so_conn_disconnect(idm_conn_t
*ic
);
125 static idm_status_t
idm_so_ini_conn_create(idm_conn_req_t
*cr
, idm_conn_t
*ic
);
126 static void idm_so_ini_conn_destroy(idm_conn_t
*ic
);
127 static idm_status_t
idm_so_ini_conn_connect(idm_conn_t
*ic
);
130 * IDM Native Sockets transport operations
133 idm_transport_ops_t idm_so_transport_ops
= {
134 idm_so_tx
, /* it_tx_pdu */
135 idm_so_buf_tx_to_ini
, /* it_buf_tx_to_ini */
136 idm_so_buf_rx_from_ini
, /* it_buf_rx_from_ini */
137 idm_so_rx_datain
, /* it_rx_datain */
138 idm_so_rx_rtt
, /* it_rx_rtt */
139 idm_so_rx_dataout
, /* it_rx_dataout */
140 NULL
, /* it_alloc_conn_rsrc */
141 NULL
, /* it_free_conn_rsrc */
142 NULL
, /* it_tgt_enable_datamover */
143 NULL
, /* it_ini_enable_datamover */
144 NULL
, /* it_conn_terminate */
145 idm_so_free_task_rsrc
, /* it_free_task_rsrc */
146 idm_so_negotiate_key_values
, /* it_negotiate_key_values */
147 idm_so_notice_key_values
, /* it_notice_key_values */
148 idm_so_conn_is_capable
, /* it_conn_is_capable */
149 idm_so_buf_alloc
, /* it_buf_alloc */
150 idm_so_buf_free
, /* it_buf_free */
151 idm_so_buf_setup
, /* it_buf_setup */
152 idm_so_buf_teardown
, /* it_buf_teardown */
153 idm_so_tgt_svc_create
, /* it_tgt_svc_create */
154 idm_so_tgt_svc_destroy
, /* it_tgt_svc_destroy */
155 idm_so_tgt_svc_online
, /* it_tgt_svc_online */
156 idm_so_tgt_svc_offline
, /* it_tgt_svc_offline */
157 idm_so_tgt_conn_destroy
, /* it_tgt_conn_destroy */
158 idm_so_tgt_conn_connect
, /* it_tgt_conn_connect */
159 idm_so_conn_disconnect
, /* it_tgt_conn_disconnect */
160 idm_so_ini_conn_create
, /* it_ini_conn_create */
161 idm_so_ini_conn_destroy
, /* it_ini_conn_destroy */
162 idm_so_ini_conn_connect
, /* it_ini_conn_connect */
163 idm_so_conn_disconnect
, /* it_ini_conn_disconnect */
164 idm_so_declare_key_values
/* it_declare_key_values */
167 kmutex_t idm_so_timed_socket_mutex
;
169 int32_t idm_so_sndbuf
= IDM_SNDBUF_SIZE
;
170 int32_t idm_so_rcvbuf
= IDM_RCVBUF_SIZE
;
174 * Sockets transport initialization
/*
 * idm_so_init()
 * One-time setup for the sockets transport: creates the three kmem caches
 * kept in the global idm structure (TX PDUs, RX PDUs and data buffers),
 * points the supplied transport at idm_so_transport_ops and initializes
 * idm_so_timed_socket_mutex.  All three caches are created with KM_SLEEP
 * and the value 8 in the third kmem_cache_create() argument.
 */
177 idm_so_init(idm_transport_t
*it
)
179 /* Cache for IDM Data and R2T transmit PDUs; each object is an idm_pdu_t plus room for one iscsi_hdr_t */
180 idm
.idm_sotx_pdu_cache
= kmem_cache_create("idm_tx_pdu_cache",
181 sizeof (idm_pdu_t
) + sizeof (iscsi_hdr_t
), 8,
182 &idm_sotx_pdu_constructor
, NULL
, NULL
, NULL
, NULL
, KM_SLEEP
);
184 /* Cache for IDM receive PDUs; each object is an idm_pdu_t plus IDM_SORX_CACHE_HDRLEN bytes */
185 idm
.idm_sorx_pdu_cache
= kmem_cache_create("idm_rx_pdu_cache",
186 sizeof (idm_pdu_t
) + IDM_SORX_CACHE_HDRLEN
, 8,
187 &idm_sorx_pdu_constructor
, NULL
, NULL
, NULL
, NULL
, KM_SLEEP
);
189 /* 128k buffer cache: objects of IDM_SO_BUF_CACHE_UB bytes, no constructor */
190 idm
.idm_so_128k_buf_cache
= kmem_cache_create("idm_128k_buf_cache",
191 IDM_SO_BUF_CACHE_UB
, 8, NULL
, NULL
, NULL
, NULL
, NULL
, KM_SLEEP
);
193 /* Set the sockets transport ops */
194 it
->it_ops
= &idm_so_transport_ops
;
/* Paired with the mutex_destroy() of the same mutex in the teardown code below */
196 mutex_init(&idm_so_timed_socket_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
202 * Sockets transport teardown
207 kmem_cache_destroy(idm
.idm_so_128k_buf_cache
);
208 kmem_cache_destroy(idm
.idm_sotx_pdu_cache
);
209 kmem_cache_destroy(idm
.idm_sorx_pdu_cache
);
210 mutex_destroy(&idm_so_timed_socket_mutex
);
214 idm_socreate(int domain
, int type
, int protocol
)
218 if (!ksocket_socket(&ks
, domain
, type
, protocol
, KSOCKET_NOSLEEP
,
227 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
228 * reception and transmission. The sonode still exists but its state
229 * gets modified to indicate it is no longer connected. Calls to
230 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
231 * to regain control of a thread stuck in idm_sorecv.
234 idm_soshutdown(ksocket_t so
)
236 (void) ksocket_shutdown(so
, SHUT_RDWR
, CRED());
240 * idm_sodestroy releases all resources associated with a socket previously
241 * created with idm_socreate. The socket must be shutdown using
242 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
243 * otherwise undefined behavior will result.
246 idm_sodestroy(ksocket_t ks
)
248 (void) ksocket_close(ks
, CRED());
252 * Function to compare two addresses in sockaddr_storage format
256 idm_ss_compare(const struct sockaddr_storage
*cmp_ss1
,
257 const struct sockaddr_storage
*cmp_ss2
,
258 boolean_t v4_mapped_as_v4
,
259 boolean_t compare_ports
)
261 struct sockaddr_storage mapped_v4_ss1
, mapped_v4_ss2
;
262 const struct sockaddr_storage
*ss1
, *ss2
;
263 struct in_addr
*in1
, *in2
;
264 struct in6_addr
*in61
, *in62
;
268 * Normalize V4-mapped IPv6 addresses into V4 format if
269 * v4_mapped_as_v4 is B_TRUE.
273 if (v4_mapped_as_v4
&& (ss1
->ss_family
== AF_INET6
)) {
274 in61
= &((struct sockaddr_in6
*)ss1
)->sin6_addr
;
275 if (IN6_IS_ADDR_V4MAPPED(in61
)) {
276 bzero(&mapped_v4_ss1
, sizeof (mapped_v4_ss1
));
277 mapped_v4_ss1
.ss_family
= AF_INET
;
278 ((struct sockaddr_in
*)&mapped_v4_ss1
)->sin_port
=
279 ((struct sockaddr_in
*)ss1
)->sin_port
;
280 IN6_V4MAPPED_TO_INADDR(in61
,
281 &((struct sockaddr_in
*)&mapped_v4_ss1
)->sin_addr
);
282 ss1
= &mapped_v4_ss1
;
286 if (v4_mapped_as_v4
&& (ss2
->ss_family
== AF_INET6
)) {
287 in62
= &((struct sockaddr_in6
*)ss2
)->sin6_addr
;
288 if (IN6_IS_ADDR_V4MAPPED(in62
)) {
289 bzero(&mapped_v4_ss2
, sizeof (mapped_v4_ss2
));
290 mapped_v4_ss2
.ss_family
= AF_INET
;
291 ((struct sockaddr_in
*)&mapped_v4_ss2
)->sin_port
=
292 ((struct sockaddr_in
*)ss2
)->sin_port
;
293 IN6_V4MAPPED_TO_INADDR(in62
,
294 &((struct sockaddr_in
*)&mapped_v4_ss2
)->sin_addr
);
295 ss2
= &mapped_v4_ss2
;
300 * Compare ports, then address family, then ip address
303 (((struct sockaddr_in
*)ss1
)->sin_port
!=
304 ((struct sockaddr_in
*)ss2
)->sin_port
)) {
305 if (((struct sockaddr_in
*)ss1
)->sin_port
>
306 ((struct sockaddr_in
*)ss2
)->sin_port
)
315 if (ss1
->ss_family
!= ss2
->ss_family
) {
316 if (ss1
->ss_family
== AF_INET
)
323 * address families are the same
325 if (ss1
->ss_family
== AF_INET
) {
326 in1
= &((struct sockaddr_in
*)ss1
)->sin_addr
;
327 in2
= &((struct sockaddr_in
*)ss2
)->sin_addr
;
329 if (in1
->s_addr
> in2
->s_addr
)
331 else if (in1
->s_addr
< in2
->s_addr
)
335 } else if (ss1
->ss_family
== AF_INET6
) {
336 in61
= &((struct sockaddr_in6
*)ss1
)->sin6_addr
;
337 in62
= &((struct sockaddr_in6
*)ss2
)->sin6_addr
;
339 for (i
= 0; i
< 4; i
++) {
340 if (in61
->s6_addr32
[i
] > in62
->s6_addr32
[i
])
342 else if (in61
->s6_addr32
[i
] < in62
->s6_addr32
[i
])
352 * IP address filter functions to flag addresses that should not
353 * go out to initiators through discovery.
356 idm_v4_addr_okay(struct in_addr
*in_addr
)
358 in_addr_t addr
= ntohl(in_addr
->s_addr
);
360 if ((INADDR_NONE
== addr
) ||
361 (IN_MULTICAST(addr
)) ||
362 ((addr
>> IN_CLASSA_NSHIFT
) == 0) ||
363 ((addr
>> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
)) {
370 idm_v6_addr_okay(struct in6_addr
*addr6
)
373 if ((IN6_IS_ADDR_UNSPECIFIED(addr6
)) ||
374 (IN6_IS_ADDR_LOOPBACK(addr6
)) ||
375 (IN6_IS_ADDR_MULTICAST(addr6
)) ||
376 (IN6_IS_ADDR_V4MAPPED(addr6
)) ||
377 (IN6_IS_ADDR_V4COMPAT(addr6
)) ||
378 (IN6_IS_ADDR_LINKLOCAL(addr6
))) {
385 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
386 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
389 idm_get_ipaddr(idm_addr_list_t
**ipaddr_p
)
400 struct sockaddr_storage ss
;
401 struct sockaddr_in
*sin
;
402 struct sockaddr_in6
*sin6
;
404 idm_addr_list_t
*ipaddr
= NULL
;
411 /* create an ipv4 and ipv6 UDP socket */
412 if ((so6
= idm_socreate(PF_INET6
, SOCK_DGRAM
, 0)) == NULL
)
414 if ((so4
= idm_socreate(PF_INET
, SOCK_DGRAM
, 0)) == NULL
) {
421 /* snapshot the current number of interfaces */
422 lifn
.lifn_family
= PF_UNSPEC
;
423 lifn
.lifn_flags
= LIFC_NOXMIT
| LIFC_TEMPORARY
| LIFC_ALLZONES
;
425 /* use so6 for ioctls with unspecified families by default */
426 if (ksocket_ioctl(so6
, SIOCGLIFNUM
, (intptr_t)&lifn
, &rval
, CRED())
431 numifs
= lifn
.lifn_count
;
436 /* allocate extra room in case more interfaces appear */
439 /* get the interface names and ip addresses */
440 bufsize
= numifs
* sizeof (struct lifreq
);
441 buf
= kmem_alloc(bufsize
, KM_SLEEP
);
443 lifc
.lifc_family
= AF_UNSPEC
;
444 lifc
.lifc_flags
= LIFC_NOXMIT
| LIFC_TEMPORARY
| LIFC_ALLZONES
;
445 lifc
.lifc_len
= bufsize
;
447 rc
= ksocket_ioctl(so6
, SIOCGLIFCONF
, (intptr_t)&lifc
, &rval
, CRED());
451 /* if our extra room is used up, try again */
452 if (bufsize
<= lifc
.lifc_len
) {
453 kmem_free(buf
, bufsize
);
457 /* calc actual number of ifconfs */
458 n
= lifc
.lifc_len
/ sizeof (struct lifreq
);
462 size_ipaddr
= sizeof (idm_addr_list_t
) +
463 (n
- 1) * sizeof (idm_addr_t
);
464 ipaddr
= kmem_zalloc(size_ipaddr
, KM_SLEEP
);
470 * Examine the array of interfaces and filter uninteresting ones
472 for (i
= 0, j
= 0, lp
= lifc
.lifc_req
; i
< n
; i
++, lp
++) {
475 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
479 * fetch the flags using the socket of the correct family
481 switch (ss
.ss_family
) {
483 rc
= ksocket_ioctl(so4
, SIOCGLIFFLAGS
, (intptr_t)lp
,
487 rc
= ksocket_ioctl(so6
, SIOCGLIFFLAGS
, (intptr_t)lp
,
495 * If we got the flags, skip uninteresting
496 * interfaces based on flags
498 if ((lp
->lifr_flags
& IFF_UP
) != IFF_UP
)
501 (IFF_ANYCAST
|IFF_NOLOCAL
|IFF_DEPRECATED
))
505 /* save ip address */
506 ip
= &ipaddr
->al_addrs
[j
];
507 switch (ss
.ss_family
) {
509 sin
= (struct sockaddr_in
*)&ss
;
510 if (!idm_v4_addr_okay(&sin
->sin_addr
))
512 ip
->a_addr
.i_addr
.in4
= sin
->sin_addr
;
513 ip
->a_addr
.i_insize
= sizeof (struct in_addr
);
516 sin6
= (struct sockaddr_in6
*)&ss
;
517 if (!idm_v6_addr_okay(&sin6
->sin6_addr
))
519 ip
->a_addr
.i_addr
.in6
= sin6
->sin6_addr
;
520 ip
->a_addr
.i_insize
= sizeof (struct in6_addr
);
529 /* no valid ifaddr */
530 kmem_free(ipaddr
, size_ipaddr
);
534 ipaddr
->al_out_cnt
= j
;
543 kmem_free(buf
, bufsize
);
546 return (size_ipaddr
);
550 idm_sorecv(ksocket_t so
, void *msg
, size_t len
)
558 * Fill in iovec and receive data
563 return (idm_iov_sorecv(so
, &iov
, 1, len
));
567 * idm_sosendto - Sends buffered data on a non-connected socket.
569 * This function puts the data provided on the wire by calling ksocket_sendmsg.
570 * It will return only when all the data has been sent or if an error
573 * Returns 0 for success, the socket errno value if ksocket_sendmsg fails, and
574 * -1 if ksocket_sendmsg returns success but not all of the data was sent
577 idm_sosendto(ksocket_t so
, void *buff
, size_t len
,
578 struct sockaddr
*name
, socklen_t namelen
)
585 iov
[0].iov_base
= buff
;
586 iov
[0].iov_len
= len
;
588 /* Initialization of the message header. */
589 bzero(&msg
, sizeof (msg
));
593 msg
.msg_namelen
= namelen
;
595 if ((error
= ksocket_sendmsg(so
, &msg
, 0, &sent
, CRED())) == 0) {
598 /* All data sent. Success. */
601 /* Not all data was sent. Failure */
611 * idm_iov_sosend - Sends an iovec on a connection.
613 * This function puts the data provided on the wire by calling ksocket_sendmsg.
614 * It will return only when all the data has been sent or if an error
617 * Returns 0 for success, the socket errno value if ksocket_sendmsg fails, and
618 * -1 if ksocket_sendmsg returns success but fewer than total_len bytes were sent
621 idm_iov_sosend(ksocket_t so
, iovec_t
*iop
, int iovlen
, size_t total_len
)
629 /* Initialization of the message header. */
630 bzero(&msg
, sizeof (msg
));
632 msg
.msg_iovlen
= iovlen
;
634 if ((error
= ksocket_sendmsg(so
, &msg
, 0, &sent
, CRED()))
637 if (sent
== total_len
) {
638 /* All data sent. Success. */
641 /* Not all data was sent. Failure */
651 * idm_iov_sorecv - Receives an iovec from a connection
653 * This function gets the data asked for from the socket. It will return
654 * only when all the requested data has been retrieved or if an error
657 * Returns 0 for success, the socket errno value if ksocket_recvmsg fails, and
658 * -1 if ksocket_recvmsg returns success but fewer than total_len bytes arrived
661 idm_iov_sorecv(ksocket_t so
, iovec_t
*iop
, int iovlen
, size_t total_len
)
670 /* Initialization of the message header. */
671 bzero(&msg
, sizeof (msg
));
673 msg
.msg_iovlen
= iovlen
;
676 if ((error
= ksocket_recvmsg(so
, &msg
, flags
, &recv
, CRED()))
679 if (recv
== total_len
) {
680 /* All requested data received. Success */
684 * Not all data was received. The connection has
696 idm_set_ini_preconnect_options(idm_so_conn_t
*sc
, boolean_t boot_conn
)
698 int conn_abort
= 10000;
699 int conn_notify
= 2000;
702 /* Pre-connect socket options */
703 (void) ksocket_setsockopt(sc
->ic_so
, IPPROTO_TCP
,
704 TCP_CONN_NOTIFY_THRESHOLD
, (char *)&conn_notify
, sizeof (int),
706 if (boot_conn
== B_FALSE
) {
707 (void) ksocket_setsockopt(sc
->ic_so
, IPPROTO_TCP
,
708 TCP_CONN_ABORT_THRESHOLD
, (char *)&conn_abort
, sizeof (int),
710 (void) ksocket_setsockopt(sc
->ic_so
, IPPROTO_TCP
,
712 (char *)&abort
, sizeof (int), CRED());
717 idm_set_postconnect_options(ksocket_t ks
)
721 /* Set connect options */
722 (void) ksocket_setsockopt(ks
, SOL_SOCKET
, SO_RCVBUF
,
723 (char *)&idm_so_rcvbuf
, sizeof (idm_so_rcvbuf
), CRED());
724 (void) ksocket_setsockopt(ks
, SOL_SOCKET
, SO_SNDBUF
,
725 (char *)&idm_so_sndbuf
, sizeof (idm_so_sndbuf
), CRED());
726 (void) ksocket_setsockopt(ks
, IPPROTO_TCP
, TCP_NODELAY
,
727 (char *)&on
, sizeof (on
), CRED());
/*
 * n2h24()
 * Combine three consecutive bytes into a single integer, with ptr[0] as the
 * most significant byte and ptr[2] as the least significant (i.e. decode a
 * 24-bit big-endian field).  idm_sorecvhdr() uses this to turn the header's
 * dlength field into pdu->isp_datalen.
 */
731 n2h24(const uchar_t
*ptr
)
733 return ((ptr
[0] << 16) | (ptr
[1] << 8) | ptr
[2]);
737 idm_dataseglenokay(idm_conn_t
*ic
, idm_pdu_t
*pdu
)
741 if (ic
->ic_conn_type
== CONN_TYPE_TGT
&&
742 pdu
->isp_datalen
> ic
->ic_conn_params
.max_recv_dataseglen
) {
743 IDM_CONN_LOG(CE_WARN
,
744 "idm_dataseglenokay: exceeded the max data segment length");
750 * Filter out any RFC3720 data-size violations.
752 switch (IDM_PDU_OPCODE(pdu
)) {
753 case ISCSI_OP_SCSI_TASK_MGT_MSG
:
754 case ISCSI_OP_SCSI_TASK_MGT_RSP
:
755 case ISCSI_OP_RTT_RSP
:
756 case ISCSI_OP_LOGOUT_CMD
:
758 * Data-segment not allowed and additional headers not allowed.
759 * (both must be zero according to the RFC3720.)
761 if (bhs
->hlength
!= 0 || pdu
->isp_datalen
!= 0)
764 case ISCSI_OP_NOOP_OUT
:
765 case ISCSI_OP_LOGIN_CMD
:
766 case ISCSI_OP_TEXT_CMD
:
767 case ISCSI_OP_SNACK_CMD
:
768 case ISCSI_OP_NOOP_IN
:
769 case ISCSI_OP_SCSI_RSP
:
770 case ISCSI_OP_LOGIN_RSP
:
771 case ISCSI_OP_TEXT_RSP
:
772 case ISCSI_OP_SCSI_DATA_RSP
:
773 case ISCSI_OP_LOGOUT_RSP
:
774 case ISCSI_OP_ASYNC_EVENT
:
775 case ISCSI_OP_REJECT_MSG
:
777 * Additional headers not allowed.
778 * (must be zero according to RFC3720.)
783 case ISCSI_OP_SCSI_CMD
:
785 * See RFC3720, section 10.3
787 * For pure read cmds, data-segment-length must be zero.
788 * For non-final transfers, data-size must be an even number of
790 * For any transfer, an expected byte count must be provided.
791 * For bidirectional transfers, an additional-header must be
792 * provided (for the read byte-count.)
794 if (pdu
->isp_datalen
) {
795 if ((bhs
->flags
& (ISCSI_FLAG_CMD_READ
|
796 ISCSI_FLAG_CMD_WRITE
)) == ISCSI_FLAG_CMD_READ
)
798 if ((bhs
->flags
& ISCSI_FLAG_FINAL
) == 0 &&
799 ((pdu
->isp_datalen
& 0x3) != 0))
802 if (bhs
->flags
& (ISCSI_FLAG_CMD_READ
|
803 ISCSI_FLAG_CMD_WRITE
)) {
804 iscsi_scsi_cmd_hdr_t
*cmdhdr
=
805 (iscsi_scsi_cmd_hdr_t
*)bhs
;
807 * we're transferring some data, we must have a
810 if (cmdhdr
->data_length
== 0)
814 case ISCSI_OP_SCSI_DATA
:
816 * See RFC3720, section 10.7
818 * Additional headers aren't allowed, and the data-size must
819 * be an even number of 4-byte words (unless the final bit
824 if ((bhs
->flags
& ISCSI_FLAG_FINAL
) == 0 &&
825 ((pdu
->isp_datalen
& 0x3) != 0))
835 idm_sorecvhdr(idm_conn_t
*ic
, idm_pdu_t
*pdu
)
838 uint32_t hdr_digest_crc
;
839 uint32_t crc_calculated
;
845 idm_so_conn_t
*so_conn
;
848 so_conn
= ic
->ic_transport_private
;
854 rc
= idm_sorecv(so_conn
->ic_so
, pdu
->isp_hdr
, sizeof (iscsi_hdr_t
));
855 if (rc
!= IDM_STATUS_SUCCESS
) {
856 return (IDM_STATUS_FAIL
);
860 * Check actual AHS length against the amount available in the buffer
862 pdu
->isp_hdrlen
= sizeof (iscsi_hdr_t
) +
863 (bhs
->hlength
* sizeof (uint32_t));
864 pdu
->isp_datalen
= n2h24(bhs
->dlength
);
866 if (!idm_dataseglenokay(ic
, pdu
)) {
867 IDM_CONN_LOG(CE_WARN
,
868 "idm_sorecvhdr: invalid data segment length");
869 return (IDM_STATUS_FAIL
);
871 if (bhs
->hlength
> IDM_SORX_CACHE_AHSLEN
) {
872 /* Allocate a new header segment and change the callback */
873 new_hdr
= kmem_alloc(pdu
->isp_hdrlen
, KM_SLEEP
);
874 bcopy(pdu
->isp_hdr
, new_hdr
, sizeof (iscsi_hdr_t
));
875 pdu
->isp_hdr
= new_hdr
;
876 pdu
->isp_flags
|= IDM_PDU_ADDL_HDR
;
879 * This callback will restore the expected values after
880 * the RX PDU has been processed.
882 pdu
->isp_callback
= idm_sorx_addl_pdu_cb
;
886 * Setup receipt of additional header and header digest (if enabled).
888 if (bhs
->hlength
> 0) {
889 iov
[iovlen
].iov_base
= (caddr_t
)(pdu
->isp_hdr
+ 1);
890 ahslen
= pdu
->isp_hdrlen
- sizeof (iscsi_hdr_t
);
891 iov
[iovlen
].iov_len
= ahslen
;
892 total_len
+= iov
[iovlen
].iov_len
;
896 if (ic
->ic_conn_flags
& IDM_CONN_HEADER_DIGEST
) {
897 iov
[iovlen
].iov_base
= (caddr_t
)&hdr_digest_crc
;
898 iov
[iovlen
].iov_len
= sizeof (hdr_digest_crc
);
899 total_len
+= iov
[iovlen
].iov_len
;
904 (idm_iov_sorecv(so_conn
->ic_so
, &iov
[0], iovlen
,
906 return (IDM_STATUS_FAIL
);
910 * Validate header digest if enabled
912 if (ic
->ic_conn_flags
& IDM_CONN_HEADER_DIGEST
) {
913 crc_calculated
= idm_crc32c(pdu
->isp_hdr
,
914 sizeof (iscsi_hdr_t
) + ahslen
);
915 if (crc_calculated
!= hdr_digest_crc
) {
916 /* Invalid Header Digest */
917 return (IDM_STATUS_HEADER_DIGEST
);
925 * idm_so_ini_conn_create()
926 * Allocate the sockets transport connection resources.
929 idm_so_ini_conn_create(idm_conn_req_t
*cr
, idm_conn_t
*ic
)
932 idm_so_conn_t
*so_conn
;
935 so
= idm_socreate(cr
->cr_domain
, cr
->cr_type
,
938 return (IDM_STATUS_FAIL
);
941 /* Bind the socket if configured to do so */
943 if (ksocket_bind(so
, &cr
->cr_bound_addr
.sin
,
944 SIZEOF_SOCKADDR(&cr
->cr_bound_addr
.sin
), CRED()) != 0) {
946 return (IDM_STATUS_FAIL
);
950 idmrc
= idm_so_conn_create_common(ic
, so
);
951 if (idmrc
!= IDM_STATUS_SUCCESS
) {
954 return (IDM_STATUS_FAIL
);
957 so_conn
= ic
->ic_transport_private
;
958 /* Set up socket options */
959 idm_set_ini_preconnect_options(so_conn
, cr
->cr_boot_conn
);
961 return (IDM_STATUS_SUCCESS
);
965 * idm_so_ini_conn_destroy()
966 * Tear down the sockets transport connection resources.
969 idm_so_ini_conn_destroy(idm_conn_t
*ic
)
971 idm_so_conn_destroy_common(ic
);
975 * idm_so_ini_conn_connect()
976 * Establish the connection referred to by the handle previously allocated via
977 * idm_so_ini_conn_create().
980 idm_so_ini_conn_connect(idm_conn_t
*ic
)
982 idm_so_conn_t
*so_conn
;
983 struct sonode
*node
= NULL
;
985 clock_t lbolt
, conn_login_max
, conn_login_interval
;
988 so_conn
= ic
->ic_transport_private
;
989 nonblock
= ic
->ic_conn_params
.nonblock_socket
;
990 conn_login_max
= ic
->ic_conn_params
.conn_login_max
;
991 conn_login_interval
= ddi_get_lbolt() +
992 SEC_TO_TICK(ic
->ic_conn_params
.conn_login_interval
);
994 if (nonblock
== B_TRUE
) {
995 node
= ((struct sonode
*)(so_conn
->ic_so
));
996 /* Set the socket to non-blocking mode */
997 idm_so_socket_set_nonblock(node
);
999 rc
= ksocket_connect(so_conn
->ic_so
,
1000 &ic
->ic_ini_dst_addr
.sin
,
1001 (SIZEOF_SOCKADDR(&ic
->ic_ini_dst_addr
.sin
)),
1003 if (rc
== 0 || rc
== EISCONN
) {
1004 /* connected, or was already connected */
1005 rc
= IDM_STATUS_SUCCESS
;
1008 if ((rc
== ETIMEDOUT
) || (rc
== ECONNREFUSED
) ||
1009 (rc
== ECONNRESET
)) {
1010 /* connection timed out, was refused, or was reset */
1013 lbolt
= ddi_get_lbolt();
1014 if (lbolt
> conn_login_max
) {
1016 * Connection retry timeout,
1017 * failed connect to target.
1021 if (lbolt
< conn_login_interval
) {
1022 if ((rc
== EINPROGRESS
) || (rc
== EALREADY
)) {
1023 /* TCP connect still in progress */
1024 ddi_sleep(IN_PROGRESS_DELAY
);
1027 delay(conn_login_interval
- lbolt
);
1030 conn_login_interval
= ddi_get_lbolt() +
1031 SEC_TO_TICK(ic
->ic_conn_params
.conn_login_interval
);
1033 /* restore blocking mode once connected */
1034 if (rc
== IDM_STATUS_SUCCESS
) {
1035 idm_so_socket_set_block(node
);
1038 rc
= ksocket_connect(so_conn
->ic_so
, &ic
->ic_ini_dst_addr
.sin
,
1039 (SIZEOF_SOCKADDR(&ic
->ic_ini_dst_addr
.sin
)), CRED());
1043 idm_soshutdown(so_conn
->ic_so
);
1044 return (IDM_STATUS_FAIL
);
1047 idm_so_conn_connect_common(ic
);
1049 idm_set_postconnect_options(so_conn
->ic_so
);
1051 return (IDM_STATUS_SUCCESS
);
1055 idm_so_tgt_conn_create(idm_conn_t
*ic
, ksocket_t new_so
)
1059 idm_set_postconnect_options(new_so
);
1060 idmrc
= idm_so_conn_create_common(ic
, new_so
);
1066 idm_so_tgt_conn_destroy(idm_conn_t
*ic
)
1068 idm_so_conn_destroy_common(ic
);
1072 * idm_so_tgt_conn_connect()
1073 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
1074 * is invoked from the SM as a result of an inbound connection request.
1077 idm_so_tgt_conn_connect(idm_conn_t
*ic
)
1079 idm_so_conn_connect_common(ic
);
1081 return (IDM_STATUS_SUCCESS
);
/*
 * idm_so_conn_create_common()
 * Shared by the initiator and target create paths.  Allocates a zeroed
 * idm_so_conn_t, records new_so in it, hangs it off ic->ic_transport_private
 * and sets the connection defaults (no transport header, scoreboarding on,
 * default max receive/transmit data segment lengths).  Also initializes the
 * TX mutex, condition variable and PDU list.
 *
 * The allocation uses KM_SLEEP and the only return visible here is
 * IDM_STATUS_SUCCESS.  idm_so_conn_destroy_common() undoes this setup.
 */
1085 idm_so_conn_create_common(idm_conn_t
*ic
, ksocket_t new_so
)
1087 idm_so_conn_t
*so_conn
;
1089 so_conn
= kmem_zalloc(sizeof (idm_so_conn_t
), KM_SLEEP
);
1090 so_conn
->ic_so
= new_so
;
1092 ic
->ic_transport_private
= so_conn
;
1093 ic
->ic_transport_hdrlen
= 0;
1095 /* Set the scoreboarding flag on this connection */
1096 ic
->ic_conn_flags
|= IDM_CONN_USE_SCOREBOARD
;
/* Start from the iSCSI default segment lengths */
1097 ic
->ic_conn_params
.max_recv_dataseglen
=
1098 ISCSI_DEFAULT_MAX_RECV_SEG_LEN
;
1099 ic
->ic_conn_params
.max_xmit_dataseglen
=
1100 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN
;
1103 * Initialize tx thread mutex and list
1105 mutex_init(&so_conn
->ic_tx_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
1106 cv_init(&so_conn
->ic_tx_cv
, NULL
, CV_DEFAULT
, NULL
);
/* PDUs are linked onto ic_tx_list through their idm_tx_link member */
1107 list_create(&so_conn
->ic_tx_list
, sizeof (idm_pdu_t
),
1108 offsetof(idm_pdu_t
, idm_tx_link
));
1110 return (IDM_STATUS_SUCCESS
);
/*
 * idm_so_conn_destroy_common()
 * Releases everything idm_so_conn_create_common() set up: detaches the
 * private state from the connection, closes the socket, destroys the TX
 * list, mutex and condition variable, and frees the idm_so_conn_t.
 *
 * NOTE(review): so_conn is dereferenced without a NULL check, so callers
 * must only invoke this on a connection that was successfully created.
 */
1114 idm_so_conn_destroy_common(idm_conn_t
*ic
)
1116 idm_so_conn_t
*so_conn
= ic
->ic_transport_private
;
/* Clear the back-pointer first so ic no longer references the state being freed */
1118 ic
->ic_transport_private
= NULL
;
1119 idm_sodestroy(so_conn
->ic_so
);
1120 list_destroy(&so_conn
->ic_tx_list
);
1121 mutex_destroy(&so_conn
->ic_tx_mutex
);
1122 cv_destroy(&so_conn
->ic_tx_cv
);
1124 kmem_free(so_conn
, sizeof (idm_so_conn_t
));
/*
 * idm_so_conn_connect_common()
 * Final connect step shared by initiator and target connections.  Copies
 * the socket's local and peer addresses into ic->ic_laddr / ic->ic_raddr,
 * then creates the TX and RX service threads and blocks until both
 * ic_tx_thread_did and ic_rx_thread_did are non-zero.
 */
1128 idm_so_conn_connect_common(idm_conn_t
*ic
)
1130 idm_so_conn_t
*so_conn
;
1131 struct sockaddr_in6 t_addr
;
1132 socklen_t t_addrlen
= 0;
1134 so_conn
= ic
->ic_transport_private
;
1135 bzero(&t_addr
, sizeof (struct sockaddr_in6
));
1136 t_addrlen
= sizeof (struct sockaddr_in6
);
1138 /* Set the local and remote addresses in the idm conn handle */
/*
 * NOTE(review): both lookups discard their return value, and t_addrlen
 * (passed by pointer) is not reset to sizeof (struct sockaddr_in6) before
 * the second call -- confirm that is intended.
 */
1139 (void) ksocket_getsockname(so_conn
->ic_so
, (struct sockaddr
*)&t_addr
,
1140 &t_addrlen
, CRED());
1141 bcopy(&t_addr
, &ic
->ic_laddr
, t_addrlen
);
1142 (void) ksocket_getpeername(so_conn
->ic_so
, (struct sockaddr
*)&t_addr
,
1143 &t_addrlen
, CRED());
1144 bcopy(&t_addr
, &ic
->ic_raddr
, t_addrlen
);
/* Hold ic_mutex across thread creation so the cv_wait below cannot miss a wakeup */
1146 mutex_enter(&ic
->ic_mutex
);
1147 so_conn
->ic_tx_thread
= thread_create(NULL
, 0, idm_sotx_thread
, ic
, 0,
1148 &p0
, TS_RUN
, minclsyspri
);
1149 so_conn
->ic_rx_thread
= thread_create(NULL
, 0, idm_sorx_thread
, ic
, 0,
1150 &p0
, TS_RUN
, minclsyspri
);
/*
 * Wait for both thread ids to be filled in.  They are not assigned in this
 * function; presumably each new thread records its own id and signals
 * ic_cv -- verify in idm_sotx_thread/idm_sorx_thread.
 */
1152 while (so_conn
->ic_rx_thread_did
== 0 ||
1153 so_conn
->ic_tx_thread_did
== 0)
1154 cv_wait(&ic
->ic_cv
, &ic
->ic_mutex
);
1155 mutex_exit(&ic
->ic_mutex
);
1159 * idm_so_conn_disconnect()
1160 * Shut down the socket connection and stop the TX and RX threads
/*
 * Installed in idm_so_transport_ops as both the target and the initiator
 * disconnect entry point.  Clears the running flags for the RX and TX
 * threads, wakes the TX thread, shuts the socket down and then joins both
 * threads, so it does not return until they have exited.
 */
1163 idm_so_conn_disconnect(idm_conn_t
*ic
)
1165 idm_so_conn_t
*so_conn
;
1167 so_conn
= ic
->ic_transport_private
;
1169 mutex_enter(&ic
->ic_mutex
);
1170 so_conn
->ic_rx_thread_running
= B_FALSE
;
1171 so_conn
->ic_tx_thread_running
= B_FALSE
;
1172 /* We need to wake up the TX thread; ic_tx_mutex is taken while ic_mutex is still held */
1173 mutex_enter(&so_conn
->ic_tx_mutex
);
1174 cv_signal(&so_conn
->ic_tx_cv
);
1175 mutex_exit(&so_conn
->ic_tx_mutex
);
1176 mutex_exit(&ic
->ic_mutex
);
1178 /* This should wake up the RX thread if it is sleeping */
1179 idm_soshutdown(so_conn
->ic_so
);
/* Both locks are released before blocking on the thread exits */
1181 thread_join(so_conn
->ic_tx_thread_did
);
1182 thread_join(so_conn
->ic_rx_thread_did
);
1186 * idm_so_tgt_svc_create()
1187 * Establish a service on an IP address and port. idm_svc_req_t contains
1188 * the service parameters.
/*
 * Allocates the zeroed per-service sockets state and stores it in
 * is->is_so_svc.  No socket is opened here; the listening socket is created
 * by idm_so_tgt_svc_online().  sr is not referenced in the visible body, and
 * the only return visible is IDM_STATUS_SUCCESS (the allocation is KM_SLEEP).
 */
1192 idm_so_tgt_svc_create(idm_svc_req_t
*sr
, idm_svc_t
*is
)
1194 idm_so_svc_t
*so_svc
;
1196 so_svc
= kmem_zalloc(sizeof (idm_so_svc_t
), KM_SLEEP
);
1198 /* Set the new sockets service in svc handle */
1199 is
->is_so_svc
= (void *)so_svc
;
1201 return (IDM_STATUS_SUCCESS
);
1205 * idm_so_tgt_svc_destroy()
1206 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
/*
 * Frees the idm_so_svc_t allocated by idm_so_tgt_svc_create().
 * NOTE(review): is->is_so_svc is left pointing at the freed memory; confirm
 * nothing reads it after this call.
 */
1209 idm_so_tgt_svc_destroy(idm_svc_t
*is
)
1211 /* the socket will have been torn down; free the service */
1212 kmem_free(is
->is_so_svc
, sizeof (idm_so_svc_t
));
1216 * idm_so_tgt_svc_online()
1217 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
/*
 * Brings the service online: opens an IPv6 stream socket, binds it to the
 * wildcard address (in6addr_any) on the service's port, starts listening,
 * and launches idm_so_svc_port_watcher() to accept connections.  Returns
 * IDM_STATUS_SUCCESS once the watcher has set is_thread_running, or
 * IDM_STATUS_FAIL if socket creation, bind, listen or thread creation fails.
 *
 * is->is_mutex is held for the whole setup and dropped on every exit path.
 * NOTE(review): on the failure paths so_svc->is_so is closed but not reset,
 * so it keeps referring to the destroyed socket.
 */
1221 idm_so_tgt_svc_online(idm_svc_t
*is
)
1223 idm_so_svc_t
*so_svc
;
1224 idm_svc_req_t
*sr
= &is
->is_svc_req
;
1225 struct sockaddr_in6 sin6_ip
;
1226 const uint32_t on
= 1;
1228 mutex_enter(&is
->is_mutex
);
1229 so_svc
= (idm_so_svc_t
*)is
->is_so_svc
;
1232 * Try creating an IPv6 socket first
1234 if ((so_svc
->is_so
= idm_socreate(PF_INET6
, SOCK_STREAM
, 0)) == NULL
) {
1235 mutex_exit(&is
->is_mutex
);
1236 return (IDM_STATUS_FAIL
);
/* Wildcard IPv6 address, service port converted to network byte order */
1238 bzero(&sin6_ip
, sizeof (sin6_ip
));
1239 sin6_ip
.sin6_family
= AF_INET6
;
1240 sin6_ip
.sin6_port
= htons(sr
->sr_port
);
1241 sin6_ip
.sin6_addr
= in6addr_any
;
/* Best effort: the result of setting SO_REUSEADDR is deliberately ignored */
1243 (void) ksocket_setsockopt(so_svc
->is_so
, SOL_SOCKET
,
1244 SO_REUSEADDR
, (char *)&on
, sizeof (on
), CRED());
/*
 * NOTE(review): unlike the later failure paths, a bind failure closes the
 * socket without calling idm_soshutdown() first.
 */
1246 if (ksocket_bind(so_svc
->is_so
, (struct sockaddr
*)&sin6_ip
,
1247 sizeof (sin6_ip
), CRED()) != 0) {
1248 mutex_exit(&is
->is_mutex
);
1249 idm_sodestroy(so_svc
->is_so
);
1250 return (IDM_STATUS_FAIL
);
/* Buffer sizes and TCP_NODELAY are applied to the listening socket itself */
1254 idm_set_postconnect_options(so_svc
->is_so
);
/* Listen with a backlog of 5 */
1256 if (ksocket_listen(so_svc
->is_so
, 5, CRED()) != 0) {
1257 mutex_exit(&is
->is_mutex
);
1258 idm_soshutdown(so_svc
->is_so
);
1259 idm_sodestroy(so_svc
->is_so
);
1260 return (IDM_STATUS_FAIL
);
1263 /* Launch a watch thread */
1264 so_svc
->is_thread
= thread_create(NULL
, 0, idm_so_svc_port_watcher
,
1265 is
, 0, &p0
, TS_RUN
, minclsyspri
);
1267 if (so_svc
->is_thread
== NULL
) {
1268 /* Failure to launch; teardown the socket */
1269 mutex_exit(&is
->is_mutex
);
1270 idm_soshutdown(so_svc
->is_so
);
1271 idm_sodestroy(so_svc
->is_so
);
1272 return (IDM_STATUS_FAIL
);
/* Extra hold on the socket; idm_so_svc_port_watcher() calls ksocket_rele() on it when its loop ends */
1274 ksocket_hold(so_svc
->is_so
);
1275 /* Wait for the port watcher thread to start */
1276 while (!so_svc
->is_thread_running
)
1277 cv_wait(&is
->is_cv
, &is
->is_mutex
);
1278 mutex_exit(&is
->is_mutex
);
1280 return (IDM_STATUS_SUCCESS
);
1284 * idm_so_tgt_svc_offline
1286 * Stop listening on the IP address and port identified by idm_svc_t.
1289 idm_so_tgt_svc_offline(idm_svc_t
*is
)
1291 idm_so_svc_t
*so_svc
;
1292 mutex_enter(&is
->is_mutex
);
1293 so_svc
= (idm_so_svc_t
*)is
->is_so_svc
;
1294 so_svc
->is_thread_running
= B_FALSE
;
1295 mutex_exit(&is
->is_mutex
);
1300 idm_sodestroy(so_svc
->is_so
);
1303 * Now we expect the port watcher thread to terminate
1305 thread_join(so_svc
->is_thread_did
);
/*
 * Watch thread for target service connection establishment.
 *
 * arg is the idm_svc_t being served.  The thread publishes its thread id
 * and running flag under is_mutex and signals is_cv so the creator can
 * stop waiting, then loops accepting connections until is_thread_running
 * is cleared.
 *
 * Locking: is_mutex is held whenever the loop condition is evaluated and
 * is dropped for the duration of each accept/connection setup.  Every
 * path back to the top of the loop re-acquires it first.
 */
void
idm_so_svc_port_watcher(void *arg)
{
	idm_svc_t		*svc = arg;
	ksocket_t		new_so;
	idm_conn_t		*ic;
	idm_status_t		idmrc;
	idm_so_svc_t		*so_svc;
	int			rc;
	struct sockaddr_in6	t_addr;
	socklen_t		t_addrlen;

	bzero(&t_addr, sizeof (struct sockaddr_in6));
	t_addrlen = sizeof (struct sockaddr_in6);
	mutex_enter(&svc->is_mutex);

	so_svc = svc->is_so_svc;
	so_svc->is_thread_running = B_TRUE;
	so_svc->is_thread_did = so_svc->is_thread->t_did;

	/* Wake the thread waiting for the watcher to start. */
	cv_signal(&svc->is_cv);

	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
	    svc->is_svc_req.sr_port);

	while (so_svc->is_thread_running) {
		mutex_exit(&svc->is_mutex);

		if ((rc = ksocket_accept(so_svc->is_so,
		    (struct sockaddr *)&t_addr, &t_addrlen,
		    &new_so, CRED())) != 0) {
			mutex_enter(&svc->is_mutex);
			/* ECONNABORTED and EINTR are not worth logging. */
			if (rc != ECONNABORTED && rc != EINTR) {
				IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
				    " ksocket_accept failed %d", rc);
			}
			/*
			 * Unclean shutdown of this thread is not handled
			 * wait for !is_thread_running.
			 */
			continue;
		}

		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
		    &ic);
		if (idmrc != IDM_STATUS_SUCCESS) {
			/* Drop connection */
			idm_soshutdown(new_so);
			idm_sodestroy(new_so);
			mutex_enter(&svc->is_mutex);
			continue;
		}

		idmrc = idm_so_tgt_conn_create(ic, new_so);
		if (idmrc != IDM_STATUS_SUCCESS) {
			/* Undo the connection context, then drop the socket */
			idm_svc_conn_destroy(ic);
			idm_soshutdown(new_so);
			idm_sodestroy(new_so);
			mutex_enter(&svc->is_mutex);
			continue;
		}

		/*
		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
		 * will notify the client (target) about the new connection.
		 */
		idm_conn_event(ic, CE_CONNECT_ACCEPT, (uintptr_t)NULL);

		mutex_enter(&svc->is_mutex);
	}
	/* Release the hold on the listening socket (is_mutex still held). */
	ksocket_rele(so_svc->is_so);
	so_svc->is_thread_running = B_FALSE;
	mutex_exit(&svc->is_mutex);

	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
	    svc->is_svc_req.sr_port);

	thread_exit();
}
/*
 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
 * frees resources associated with the task.
 *
 * It's not clear that this should return idm_status_t.  What do we do
 * if it fails?
 *
 * As written this always returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_free_task_rsrc(idm_task_t *idt)
{
	idm_buf_t	*idb, *next_idb;

	/*
	 * There is nothing to cleanup on initiator connections
	 */
	if (IDM_CONN_ISINI(idt->idt_ic))
		return (IDM_STATUS_SUCCESS);

	/*
	 * If this is a target connection, call idm_buf_rx_from_ini_done for
	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
	 *
	 * In addition, remove any buffers associated with this task from
	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
	 * items don't actually get removed from that list (and completion
	 * routines called) until idm_task_cleanup.
	 */
	mutex_enter(&idt->idt_mutex);

	/* next_idb is fetched before the callback, which may touch idb. */
	for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
		next_idb = list_next(&idt->idt_outbufv, idb);
		if (idb->idb_in_transport) {
			/*
			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_RX_FROM_INI);
			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
			/* Re-take the mutex dropped by the callback. */
			mutex_enter(&idt->idt_mutex);
		}
	}

	for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
		next_idb = list_next(&idt->idt_inbufv, idb);
		/*
		 * We want to remove these items from the tx_list as well,
		 * but knowing it's in the idt_inbufv list is not a guarantee
		 * that it's in the tx_list.  If it's on the tx list then
		 * let idm_sotx_thread() clean it up.
		 */
		if (idb->idb_in_transport && !idb->idb_tx_thread) {
			/*
			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_TX_TO_INI);
			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
			/* Re-take the mutex dropped by the callback. */
			mutex_enter(&idt->idt_mutex);
		}
	}

	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_SUCCESS);
}
1465 * idm_so_negotiate_key_values() validates the key values for this connection
1469 idm_so_negotiate_key_values(idm_conn_t
*it
, nvlist_t
*request_nvl
,
1470 nvlist_t
*response_nvl
, nvlist_t
*negotiated_nvl
)
1472 /* All parameters are negotiated at the iscsit level */
1473 return (KV_HANDLED
);
/*
 * idm_so_notice_key_values() activates the negotiated key values for
 * this connection.
 *
 * Walks negotiated_nvl.  Digest keys are applied to the connection flags
 * and then removed from the list; MaxRecvDataSegmentLength is copied into
 * ic_conn_params.max_xmit_dataseglen and left in the list.  All other keys
 * are ignored.
 */
static void
idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
{
	char			*nvp_name;
	nvpair_t		*nvp;
	nvpair_t		*next_nvp;
	int			nvrc;
	idm_status_t		idm_status;
	const idm_kv_xlate_t	*ikvx;
	uint64_t		num_val;

	for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
	    nvp != NULL; nvp = next_nvp) {
		/* Fetch the successor first: nvp may be removed below. */
		next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
		nvp_name = nvpair_name(nvp);

		/*
		 * NOTE(review): ikvx is dereferenced without a NULL check;
		 * confirm idm_lookup_kv_xlate() cannot return NULL here.
		 */
		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
		switch (ikvx->ik_key_id) {
		case KI_HEADER_DIGEST:
		case KI_DATA_DIGEST:
			idm_status = idm_so_handle_digest(it, nvp, ikvx);
			ASSERT(idm_status == 0);

			/* Remove processed item from negotiated_nvl list */
			nvrc = nvlist_remove_all(
			    negotiated_nvl, ikvx->ik_key_name);
			ASSERT(nvrc == 0);
			break;
		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
			/*
			 * Just pass the value down to idm layer.
			 * No need to remove it from negotiated_nvl list here.
			 */
			nvrc = nvpair_value_uint64(nvp, &num_val);
			ASSERT(nvrc == 0);
			it->ic_conn_params.max_xmit_dataseglen =
			    (uint32_t)num_val;
			break;
		default:
			break;
		}
	}
}
/*
 * idm_so_declare_key_values() declares the key values for this connection
 *
 * Walks config_nvl.  For MaxRecvDataSegmentLength the value is copied to
 * outgoing_nvl (when one is supplied) and recorded in
 * ic_conn_params.max_recv_dataseglen.  The walk stops at the first nvlist
 * error; the result is that error translated by idm_nvstat_to_kvstat().
 */
/* ARGSUSED */
static kv_status_t
idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
    nvlist_t *outgoing_nvl)
{
	char			*nvp_name;
	nvpair_t		*nvp;
	nvpair_t		*next_nvp;
	kv_status_t		kvrc;
	int			nvrc = 0;
	const idm_kv_xlate_t	*ikvx;
	uint64_t		num_val;

	for (nvp = nvlist_next_nvpair(config_nvl, NULL);
	    nvp != NULL && nvrc == 0; nvp = next_nvp) {
		next_nvp = nvlist_next_nvpair(config_nvl, nvp);
		nvp_name = nvpair_name(nvp);

		/*
		 * NOTE(review): ikvx is dereferenced without a NULL check;
		 * confirm idm_lookup_kv_xlate() cannot return NULL here.
		 */
		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
		switch (ikvx->ik_key_id) {
		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
			if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
				break;
			}
			/* outgoing_nvl is optional */
			if (outgoing_nvl &&
			    (nvrc = nvlist_add_uint64(outgoing_nvl,
			    nvp_name, num_val)) != 0) {
				break;
			}
			it->ic_conn_params.max_recv_dataseglen =
			    (uint32_t)num_val;
			break;
		default:
			break;
		}
	}
	kvrc = idm_nvstat_to_kvstat(nvrc);
	return (kvrc);
}
1568 idm_so_handle_digest(idm_conn_t
*it
, nvpair_t
*digest_choice
,
1569 const idm_kv_xlate_t
*ikvx
)
1572 char *digest_choice_string
;
1574 nvrc
= nvpair_value_string(digest_choice
,
1575 &digest_choice_string
);
1577 if (strcasecmp(digest_choice_string
, "crc32c") == 0) {
1578 switch (ikvx
->ik_key_id
) {
1579 case KI_HEADER_DIGEST
:
1580 it
->ic_conn_flags
|= IDM_CONN_HEADER_DIGEST
;
1582 case KI_DATA_DIGEST
:
1583 it
->ic_conn_flags
|= IDM_CONN_DATA_DIGEST
;
1589 } else if (strcasecmp(digest_choice_string
, "none") == 0) {
1590 switch (ikvx
->ik_key_id
) {
1591 case KI_HEADER_DIGEST
:
1592 it
->ic_conn_flags
&= ~IDM_CONN_HEADER_DIGEST
;
1594 case KI_DATA_DIGEST
:
1595 it
->ic_conn_flags
&= ~IDM_CONN_DATA_DIGEST
;
1605 return (IDM_STATUS_SUCCESS
);
1610 * idm_so_conn_is_capable() verifies that the passed connection is provided
1611 * for by the sockets interface.
1615 idm_so_conn_is_capable(idm_conn_req_t
*ic
, idm_transport_caps_t
*caps
)
/*
 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
 * idm_sorecv_scsidata() function invoked earlier actually reads the data
 * off the socket into the appropriate buffers.
 *
 * On any validation failure the PDU is handed to
 * idm_pdu_rx_protocol_error(); otherwise it is completed with
 * IDM_STATUS_SUCCESS.  The task hold taken by idm_task_find() is released
 * on every path after a successful lookup.
 */
static void
idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
{
	iscsi_data_hdr_t	*bhs;
	idm_task_t		*idt;
	idm_buf_t		*idb;
	uint32_t		datasn;
	size_t			offset;
	iscsi_hdr_t		*ihp = (iscsi_hdr_t *)pdu->isp_hdr;
	iscsi_data_rsp_hdr_t	*idrhp = (iscsi_data_rsp_hdr_t *)ihp;

	ASSERT(ic != NULL);
	ASSERT(pdu != NULL);
	ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP);

	/* Header fields arrive in network byte order. */
	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
	datasn	= ntohl(bhs->datasn);
	offset	= ntohl(bhs->offset);

	/*
	 * Look up the task corresponding to the initiator task tag
	 * to get the buffers affiliated with the task.
	 */
	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
	if (idt == NULL) {
		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}

	/* The buffer was recorded on the PDU when the data was received. */
	idb = pdu->isp_sorx_buf;
	if (idb == NULL) {
		IDM_CONN_LOG(CE_WARN,
		    "idm_so_rx_datain: failed to find buffer");
		idm_task_rele(idt);
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}

	/*
	 * DataSN values should be sequential and should not have any gaps or
	 * repetitions. Check the DataSN with the one stored in the task.
	 */
	if (datasn == idt->idt_exp_datasn) {
		idt->idt_exp_datasn++; /* keep track of DataSN received */
	} else {
		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
		idm_task_rele(idt);
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}

	/*
	 * PDUs in a sequence should be in continuously increasing
	 * address offset
	 */
	if (offset != idb->idb_exp_offset) {
		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
		idm_task_rele(idt);
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}
	/* Expected next relative buffer offset */
	idb->idb_exp_offset += n2h24(bhs->dlength);
	idt->idt_rx_bytes += n2h24(bhs->dlength);

	idm_task_rele(idt);

	/*
	 * For now call scsi_rsp which will process the data rsp
	 * Revisit, need to provide an explicit client entry point for
	 * phase collapse completions.
	 */
	if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) &&
	    (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
	}

	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
}
1707 * The idm_so_rx_dataout() function is used by the iSCSI target to read
1708 * data from the Data-Out PDU sent by the iSCSI initiator.
1710 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1711 * task to get the buffers associated with the PDU. A PDU might span buffers.
1712 * The data is then read into the respective buffer.
1715 idm_so_rx_dataout(idm_conn_t
*ic
, idm_pdu_t
*pdu
)
1718 iscsi_data_hdr_t
*bhs
;
1724 ASSERT(pdu
!= NULL
);
1725 ASSERT(IDM_PDU_OPCODE(pdu
) == ISCSI_OP_SCSI_DATA
);
1727 bhs
= (iscsi_data_hdr_t
*)pdu
->isp_hdr
;
1728 offset
= ntohl(bhs
->offset
);
1731 * Look up the task corresponding to the initiator task tag
1732 * to get the buffers affiliated with the task.
1734 idt
= idm_task_find(ic
, bhs
->itt
, bhs
->ttt
);
1736 IDM_CONN_LOG(CE_WARN
,
1737 "idm_so_rx_dataout: failed to find task");
1738 idm_pdu_rx_protocol_error(ic
, pdu
);
1742 idb
= pdu
->isp_sorx_buf
;
1744 IDM_CONN_LOG(CE_WARN
,
1745 "idm_so_rx_dataout: failed to find buffer");
1747 idm_pdu_rx_protocol_error(ic
, pdu
);
1751 /* Keep track of data transferred - check data offsets */
1752 if (offset
!= idb
->idb_exp_offset
) {
1753 IDM_CONN_LOG(CE_NOTE
, "idm_so_rx_dataout: offset out of seq: "
1754 "%ld, %d", offset
, idb
->idb_exp_offset
);
1756 idm_pdu_rx_protocol_error(ic
, pdu
);
1759 /* Expected next relative offset */
1760 idb
->idb_exp_offset
+= ntoh24(bhs
->dlength
);
1761 idt
->idt_rx_bytes
+= n2h24(bhs
->dlength
);
1764 * Call the buffer callback when the transfer is complete
1766 * The connection state machine should only abort tasks after
1767 * shutting down the connection so we are assured that there
1768 * won't be a simultaneous attempt to abort this task at the
1769 * same time as we are processing this PDU (due to a connection
1772 if (bhs
->flags
& ISCSI_FLAG_FINAL
) {
1774 * We have gotten the last data-message for the current
1775 * transfer. idb_xfer_len represents the data that the
1776 * command intended to transfer, it does not represent the
1777 * actual number of bytes transferred. If we have not
1778 * transferred the expected number of bytes something is
1781 * We have two options, when there is a mismatch, we can
1782 * regard the transfer as invalid -- or we can modify our
1783 * notion of "xfer_len." In order to be as stringent as
1784 * possible, here we regard this transfer as in error; and
1787 if (idb
->idb_buflen
== idb
->idb_xfer_len
&&
1789 (idb
->idb_exp_offset
- idb
->idb_bufoffset
)) {
1790 printf("idm_so_rx_dataout: incomplete transfer, "
1792 IDM_CONN_LOG(CE_NOTE
,
1793 "idm_so_rx_dataout: incomplete transfer: %ld, %d",
1794 offset
, (int)(idb
->idb_exp_offset
- offset
));
1796 idm_pdu_rx_protocol_error(ic
, pdu
);
1800 * We only want to call idm_buf_rx_from_ini_done once
1801 * per transfer. It's possible that this task has
1802 * already been aborted in which case
1803 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1804 * for each buffer with idb_in_transport==B_TRUE. To
1805 * close this window and ensure that this doesn't happen,
1806 * we'll clear idb->idb_in_transport now while holding
1807 * the task mutex. This is only really an issue for
1808 * SCSI task abort -- if tasks were being aborted because
1809 * of a connection state change the state machine would
1810 * have already stopped the receive thread.
1812 mutex_enter(&idt
->idt_mutex
);
1815 * Release the task hold here (obtained in idm_task_find)
1816 * because the task may complete synchronously during
1817 * idm_buf_rx_from_ini_done. Since we still have an active
1818 * buffer we know there is at least one additional hold on idt.
1823 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1825 DTRACE_ISCSI_8(xfer__done
, idm_conn_t
*, idt
->idt_ic
,
1826 uintptr_t, idb
->idb_buf
, uint32_t, idb
->idb_bufoffset
,
1827 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1828 uint32_t, idb
->idb_xfer_len
,
1829 int, XFER_BUF_RX_FROM_INI
);
1830 idm_buf_rx_from_ini_done(idt
, idb
, IDM_STATUS_SUCCESS
);
1831 idm_pdu_complete(pdu
, IDM_STATUS_SUCCESS
);
1836 idm_pdu_complete(pdu
, IDM_STATUS_SUCCESS
);
1840 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1841 * the R2T PDU sent by the iSCSI target indicating that it is ready to
1842 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1843 * and looks up the task in the task tree using the itt to get the output
1844 * buffers associated the task. The R2T PDU contains the offset of the
1845 * requested data and the data length. This function then constructs a
1846 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1847 * PDU is associated with the R2T by the Target Transfer Tag (ttt).
1851 idm_so_rx_rtt(idm_conn_t
*ic
, idm_pdu_t
*pdu
)
1855 iscsi_rtt_hdr_t
*rtt_hdr
;
1856 uint32_t data_offset
;
1857 uint32_t data_length
;
1860 ASSERT(pdu
!= NULL
);
1862 rtt_hdr
= (iscsi_rtt_hdr_t
*)pdu
->isp_hdr
;
1863 data_offset
= ntohl(rtt_hdr
->data_offset
);
1864 data_length
= ntohl(rtt_hdr
->data_length
);
1865 idt
= idm_task_find(ic
, rtt_hdr
->itt
, rtt_hdr
->ttt
);
1868 IDM_CONN_LOG(CE_WARN
, "idm_so_rx_rtt: could not find task");
1869 idm_pdu_rx_protocol_error(ic
, pdu
);
1873 /* Find the buffer bound to the task by the iSCSI initiator */
1874 mutex_enter(&idt
->idt_mutex
);
1875 idb
= idm_buf_find(&idt
->idt_outbufv
, data_offset
);
1877 mutex_exit(&idt
->idt_mutex
);
1879 IDM_CONN_LOG(CE_WARN
, "idm_so_rx_rtt: could not find buffer");
1880 idm_pdu_rx_protocol_error(ic
, pdu
);
1884 /* return buffer contains this data */
1885 if (data_offset
+ data_length
> idb
->idb_buflen
) {
1887 mutex_exit(&idt
->idt_mutex
);
1889 IDM_CONN_LOG(CE_WARN
, "idm_so_rx_rtt: read from outside "
1891 idm_pdu_rx_protocol_error(ic
, pdu
);
1895 idt
->idt_r2t_ttt
= rtt_hdr
->ttt
;
1896 idt
->idt_exp_datasn
= 0;
1898 idm_so_send_rtt_data(ic
, idt
, idb
, data_offset
,
1899 ntohl(rtt_hdr
->data_length
));
1901 * the idt_mutex is released in idm_so_send_rtt_data
1904 idm_pdu_complete(pdu
, IDM_STATUS_SUCCESS
);
/*
 * idm_sorecvdata() receives the data segment of a PDU from the socket.
 *
 * The caller has already filled pdu->isp_iov[0 .. isp_iovlen-1] with the
 * destination(s) for isp_datalen bytes.  This routine appends iovec
 * entries for the pad bytes (when the data length is not a multiple of
 * ISCSI_PAD_WORD_LEN) and for the data digest (when enabled on the
 * connection), receives everything in one call and then verifies the
 * digest.
 *
 * Returns IDM_STATUS_IO if the receive fails, IDM_STATUS_DATA_DIGEST on a
 * CRC mismatch, and IDM_STATUS_SUCCESS otherwise.
 */
idm_status_t
idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
{
	uint8_t		pad[ISCSI_PAD_WORD_LEN];
	int		pad_len;
	uint32_t	data_digest_crc;
	uint32_t	crc_calculated;
	int		total_len;
	idm_so_conn_t	*so_conn;

	so_conn = ic->ic_transport_private;

	/* Bytes needed to round isp_datalen up to the pad word size. */
	pad_len = ((ISCSI_PAD_WORD_LEN -
	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
	    (ISCSI_PAD_WORD_LEN - 1));

	ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */

	total_len = pdu->isp_datalen;

	if (pad_len) {
		pdu->isp_iov[pdu->isp_iovlen].iov_base	= (char *)&pad;
		pdu->isp_iov[pdu->isp_iovlen].iov_len	= pad_len;
		total_len		+= pad_len;
		pdu->isp_iovlen++;
	}

	/* setup data digest */
	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
		pdu->isp_iov[pdu->isp_iovlen].iov_base =
		    (char *)&data_digest_crc;
		pdu->isp_iov[pdu->isp_iovlen].iov_len =
		    sizeof (data_digest_crc);
		total_len		+= sizeof (data_digest_crc);
		pdu->isp_iovlen++;
	}

	/* The first iovec entry is where the data segment starts. */
	pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;

	if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
	    pdu->isp_iovlen, total_len) != 0) {
		return (IDM_STATUS_IO);
	}

	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
		/* The digest covers the data followed by the pad bytes. */
		crc_calculated = idm_crc32c(pdu->isp_data,
		    pdu->isp_datalen);
		if (pad_len) {
			crc_calculated = idm_crc32c_continued((char *)&pad,
			    pad_len, crc_calculated);
		}
		if (crc_calculated != data_digest_crc) {
			IDM_CONN_LOG(CE_WARN,
			    "idm_sorecvdata: "
			    "CRC error: actual 0x%x, calc 0x%x",
			    data_digest_crc, crc_calculated);

			/* Invalid Data Digest */
			return (IDM_STATUS_DATA_DIGEST);
		}
	}

	return (IDM_STATUS_SUCCESS);
}
/*
 * idm_sorecv_scsidata() is used to receive scsi data from the socket.  The
 * Data-type PDU header must be read into the idm_pdu_t structure prior to
 * calling this function.
 *
 * The destination buffer is located from the task's inbufv (Data-In) or
 * outbufv (Data-Out) list using the PDU's buffer offset and is recorded in
 * pdu->isp_sorx_buf.  Returns IDM_STATUS_FAIL when the task or buffer
 * cannot be found or the buffer cannot hold the whole data segment;
 * otherwise returns the status of idm_sorecvdata().
 */
idm_status_t
idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
{
	iscsi_data_hdr_t	*bhs;
	idm_task_t		*task;
	uint32_t		offset;
	uint8_t			opcode;
	uint32_t		dlength;
	list_t			*buflst;
	uint32_t		xfer_bytes;
	idm_status_t		status;

	ASSERT(ic != NULL);
	ASSERT(pdu != NULL);

	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;

	offset	= ntohl(bhs->offset);
	opcode	= IDM_PDU_OPCODE(pdu);
	dlength = n2h24(bhs->dlength);

	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
	    (opcode == ISCSI_OP_SCSI_DATA));

	/*
	 * Successful lookup implicitly gets a "hold" on the task.  This
	 * hold must be released before leaving this function.  At one
	 * point we were caching this task context and retaining the hold
	 * but it turned out to be very difficult to release the hold properly.
	 * The task can be aborted and the connection shutdown between this
	 * call and the subsequent expected call to idm_so_rx_datain/
	 * idm_so_rx_dataout (in which case those functions are not called).
	 * Releasing the hold in the PDU callback doesn't work well either
	 * because the whole task may be completed by then at which point
	 * it is too late to release the hold -- for better or worse this
	 * code doesn't wait on the refcnts during normal operation.
	 * idm_task_find() is very fast and it is not a huge burden if we
	 * have to do it twice.
	 */
	task = idm_task_find(ic, bhs->itt, bhs->ttt);
	if (task == NULL) {
		IDM_CONN_LOG(CE_WARN,
		    "idm_sorecv_scsidata: could not find task");
		return (IDM_STATUS_FAIL);
	}

	/* Data-In lands in the inbufv list, Data-Out in the outbufv list. */
	mutex_enter(&task->idt_mutex);
	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
	    &task->idt_inbufv : &task->idt_outbufv;
	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
	mutex_exit(&task->idt_mutex);

	if (pdu->isp_sorx_buf == NULL) {
		idm_task_rele(task);
		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
		    "buffer for offset %x opcode=%x",
		    offset, opcode);
		return (IDM_STATUS_FAIL);
	}

	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
	ASSERT(xfer_bytes != 0);
	if (xfer_bytes != dlength) {
		idm_task_rele(task);
		/*
		 * Buffer overflow, connection error.  The PDU data is still
		 * sitting in the socket so we can't use the connection
		 * again until that data is drained.
		 */
		return (IDM_STATUS_FAIL);
	}

	status = idm_sorecvdata(ic, pdu);

	idm_task_rele(task);

	return (status);
}
2059 idm_fill_iov(idm_pdu_t
*pdu
, idm_buf_t
*idb
, uint32_t ro
, uint32_t dlength
)
2061 uint32_t buf_ro
= ro
- idb
->idb_bufoffset
;
2062 uint32_t xfer_len
= min(dlength
, idb
->idb_buflen
- buf_ro
);
2064 ASSERT(ro
>= idb
->idb_bufoffset
);
2066 pdu
->isp_iov
[pdu
->isp_iovlen
].iov_base
=
2067 (caddr_t
)idb
->idb_buf
+ buf_ro
;
2068 pdu
->isp_iov
[pdu
->isp_iovlen
].iov_len
= xfer_len
;
2075 idm_sorecv_nonscsidata(idm_conn_t
*ic
, idm_pdu_t
*pdu
)
2077 pdu
->isp_data
= kmem_alloc(pdu
->isp_datalen
, KM_SLEEP
);
2078 ASSERT(pdu
->isp_data
!= NULL
);
2080 pdu
->isp_databuflen
= pdu
->isp_datalen
;
2081 pdu
->isp_iov
[0].iov_base
= (caddr_t
)pdu
->isp_data
;
2082 pdu
->isp_iov
[0].iov_len
= pdu
->isp_datalen
;
2083 pdu
->isp_iovlen
= 1;
2085 * Since we are associating a new data buffer with this received
2086 * PDU we need to set a specific callback to free the data
2087 * after the PDU is processed.
2089 pdu
->isp_flags
|= IDM_PDU_ADDL_DATA
;
2090 pdu
->isp_callback
= idm_sorx_addl_pdu_cb
;
2092 return (idm_sorecvdata(ic
, pdu
));
/*
 * idm_sorx_thread() is the receive thread for a sockets connection.
 *
 * arg is the idm_conn_t.  The thread publishes its thread id and running
 * flag under ic_mutex and signals ic_cv, then loops reading one PDU at a
 * time (header, then any data segment) and passing it to idm_pdu_rx()
 * until ic_rx_thread_running is cleared.
 *
 * Locking: ic_mutex is held whenever the loop condition is evaluated and
 * dropped while a PDU is being received and processed.
 */
void
idm_sorx_thread(void *arg)
{
	boolean_t	conn_failure = B_FALSE;
	idm_conn_t	*ic = (idm_conn_t *)arg;
	idm_so_conn_t	*so_conn;
	idm_pdu_t	*pdu;
	idm_status_t	rc;

	idm_conn_hold(ic);

	mutex_enter(&ic->ic_mutex);

	so_conn = ic->ic_transport_private;
	so_conn->ic_rx_thread_running = B_TRUE;
	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
	cv_signal(&ic->ic_cv);

	while (so_conn->ic_rx_thread_running) {
		mutex_exit(&ic->ic_mutex);

		/*
		 * Get PDU with default header size (large enough for
		 * BHS plus any anticipated AHS).  PDU from
		 * the cache will have all values set correctly
		 * for sockets RX including callback.
		 */
		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
		pdu->isp_ic = ic;
		pdu->isp_flags = 0;
		pdu->isp_transport_hdrlen = 0;

		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
			/*
			 * Call idm_pdu_complete so that we call the callback
			 * and ensure any memory allocated in idm_sorecvhdr
			 * gets freed up.
			 */
			idm_pdu_complete(pdu, IDM_STATUS_FAIL);

			/*
			 * If ic_rx_thread_running is still set then
			 * this is some kind of connection problem
			 * on the socket.  In this case we want to
			 * generate an event.  Otherwise some other
			 * thread closed the socket due to another
			 * issue in which case we don't need to
			 * generate an event.
			 */
			mutex_enter(&ic->ic_mutex);
			if (so_conn->ic_rx_thread_running) {
				conn_failure = B_TRUE;
				so_conn->ic_rx_thread_running = B_FALSE;
			}

			continue;
		}

		/*
		 * Header has been read and validated.  Now we need
		 * to read the PDU data payload (if present).  SCSI data
		 * need to be transferred from the socket directly into
		 * the associated transfer buffer for the SCSI task.
		 */
		if (pdu->isp_datalen != 0) {
			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
				rc = idm_sorecv_scsidata(ic, pdu);
				/*
				 * All SCSI errors are fatal to the
				 * connection right now since we have no
				 * place to put the data.  What we need
				 * is some kind of sink to dispose of unwanted
				 * SCSI data.  For example an invalid task tag
				 * should not kill the connection (although
				 * we may want to drop the connection).
				 */
			} else {
				/*
				 * Not data PDUs so allocate a buffer for the
				 * data segment and read the remaining data.
				 */
				rc = idm_sorecv_nonscsidata(ic, pdu);
			}
			if (rc != 0) {
				/*
				 * Call idm_pdu_complete so that we call the
				 * callback and ensure any memory allocated
				 * in idm_sorecvhdr gets freed up.
				 */
				idm_pdu_complete(pdu, IDM_STATUS_FAIL);

				/*
				 * If ic_rx_thread_running is still set then
				 * this is some kind of connection problem
				 * on the socket.  In this case we want to
				 * generate an event.  Otherwise some other
				 * thread closed the socket due to another
				 * issue in which case we don't need to
				 * generate an event.
				 */
				mutex_enter(&ic->ic_mutex);
				if (so_conn->ic_rx_thread_running) {
					conn_failure = B_TRUE;
					so_conn->ic_rx_thread_running = B_FALSE;
				}
				continue;
			}
		}

		/*
		 * Process RX PDU
		 */
		idm_pdu_rx(ic, pdu);

		mutex_enter(&ic->ic_mutex);
	}

	mutex_exit(&ic->ic_mutex);

	/*
	 * If we dropped out of the RX processing loop because of
	 * a socket problem or other connection failure (including
	 * digest errors) then we need to generate a state machine
	 * event to shut the connection down.
	 * If the state machine is already in, for example, INIT_ERROR, this
	 * event will get dropped, and the TX thread will never be notified
	 * to shut down.  To be safe, we'll just notify it here.
	 */
	if (conn_failure) {
		if (so_conn->ic_tx_thread_running) {
			so_conn->ic_tx_thread_running = B_FALSE;
			mutex_enter(&so_conn->ic_tx_mutex);
			cv_signal(&so_conn->ic_tx_cv);
			mutex_exit(&so_conn->ic_tx_mutex);
		}

		/* rc still holds the status of the receive that failed. */
		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
	}

	idm_conn_rele(ic);

	thread_exit();
}
2243 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2244 * point. By definition, it is supposed to be fast. So, simply queue
2245 * the entry and return. The real work is done by idm_i_so_tx() via
2246 * idm_sotx_thread().
2250 idm_so_tx(idm_conn_t
*ic
, idm_pdu_t
*pdu
)
2252 idm_so_conn_t
*so_conn
= ic
->ic_transport_private
;
2254 ASSERT(pdu
->isp_ic
== ic
);
2255 mutex_enter(&so_conn
->ic_tx_mutex
);
2257 if (!so_conn
->ic_tx_thread_running
) {
2258 mutex_exit(&so_conn
->ic_tx_mutex
);
2259 idm_pdu_complete(pdu
, IDM_STATUS_ABORTED
);
2263 list_insert_tail(&so_conn
->ic_tx_list
, (void *)pdu
);
2264 cv_signal(&so_conn
->ic_tx_cv
);
2265 mutex_exit(&so_conn
->ic_tx_mutex
);
/*
 * idm_i_so_tx() transmits one PDU on the connection's socket.
 *
 * The PDU is gathered into a local iovec in wire order: header, optional
 * header digest, data segment, pad bytes, optional data digest.  Digests
 * are never added to PDUs flagged IDM_PDU_LOGIN_TX.  The PDU is always
 * completed (with the resulting status) before returning.
 *
 * Returns IDM_STATUS_SUCCESS, or IDM_STATUS_IO if the send failed.
 */
static idm_status_t
idm_i_so_tx(idm_pdu_t *pdu)
{
	idm_conn_t	*ic = pdu->isp_ic;
	idm_status_t	status = IDM_STATUS_SUCCESS;
	uint8_t		pad[ISCSI_PAD_WORD_LEN];
	int		pad_len;
	uint32_t	hdr_digest_crc;
	uint32_t	data_digest_crc = 0;
	int		total_len = 0;
	int		iovlen = 0;
	struct iovec	iov[6];
	idm_so_conn_t	*so_conn;

	so_conn = ic->ic_transport_private;

	/* Setup BHS */
	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
	iov[iovlen].iov_len	= pdu->isp_hdrlen;
	total_len		+= iov[iovlen].iov_len;
	iovlen++;

	/* Setup header digest */
	if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
	    (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
		hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);

		iov[iovlen].iov_base	= (caddr_t)&hdr_digest_crc;
		iov[iovlen].iov_len	= sizeof (hdr_digest_crc);
		total_len		+= iov[iovlen].iov_len;
		iovlen++;
	}

	/* Setup the data */
	if (pdu->isp_datalen) {
		idm_task_t		*idt;
		idm_buf_t		*idb;
		iscsi_data_hdr_t	*ihp;
		ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
		/* Write of immediate data */
		if (ic->ic_ffp &&
		    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_CMD ||
		    IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA)) {
			idt = idm_task_find(ic, ihp->itt, ihp->ttt);
			if (idt) {
				mutex_enter(&idt->idt_mutex);
				idb = idm_buf_find(&idt->idt_outbufv, 0);
				mutex_exit(&idt->idt_mutex);
				/*
				 * If the initiator call to idm_buf_alloc
				 * failed then we can get to this point
				 * without a bound buffer.  The associated
				 * connection failure will clean things up
				 * later.  It would be nice to come up with
				 * a cleaner way to handle this.  In
				 * particular it seems absurd to look up
				 * the task and the buffer just to update
				 * this counter.
				 */
				if (idb)
					idb->idb_xfer_len += pdu->isp_datalen;
				idm_task_rele(idt);
			}
		}

		iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
		iov[iovlen].iov_len  = pdu->isp_datalen;
		total_len += iov[iovlen].iov_len;
		iovlen++;
	}

	/* Setup the data pad if necessary */
	pad_len = ((ISCSI_PAD_WORD_LEN -
	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
	    (ISCSI_PAD_WORD_LEN - 1));

	if (pad_len) {
		/* Pad bytes go out as zeroes. */
		bzero(pad, sizeof (pad));
		iov[iovlen].iov_base = (void *)&pad;
		iov[iovlen].iov_len  = pad_len;
		total_len		+= iov[iovlen].iov_len;
		iovlen++;
	}

	/*
	 * Setup the data digest if enabled.  Data-digest is not sent
	 * for login-phase PDUs.
	 */
	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
	    ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
	    (pdu->isp_datalen || pad_len)) {
		/*
		 * RFC3720/10.2.3: A zero-length Data Segment also
		 * implies a zero-length data digest.
		 */
		if (pdu->isp_datalen) {
			data_digest_crc = idm_crc32c(pdu->isp_data,
			    pdu->isp_datalen);
		}
		if (pad_len) {
			data_digest_crc = idm_crc32c_continued(&pad,
			    pad_len, data_digest_crc);
		}

		iov[iovlen].iov_base	= (caddr_t)&data_digest_crc;
		iov[iovlen].iov_len	= sizeof (data_digest_crc);
		total_len		+= iov[iovlen].iov_len;
		iovlen++;
	}

	/* Transmit the PDU */
	if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
	    total_len) != 0) {
		/* Set error status */
		IDM_CONN_LOG(CE_WARN,
		    "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
		    "data: %p", (void *) so_conn->ic_so, (void *) ic,
		    (void *) pdu->isp_data);
		status = IDM_STATUS_IO;
	}

	/*
	 * Success does not mean that the PDU actually reached the
	 * remote node since it could get dropped along the way.
	 */
	idm_pdu_complete(pdu, status);

	return (status);
}
/*
 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
 * A target can invoke this function multiple times for a single read command
 * (identified by the same ITT) to split the input into several sequences.
 *
 * DataSN starts with 0 for the first data PDU of an input command and advances
 * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
 * which is set to 1 for the last data PDU of a sequence.
 * If the initiator supports phase collapse, the status bit must be set along
 * with the F bit to indicate that the status is shipped together with the last
 * data PDU.
 *
 * The data PDUs within a sequence will be sent in order with the buffer offset
 * in increasing order. i.e. initiator and target must have negotiated the
 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
 *
 * Caller holds idt->idt_mutex
 *
 * On the success path this function releases idt->idt_mutex itself.  On
 * the failure path (TX thread not running) the buffer is completed through
 * idm_buf_tx_to_ini_done() with IDM_STATUS_ABORTED and IDM_STATUS_FAIL is
 * returned.
 */
static idm_status_t
idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
{
	idm_so_conn_t	*so_conn = idb->idb_ic->ic_transport_private;
	idm_pdu_t	tmppdu;

	ASSERT(mutex_owned(&idt->idt_mutex));

	/*
	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
	 * idm_sotx_thread.
	 */
	mutex_enter(&so_conn->ic_tx_mutex);

	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);

	if (!so_conn->ic_tx_thread_running) {
		mutex_exit(&so_conn->ic_tx_mutex);
		/*
		 * Don't release idt->idt_mutex since we're supposed to hold
		 * in when calling idm_buf_tx_to_ini_done
		 */
		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
		    uint32_t, idb->idb_xfer_len,
		    int, XFER_BUF_TX_TO_INI);
		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
		return (IDM_STATUS_FAIL);
	}

	/*
	 * Build a template for the data PDU headers we will use so that
	 * the SN values will stay consistent with other PDU's we are
	 * transmitting like R2T and SCSI status.
	 */
	bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
	/* tmppdu only carries the header pointer for icb_build_hdr. */
	tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
	    ISCSI_OP_SCSI_DATA_RSP);
	idb->idb_tx_thread = B_TRUE;
	list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
	cv_signal(&so_conn->ic_tx_cv);
	mutex_exit(&so_conn->ic_tx_mutex);
	mutex_exit(&idt->idt_mutex);

	/*
	 * Returning success here indicates the transfer was successfully
	 * dispatched -- it does not mean that the transfer completed
	 * successfully.
	 */
	return (IDM_STATUS_SUCCESS);
}
/*
 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
 * data blocks it is ready to receive from the initiator in response to a WRITE
 * SCSI command. The target iSCSI layer passes the information about the desired
 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
 * offset and datalen are passed via the 'idb' argument.
 *
 * Scope for Prototype build:
 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
 * negotiated the "InitialR2T" to "Yes".
 *
 * Caller holds idt->idt_mutex
 *
 * idt->idt_mutex is released here before the R2T PDU is handed off for
 * transmission.  Always returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
{
	idm_pdu_t		*pdu;
	iscsi_rtt_hdr_t		*rtt;

	ASSERT(mutex_owned(&idt->idt_mutex));

	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);

	pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
	pdu->isp_ic = idt->idt_ic;
	pdu->isp_flags = IDM_PDU_SET_STATSN;
	bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));

	/* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);

	/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
	rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);

	rtt->opcode		= ISCSI_OP_RTT_RSP;
	rtt->flags		= ISCSI_FLAG_FINAL;
	rtt->data_offset	= htonl(idb->idb_bufoffset);
	rtt->data_length	= htonl(idb->idb_xfer_len);
	rtt->rttsn		= htonl(idt->idt_exp_rttsn++);

	/* Keep track of buffer offsets */
	idb->idb_exp_offset	= idb->idb_bufoffset;
	mutex_exit(&idt->idt_mutex);

	/*
	 * Transmit the PDU.
	 */
	idm_pdu_tx(pdu);

	return (IDM_STATUS_SUCCESS);
}
2531 idm_so_buf_alloc(idm_buf_t
*idb
, uint64_t buflen
)
2533 if ((buflen
> IDM_SO_BUF_CACHE_LB
) && (buflen
<= IDM_SO_BUF_CACHE_UB
)) {
2534 idb
->idb_buf
= kmem_cache_alloc(idm
.idm_so_128k_buf_cache
,
2536 idb
->idb_buf_private
= idm
.idm_so_128k_buf_cache
;
2538 idb
->idb_buf
= kmem_alloc(buflen
, KM_NOSLEEP
);
2539 idb
->idb_buf_private
= NULL
;
2542 if (idb
->idb_buf
== NULL
) {
2543 IDM_CONN_LOG(CE_NOTE
,
2544 "idm_so_buf_alloc: failed buffer allocation");
2545 return (IDM_STATUS_FAIL
);
2548 return (IDM_STATUS_SUCCESS
);
2553 idm_so_buf_setup(idm_buf_t
*idb
)
2555 /* Ensure bufalloc'd flag is unset */
2556 idb
->idb_bufalloc
= B_FALSE
;
2558 return (IDM_STATUS_SUCCESS
);
2563 idm_so_buf_teardown(idm_buf_t
*idb
)
2565 /* nothing to do here */
2569 idm_so_buf_free(idm_buf_t
*idb
)
2571 if (idb
->idb_buf_private
== NULL
) {
2572 kmem_free(idb
->idb_buf
, idb
->idb_buflen
);
2574 kmem_cache_free(idb
->idb_buf_private
, idb
->idb_buf
);
/*
 * idm_so_send_rtt_data: queue 'length' bytes of 'idb', starting at 'offset',
 * for transmission by the connection's TX thread.
 *
 * ic     - connection; ic_transport_private supplies the idm_so_conn_t.
 * idt    - task; idt->idt_mutex is asserted held on entry and is released
 *          on the paths visible here.
 * idb    - source buffer; only idb_buf is read.
 * offset - byte offset into idb->idb_buf, also stored as idb_bufoffset of
 *          the new buffer.
 * length - number of bytes, also stored as idb_xfer_len of the new buffer.
 *
 * A separate idm_buf_t (rtt_buf) is allocated over idb->idb_buf + offset,
 * bound to the task, given a data PDU header template built with
 * ISCSI_OP_SCSI_DATA, appended to so_conn->ic_tx_list, and ic_tx_cv is
 * signalled.  If the TX thread is not running the new buffer is freed
 * instead.
 *
 * NOTE(review): the embedded source line numbers skip in several places
 * (e.g. 2582 -> 2586, 2605 -> 2607, 2610 -> 2614, 2623 -> 2626,
 * 2636 -> 2642), so local declarations, at least one condition, the early
 * returns and possibly task reference handling are not visible in this
 * listing -- confirm against the complete file.
 */
2579 idm_so_send_rtt_data(idm_conn_t
*ic
, idm_task_t
*idt
, idm_buf_t
*idb
,
2580 uint32_t offset
, uint32_t length
)
2582 idm_so_conn_t
*so_conn
= ic
->ic_transport_private
;
2586 ASSERT(mutex_owned(&idt
->idt_mutex
));
2589 * Allocate a buffer to represent the RTT transfer. We could further
2590 * optimize this by allocating the buffers internally from an rtt
2591 * specific buffer cache since this is socket-specific code but for
2592 * now we will keep it simple.
2594 rtt_buf
= idm_buf_alloc(ic
, (uint8_t *)idb
->idb_buf
+ offset
, length
);
2595 if (rtt_buf
== NULL
) {
2597 * If we're in FFP then the failure was likely a resource
2598 * allocation issue and we should close the connection by
2599 * sending a CE_TRANSPORT_FAIL event.
2601 * If we're not in FFP then idm_buf_alloc will always
2602 * fail and the state is transitioning to "complete" anyway
2603 * so we won't bother to send an event.
2605 mutex_enter(&ic
->ic_state_mutex
);
2607 idm_conn_event_locked(ic
, CE_TRANSPORT_FAIL
,
2608 (uintptr_t)NULL
, CT_NONE
);
2609 mutex_exit(&ic
->ic_state_mutex
);
2610 mutex_exit(&idt
->idt_mutex
);
/* No completion callback is attached to the internally allocated buffer. */
2614 rtt_buf
->idb_buf_cb
= NULL
;
2615 rtt_buf
->idb_cb_arg
= NULL
;
2616 rtt_buf
->idb_bufoffset
= offset
;
2617 rtt_buf
->idb_xfer_len
= length
;
2618 rtt_buf
->idb_ic
= idt
->idt_ic
;
2619 rtt_buf
->idb_task_binding
= idt
;
2622 * The new buffer (if any) represents an additional
2623 * reference on the task
2626 mutex_exit(&idt
->idt_mutex
);
2629 * Put the idm_buf_t on the tx queue. It will be transmitted by
2632 mutex_enter(&so_conn
->ic_tx_mutex
);
2634 if (!so_conn
->ic_tx_thread_running
) {
2635 idm_buf_free(rtt_buf
);
2636 mutex_exit(&so_conn
->ic_tx_mutex
);
2642 * Build a template for the data PDU headers we will use so that
2643 * the SN values will stay consistent with other PDU's we are
2644 * transmitting like R2T and SCSI status.
2646 bzero(&rtt_buf
->idb_data_hdr_tmpl
, sizeof (iscsi_hdr_t
));
2647 tmppdu
.isp_hdr
= &rtt_buf
->idb_data_hdr_tmpl
;
2648 (*idt
->idt_ic
->ic_conn_ops
.icb_build_hdr
)(idt
, &tmppdu
,
2649 ISCSI_OP_SCSI_DATA
);
/* Both flags are set before the buffer becomes visible on the TX list. */
2650 rtt_buf
->idb_tx_thread
= B_TRUE
;
2651 rtt_buf
->idb_in_transport
= B_TRUE
;
2652 list_insert_tail(&so_conn
->ic_tx_list
, (void *)rtt_buf
);
2653 cv_signal(&so_conn
->ic_tx_cv
);
2654 mutex_exit(&so_conn
->ic_tx_mutex
);
/*
 * idm_so_send_rtt_data_done: completion step for a buffer that was queued
 * by idm_so_send_rtt_data().  The visible statement clears idb_in_transport.
 *
 * NOTE(review): the embedded source line numbers jump from 2664 to 2670 and
 * 'idt' is not used in the visible text, so further statements (presumably
 * releasing the task reference and the buffer) are likely hidden -- confirm
 * against the complete file.
 */
2658 idm_so_send_rtt_data_done(idm_task_t
*idt
, idm_buf_t
*idb
)
2661 * Don't worry about status -- we assume any error handling
2662 * is performed by the caller (idm_sotx_thread).
2664 idb
->idb_in_transport
= B_FALSE
;
/*
 * idm_so_send_buf_region: transmit a region of 'idb' as data PDUs, each
 * carrying at most ic_conn_params.max_xmit_dataseglen bytes.
 *
 * idt               - owning task; idt->idt_mutex is asserted held on entry.
 *                     It is dropped around each idm_i_so_tx() call and
 *                     re-taken afterwards, including on the failure path.
 * idb               - buffer; idb_data_hdr_tmpl is copied into every PDU
 *                     header and idb_buf supplies the payload.
 * buf_region_offset - starting offset of the region within idb_buf.
 * buf_region_length - number of bytes to send.
 *
 * Visible return values are IDM_STATUS_ABORTED when the task is no longer
 * TASK_ACTIVE and IDM_STATUS_SUCCESS at the end.
 *
 * NOTE(review): the embedded source line numbers skip (e.g. 2686 -> 2689,
 * 2697 -> 2702, 2737 -> 2740, 2769 -> 2773), so the loop construct, the
 * non-chunked branch, the 'remainder' update and the failure return are not
 * visible in this listing -- confirm against the complete file.
 */
2670 idm_so_send_buf_region(idm_task_t
*idt
, idm_buf_t
*idb
,
2671 uint32_t buf_region_offset
, uint32_t buf_region_length
)
2674 uint32_t max_dataseglen
;
2675 size_t remainder
, chunk
;
2676 uint32_t data_offset
= buf_region_offset
;
2677 iscsi_data_hdr_t
*bhs
;
2679 idm_status_t tx_status
;
2681 ASSERT(mutex_owned(&idt
->idt_mutex
));
2685 max_dataseglen
= ic
->ic_conn_params
.max_xmit_dataseglen
;
2686 remainder
= buf_region_length
;
2689 if (idt
->idt_state
!= TASK_ACTIVE
) {
2690 ASSERT((idt
->idt_state
!= TASK_IDLE
) &&
2691 (idt
->idt_state
!= TASK_COMPLETE
));
2692 return (IDM_STATUS_ABORTED
);
2695 /* check to see if we need to chunk the data */
2696 if (remainder
> max_dataseglen
) {
2697 chunk
= max_dataseglen
;
2702 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2703 pdu
= kmem_cache_alloc(idm
.idm_sotx_pdu_cache
, KM_SLEEP
);
2705 pdu
->isp_flags
= 0; /* initialize isp_flags */
2708 * We've already built a build a header template
2709 * to use during the transfer. Use this template so that
2710 * the SN values stay consistent with any unrelated PDU's
2711 * being transmitted.
2713 bcopy(&idb
->idb_data_hdr_tmpl
, pdu
->isp_hdr
,
2714 sizeof (iscsi_hdr_t
));
2717 * Set DataSN, data offset, and flags in BHS
2718 * For the prototype build, A = 0, S = 0, U = 0
2720 bhs
= (iscsi_data_hdr_t
*)(pdu
->isp_hdr
);
/* Post-increment: the current idt_exp_datasn is sent, then the counter advances. */
2722 bhs
->datasn
= htonl(idt
->idt_exp_datasn
++);
2724 hton24(bhs
->dlength
, chunk
);
2725 bhs
->offset
= htonl(idb
->idb_bufoffset
+ data_offset
);
/* The payload is referenced in place inside idb_buf, not copied. */
2728 pdu
->isp_data
= (uint8_t *)idb
->idb_buf
+ data_offset
;
2729 pdu
->isp_datalen
= (uint_t
)chunk
;
/* chunk == remainder means this PDU carries the last bytes of the region. */
2731 if (chunk
== remainder
) {
2732 bhs
->flags
= ISCSI_FLAG_FINAL
; /* F bit set to 1 */
2733 /* Piggyback the status with the last data PDU */
2734 if (idt
->idt_flags
& IDM_TASK_PHASECOLLAPSE_REQ
) {
2735 pdu
->isp_flags
|= IDM_PDU_SET_STATSN
|
2736 IDM_PDU_ADVANCE_STATSN
;
2737 (*idt
->idt_ic
->ic_conn_ops
.icb_update_statsn
)
2740 IDM_TASK_PHASECOLLAPSE_SUCCESS
;
2746 data_offset
+= chunk
;
2748 /* Instrument the data-send DTrace probe. */
2749 if (IDM_PDU_OPCODE(pdu
) == ISCSI_OP_SCSI_DATA_RSP
) {
2750 DTRACE_ISCSI_2(data__send
,
2751 idm_conn_t
*, idt
->idt_ic
,
2752 iscsi_data_rsp_hdr_t
*,
2753 (iscsi_data_rsp_hdr_t
*)pdu
->isp_hdr
);
2757 * Now that we're done working with idt_exp_datasn,
2758 * idt->idt_state and idb->idb_bufoffset we can release
2759 * the task lock -- don't want to hold it across the
2760 * call to idm_i_so_tx since we could block.
2762 mutex_exit(&idt
->idt_mutex
);
2765 * Transmit the PDU. Call the internal routine directly
2766 * as there is already implicit ordering.
2768 if ((tx_status
= idm_i_so_tx(pdu
)) != IDM_STATUS_SUCCESS
) {
/* The task lock is re-taken on the failure path as well. */
2769 mutex_enter(&idt
->idt_mutex
);
2773 mutex_enter(&idt
->idt_mutex
);
2774 idt
->idt_tx_bytes
+= chunk
;
2777 return (IDM_STATUS_SUCCESS
);
2785 idm_sotx_pdu_constructor(void *hdl
, void *arg
, int flags
)
2787 idm_pdu_t
*pdu
= hdl
;
2789 bzero(pdu
, sizeof (idm_pdu_t
));
2790 pdu
->isp_hdr
= (iscsi_hdr_t
*)(pdu
+ 1); /* Ptr arithmetic */
2791 pdu
->isp_hdrlen
= sizeof (iscsi_hdr_t
);
2792 pdu
->isp_callback
= idm_sotx_cache_pdu_cb
;
2793 pdu
->isp_magic
= IDM_PDU_MAGIC
;
2794 bzero(pdu
->isp_hdr
, sizeof (iscsi_hdr_t
));
2801 idm_sotx_cache_pdu_cb(idm_pdu_t
*pdu
, idm_status_t status
)
2803 /* reset values between use */
2804 pdu
->isp_datalen
= 0;
2806 kmem_cache_free(idm
.idm_sotx_pdu_cache
, pdu
);
2814 idm_sorx_pdu_constructor(void *hdl
, void *arg
, int flags
)
2816 idm_pdu_t
*pdu
= hdl
;
2818 bzero(pdu
, sizeof (idm_pdu_t
));
2819 pdu
->isp_magic
= IDM_PDU_MAGIC
;
2820 pdu
->isp_hdr
= (iscsi_hdr_t
*)(pdu
+ 1); /* Ptr arithmetic */
2821 pdu
->isp_callback
= idm_sorx_cache_pdu_cb
;
2828 idm_sorx_cache_pdu_cb(idm_pdu_t
*pdu
, idm_status_t status
)
2830 pdu
->isp_iovlen
= 0;
2831 pdu
->isp_sorx_buf
= 0;
2832 kmem_cache_free(idm
.idm_sorx_pdu_cache
, pdu
);
2836 idm_sorx_addl_pdu_cb(idm_pdu_t
*pdu
, idm_status_t status
)
2839 * We had to modify our cached RX PDU with a longer header buffer
2840 * and/or a longer data buffer. Release the new buffers and fix
2841 * the fields back to what we would expect for a cached RX PDU.
2843 if (pdu
->isp_flags
& IDM_PDU_ADDL_HDR
) {
2844 kmem_free(pdu
->isp_hdr
, pdu
->isp_hdrlen
);
2846 if (pdu
->isp_flags
& IDM_PDU_ADDL_DATA
) {
2847 kmem_free(pdu
->isp_data
, pdu
->isp_datalen
);
2849 pdu
->isp_hdr
= (iscsi_hdr_t
*)(pdu
+ 1);
2850 pdu
->isp_hdrlen
= sizeof (iscsi_hdr_t
);
2851 pdu
->isp_data
= NULL
;
2852 pdu
->isp_datalen
= 0;
2853 pdu
->isp_sorx_buf
= 0;
2854 pdu
->isp_callback
= idm_sorx_cache_pdu_cb
;
2855 idm_sorx_cache_pdu_cb(pdu
, status
);
2859 * This thread is only active when I/O is queued for transmit
2860 * because the socket is busy.
/*
 * idm_sotx_thread: transmit worker for one sockets connection ('arg' is the
 * idm_conn_t).
 *
 * Startup: under ic_mutex it marks ic_tx_thread_running, records the thread
 * id in ic_tx_thread_did and signals ic_cv.
 *
 * Main loop (while ic_tx_thread_running, entered holding ic_tx_mutex):
 * waits on ic_tx_cv while ic_tx_list is empty, removes the head object,
 * drops ic_tx_mutex and dispatches on idm_tx_obj_magic:
 *   IDM_PDU_MAGIC - optionally calls icb_update_statsn, then idm_i_so_tx().
 *   IDM_BUF_MAGIC - sends the whole buffer with idm_so_send_buf_region()
 *                   under idt_mutex, then runs the completion step.
 *   otherwise     - logs a warning and sets status to IDM_STATUS_FAIL.
 * ic_tx_mutex is re-taken afterwards; a non-success status clears
 * ic_tx_thread_running and raises CE_TRANSPORT_FAIL.
 *
 * Shutdown: every object left on ic_tx_list is removed; PDUs are completed
 * with IDM_STATUS_ABORTED and buffers get the same completion step with
 * IDM_STATUS_ABORTED.
 *
 * NOTE(review): the embedded source line numbers skip in many places
 * (e.g. 2887 -> 2892, 2937 -> 2939, 2955 -> 2960, 3017 -> end), so break
 * statements, else branches, case labels and the thread exit are not visible
 * in this listing -- confirm against the complete file.
 */
2863 idm_sotx_thread(void *arg
)
2865 idm_conn_t
*ic
= arg
;
2866 idm_tx_obj_t
*object
, *next
;
2867 idm_so_conn_t
*so_conn
;
2868 idm_status_t status
= IDM_STATUS_SUCCESS
;
2872 mutex_enter(&ic
->ic_mutex
);
2873 so_conn
= ic
->ic_transport_private
;
2874 so_conn
->ic_tx_thread_running
= B_TRUE
;
2875 so_conn
->ic_tx_thread_did
= so_conn
->ic_tx_thread
->t_did
;
2876 cv_signal(&ic
->ic_cv
);
2877 mutex_exit(&ic
->ic_mutex
);
2879 mutex_enter(&so_conn
->ic_tx_mutex
);
2881 while (so_conn
->ic_tx_thread_running
) {
2882 while (list_is_empty(&so_conn
->ic_tx_list
)) {
2883 DTRACE_PROBE1(soconn__tx__sleep
, idm_conn_t
*, ic
);
2884 cv_wait(&so_conn
->ic_tx_cv
, &so_conn
->ic_tx_mutex
);
2885 DTRACE_PROBE1(soconn__tx__wakeup
, idm_conn_t
*, ic
);
/* The running flag is re-checked after every wakeup. */
2887 if (!so_conn
->ic_tx_thread_running
) {
2892 object
= (idm_tx_obj_t
*)list_head(&so_conn
->ic_tx_list
);
2893 list_remove(&so_conn
->ic_tx_list
, object
);
/* The list lock is not held while the object is transmitted. */
2894 mutex_exit(&so_conn
->ic_tx_mutex
);
2896 switch (object
->idm_tx_obj_magic
) {
2897 case IDM_PDU_MAGIC
: {
2898 idm_pdu_t
*pdu
= (idm_pdu_t
*)object
;
2899 DTRACE_PROBE2(soconn__tx__pdu
, idm_conn_t
*, ic
,
2900 idm_pdu_t
*, (idm_pdu_t
*)object
);
2902 if (pdu
->isp_flags
& IDM_PDU_SET_STATSN
) {
2904 (ic
->ic_conn_ops
.icb_update_statsn
)(NULL
, pdu
);
2906 status
= idm_i_so_tx((idm_pdu_t
*)object
);
2909 case IDM_BUF_MAGIC
: {
2910 idm_buf_t
*idb
= (idm_buf_t
*)object
;
2911 idm_task_t
*idt
= idb
->idb_task_binding
;
2913 DTRACE_PROBE2(soconn__tx__buf
, idm_conn_t
*, ic
,
2916 mutex_enter(&idt
->idt_mutex
);
2917 status
= idm_so_send_buf_region(idt
,
2918 idb
, 0, idb
->idb_xfer_len
);
2921 * TX thread owns the buffer so we expect it to
2924 ASSERT(idb
->idb_in_transport
);
2925 if (IDM_CONN_ISTGT(ic
)) {
2927 * idm_buf_tx_to_ini_done releases
2930 DTRACE_ISCSI_8(xfer__done
,
2931 idm_conn_t
*, idt
->idt_ic
,
2932 uintptr_t, idb
->idb_buf
,
2933 uint32_t, idb
->idb_bufoffset
,
2934 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2935 uint32_t, idb
->idb_xfer_len
,
2936 int, XFER_BUF_TX_TO_INI
);
2937 idm_buf_tx_to_ini_done(idt
, idb
, status
);
2939 idm_so_send_rtt_data_done(idt
, idb
);
2940 mutex_exit(&idt
->idt_mutex
);
2946 IDM_CONN_LOG(CE_WARN
, "idm_sotx_thread: Unknown magic "
2947 "(0x%08x)", object
->idm_tx_obj_magic
);
2948 status
= IDM_STATUS_FAIL
;
2951 mutex_enter(&so_conn
->ic_tx_mutex
);
/* Any failure stops the loop and reports a transport failure on the connection. */
2953 if (status
!= IDM_STATUS_SUCCESS
) {
2954 so_conn
->ic_tx_thread_running
= B_FALSE
;
2955 idm_conn_event(ic
, CE_TRANSPORT_FAIL
, status
);
2960 * Before we leave, we need to abort every item remaining in the
2965 object
= (idm_tx_obj_t
*)list_head(&so_conn
->ic_tx_list
);
2967 while (object
!= NULL
) {
/* The successor is fetched before the current object is unlinked. */
2968 next
= list_next(&so_conn
->ic_tx_list
, object
);
2970 list_remove(&so_conn
->ic_tx_list
, object
);
2971 switch (object
->idm_tx_obj_magic
) {
2973 idm_pdu_complete((idm_pdu_t
*)object
,
2974 IDM_STATUS_ABORTED
);
2977 case IDM_BUF_MAGIC
: {
2978 idm_buf_t
*idb
= (idm_buf_t
*)object
;
2979 idm_task_t
*idt
= idb
->idb_task_binding
;
/* ic_tx_mutex is dropped before idt_mutex is taken and re-taken afterwards. */
2980 mutex_exit(&so_conn
->ic_tx_mutex
);
2981 mutex_enter(&idt
->idt_mutex
);
2983 * TX thread owns the buffer so we expect it to
2986 ASSERT(idb
->idb_in_transport
);
2987 if (IDM_CONN_ISTGT(ic
)) {
2989 * idm_buf_tx_to_ini_done releases
2992 DTRACE_ISCSI_8(xfer__done
,
2993 idm_conn_t
*, idt
->idt_ic
,
2994 uintptr_t, idb
->idb_buf
,
2995 uint32_t, idb
->idb_bufoffset
,
2996 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2997 uint32_t, idb
->idb_xfer_len
,
2998 int, XFER_BUF_TX_TO_INI
);
2999 idm_buf_tx_to_ini_done(idt
, idb
,
3000 IDM_STATUS_ABORTED
);
3002 idm_so_send_rtt_data_done(idt
, idb
);
3003 mutex_exit(&idt
->idt_mutex
);
3005 mutex_enter(&so_conn
->ic_tx_mutex
);
3009 IDM_CONN_LOG(CE_WARN
,
3010 "idm_sotx_thread: Unexpected magic "
3011 "(0x%08x)", object
->idm_tx_obj_magic
);
3017 mutex_exit(&so_conn
->ic_tx_mutex
);
3024 idm_so_socket_set_nonblock(struct sonode
*node
)
3026 (void) fop_setfl(node
->so_vnode
, node
->so_flag
,
3027 (node
->so_state
| FNONBLOCK
), CRED(), NULL
);
3031 idm_so_socket_set_block(struct sonode
*node
)
3033 (void) fop_setfl(node
->so_vnode
, node
->so_flag
,
3034 (node
->so_state
& (~FNONBLOCK
)), CRED(), NULL
);
3039 * Called by kernel sockets when the connection has been accepted or
3040 * rejected. In early volo, a "disconnect" callback was sent instead of
3041 * "connectfailed", so we check for both.
/*
 * idm_so_timed_socket_connect_cb: ksocket callback used while a timed
 * connect is outstanding.
 *
 * ks   - socket the event is for (not referenced in the visible text)
 * ev   - asserted to be KSOCKET_EV_CONNECTED, KSOCKET_EV_CONNECTFAILED or
 *        KSOCKET_EV_DISCONNECTED
 * arg  - the idm_so_timed_socket_t of the waiting thread
 * info - event detail; stored (cast to int) as the error code on failure
 *
 * Under idm_so_timed_socket_mutex it sets it_callback_called, records 0 as
 * the error code for KSOCKET_EV_CONNECTED, and signals it_cv.
 *
 * NOTE(review): the embedded source line numbers jump from 3059 to 3062, so
 * the code that forces a non-zero error code (described by the comment at
 * 3059) is not visible in this listing -- confirm against the complete file.
 */
3045 idm_so_timed_socket_connect_cb(ksocket_t ks
,
3046 ksocket_callback_event_t ev
, void *arg
, uintptr_t info
)
3048 idm_so_timed_socket_t
*itp
= arg
;
3049 ASSERT(itp
!= NULL
);
3050 ASSERT(ev
== KSOCKET_EV_CONNECTED
||
3051 ev
== KSOCKET_EV_CONNECTFAILED
||
3052 ev
== KSOCKET_EV_DISCONNECTED
);
3054 mutex_enter(&idm_so_timed_socket_mutex
);
/* Set under the mutex so the waiter can tell a callback from a timeout. */
3055 itp
->it_callback_called
= B_TRUE
;
3056 if (ev
== KSOCKET_EV_CONNECTED
) {
3057 itp
->it_socket_error_code
= 0;
3059 /* Make sure the error code is non-zero on error */
3062 itp
->it_socket_error_code
= (int)info
;
3064 cv_signal(&itp
->it_cv
);
3065 mutex_exit(&idm_so_timed_socket_mutex
);
/*
 * idm_so_timed_socket_connect: connect 'ks' to 'sa' with an upper bound on
 * how long the attempt may take.
 *
 * ks             - kernel socket to connect
 * sa, sa_sz      - destination address and its size
 * login_max_usec - time budget in microseconds; converted to ticks and added
 *                  to the current lbolt to form the deadline
 *
 * Visible steps: register idm_so_timed_socket_connect_cb for the connected,
 * connectfailed and disconnected events with a stack-allocated
 * idm_so_timed_socket_t as the argument; switch the socket with FIONBIO;
 * call ksocket_connect(), treating 0 and EISCONN as success and
 * EINPROGRESS/EALREADY as "still connecting"; if the callback has not run,
 * wait on it_cv with cv_timedwait() until the deadline; when the callback
 * has run, take rc from it_socket_error_code.  On the way out FIONBIO is
 * issued again, the callbacks are cleared and the condition variable is
 * destroyed.
 *
 * NOTE(review): the embedded source line numbers skip in many places
 * (e.g. 3091 -> 3095, 3097 -> 3102, 3112 -> 3116, 3154 -> end), so the
 * error checks, the values assigned to 'nonblocking', the retry loop
 * construct, labels and the return statement are not visible in this
 * listing -- confirm against the complete file.
 */
3069 idm_so_timed_socket_connect(ksocket_t ks
,
3070 struct sockaddr_storage
*sa
, int sa_sz
, int login_max_usec
)
3072 clock_t conn_login_max
;
3073 int rc
, nonblocking
, rval
;
3074 idm_so_timed_socket_t it
;
3075 ksocket_callbacks_t ks_cb
;
/* Absolute deadline, in lbolt ticks. */
3077 conn_login_max
= ddi_get_lbolt() + drv_usectohz(login_max_usec
);
3080 * Set to non-block socket mode, with callback on connect
3081 * Early volo used "disconnected" instead of "connectfailed",
3082 * so set callback to look for both.
3084 bzero(&it
, sizeof (it
));
3085 ks_cb
.ksock_cb_flags
= KSOCKET_CB_CONNECTED
|
3086 KSOCKET_CB_CONNECTFAILED
| KSOCKET_CB_DISCONNECTED
;
3087 ks_cb
.ksock_cb_connected
= idm_so_timed_socket_connect_cb
;
3088 ks_cb
.ksock_cb_connectfailed
= idm_so_timed_socket_connect_cb
;
3089 ks_cb
.ksock_cb_disconnected
= idm_so_timed_socket_connect_cb
;
3090 cv_init(&it
.it_cv
, NULL
, CV_DEFAULT
, NULL
);
3091 rc
= ksocket_setcallbacks(ks
, &ks_cb
, &it
, CRED());
3095 /* Set to non-blocking mode */
3097 rc
= ksocket_ioctl(ks
, FIONBIO
, (intptr_t)&nonblocking
, &rval
,
/*
 * NOTE(review): 'it' is zeroed again here, after cv_init() was applied to
 * it.it_cv and after &it was registered as the callback argument above --
 * confirm this is intended.
 */
3102 bzero(&it
, sizeof (it
));
3105 * Warning -- in a loopback scenario, the call to
3106 * the connect_cb can occur inside the call to
3107 * ksocket_connect. Do not hold the mutex around the
3108 * call to ksocket_connect.
3110 rc
= ksocket_connect(ks
, (struct sockaddr
*)sa
, sa_sz
, CRED());
3111 if (rc
== 0 || rc
== EISCONN
) {
3112 /* socket success or already success */
3116 if ((rc
!= EINPROGRESS
) && (rc
!= EALREADY
)) {
3120 /* TCP connect still in progress. See if out of time. */
3121 if (ddi_get_lbolt() > conn_login_max
) {
3123 * Connection retry timeout,
3124 * failed connect to target.
3131 * TCP connect still in progress. Sleep until callback.
3132 * Do NOT go to sleep if the callback already occurred!
3134 mutex_enter(&idm_so_timed_socket_mutex
);
3135 if (!it
.it_callback_called
) {
3136 (void) cv_timedwait(&it
.it_cv
,
3137 &idm_so_timed_socket_mutex
, conn_login_max
);
/* Re-checked after the wait: a wakeup may be the callback or the timeout. */
3139 if (it
.it_callback_called
) {
3140 rc
= it
.it_socket_error_code
;
3141 mutex_exit(&idm_so_timed_socket_mutex
);
3144 /* If timer expires, go call ksocket_connect one last time. */
3145 mutex_exit(&idm_so_timed_socket_mutex
);
3148 /* resume blocking mode */
3150 (void) ksocket_ioctl(ks
, FIONBIO
, (intptr_t)&nonblocking
, &rval
,
/* Callbacks are cleared before the stack-allocated 'it' goes out of scope. */
3153 (void) ksocket_setcallbacks(ks
, NULL
, NULL
, CRED());
3154 cv_destroy(&it
.it_cv
);
/*
 * idm_addr_to_sa: fill the sockaddr_storage '*sa' from the portal address
 * 'dportal'.
 *
 * '*sa' is zeroed first.  The address family is chosen from
 * dportal->a_addr.i_insize:
 *   sizeof (struct in_addr)  - AF_INET; port and in4 address copied in
 *   sizeof (struct in6_addr) - AF_INET6; port and in6 address copied in
 * The port is converted with htons() in both cases.
 *
 * NOTE(review): the embedded source line numbers skip (3163 -> 3166,
 * 3185 -> end), so the declaration of dp_addr_size, the handling of any
 * other address size and the return statement are not visible in this
 * listing -- confirm against the complete file.
 */
3163 idm_addr_to_sa(idm_addr_t
*dportal
, struct sockaddr_storage
*sa
)
3166 struct sockaddr_in
*sin
;
3167 struct sockaddr_in6
*sin6
;
3169 /* Build sockaddr_storage for this portal (idm_addr_t) */
3170 bzero(sa
, sizeof (*sa
));
3171 dp_addr_size
= dportal
->a_addr
.i_insize
;
3172 if (dp_addr_size
== sizeof (struct in_addr
)) {
3174 sa
->ss_family
= AF_INET
;
3175 sin
= (struct sockaddr_in
*)sa
;
3176 sin
->sin_port
= htons(dportal
->a_port
);
3177 bcopy(&dportal
->a_addr
.i_addr
.in4
,
3178 &sin
->sin_addr
, sizeof (struct in_addr
));
3179 } else if (dp_addr_size
== sizeof (struct in6_addr
)) {
3181 sa
->ss_family
= AF_INET6
;
3182 sin6
= (struct sockaddr_in6
*)sa
;
3183 sin6
->sin6_port
= htons(dportal
->a_port
);
3184 bcopy(&dportal
->a_addr
.i_addr
.in6
,
3185 &sin6
->sin6_addr
, sizeof (struct in6_addr
));
3193 * return a human-readable form of a sockaddr_storage, in the form
3194 * [ip-address]:port. This is used in calls to logging functions.
3195 * If several calls to idm_sa_ntop are made within the same invocation
3196 * of a logging function, then each one needs its own buf.
3199 idm_sa_ntop(const struct sockaddr_storage
*sa
,
3200 char *buf
, size_t size
)
3202 static const char bogus_ip
[] = "[0].-1";
3203 char tmp
[INET6_ADDRSTRLEN
];
3205 switch (sa
->ss_family
) {
3207 const struct sockaddr_in6
*in6
=
3208 (const struct sockaddr_in6
*) sa
;
3210 (void) inet_ntop(in6
->sin6_family
, &in6
->sin6_addr
, tmp
,
3212 if (strlen(tmp
) + sizeof ("[].65535") > size
)
3214 /* struct sockaddr_storage gets port info from v4 loc */
3215 (void) snprintf(buf
, size
, "[%s].%u", tmp
,
3216 ntohs(in6
->sin6_port
));
3220 const struct sockaddr_in
*in
= (const struct sockaddr_in
*) sa
;
3222 (void) inet_ntop(in
->sin_family
, &in
->sin_addr
, tmp
,
3224 if (strlen(tmp
) + sizeof ("[].65535") > size
)
3226 (void) snprintf(buf
, size
, "[%s].%u", tmp
,
3227 ntohs(in
->sin_port
));
3234 (void) snprintf(buf
, size
, "%s", bogus_ip
);