Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / io / idm / idm_so.c
blob29c344eb1b5d0ea6638121e4756f2850aa81002a
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright (c) 2013 by Delphix. All rights reserved.
27 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
30 #include <sys/conf.h>
31 #include <sys/stat.h>
32 #include <sys/file.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/modctl.h>
36 #include <sys/priv.h>
37 #include <sys/cpuvar.h>
38 #include <sys/socket.h>
39 #include <sys/strsubr.h>
40 #include <sys/sysmacros.h>
41 #include <sys/sdt.h>
42 #include <netinet/tcp.h>
43 #include <inet/tcp.h>
44 #include <sys/socketvar.h>
45 #include <sys/pathname.h>
46 #include <sys/fs/snode.h>
47 #include <sys/fs/dv_node.h>
48 #include <sys/vnode.h>
49 #include <netinet/in.h>
50 #include <net/if.h>
51 #include <sys/sockio.h>
52 #include <sys/ksocket.h>
53 #include <sys/filio.h> /* FIONBIO */
54 #include <sys/iscsi_protocol.h>
55 #include <sys/idm/idm.h>
56 #include <sys/idm/idm_so.h>
57 #include <sys/idm/idm_text.h>
59 #define IN_PROGRESS_DELAY 1
62 * in6addr_any is currently all zeroes, but use the macro in case this
63 * ever changes.
65 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
67 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
68 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
69 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
71 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
72 static void idm_so_conn_destroy_common(idm_conn_t *ic);
73 static void idm_so_conn_connect_common(idm_conn_t *ic);
75 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
76 boolean_t boot_conn);
77 static void idm_set_postconnect_options(ksocket_t so);
78 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
80 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
81 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
82 idm_buf_t *idb, uint32_t offset, uint32_t length);
83 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
84 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
85 idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
87 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
88 uint32_t ro, uint32_t dlength);
90 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
91 nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
93 static void idm_so_socket_set_nonblock(struct sonode *node);
94 static void idm_so_socket_set_block(struct sonode *node);
97 * Transport ops prototypes
99 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
100 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
101 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
102 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
103 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
104 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
105 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
106 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
107 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
108 static void idm_so_notice_key_values(idm_conn_t *it,
109 nvlist_t *negotiated_nvl);
110 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
111 nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
112 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
113 idm_transport_caps_t *caps);
114 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
115 static void idm_so_buf_free(idm_buf_t *idb);
116 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
117 static void idm_so_buf_teardown(idm_buf_t *idb);
118 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
119 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
120 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
121 static void idm_so_tgt_svc_offline(idm_svc_t *is);
122 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
123 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
124 static void idm_so_conn_disconnect(idm_conn_t *ic);
125 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
126 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
127 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
130 * IDM Native Sockets transport operations
132 static
133 idm_transport_ops_t idm_so_transport_ops = {
134 idm_so_tx, /* it_tx_pdu */
135 idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */
136 idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */
137 idm_so_rx_datain, /* it_rx_datain */
138 idm_so_rx_rtt, /* it_rx_rtt */
139 idm_so_rx_dataout, /* it_rx_dataout */
140 NULL, /* it_alloc_conn_rsrc */
141 NULL, /* it_free_conn_rsrc */
142 NULL, /* it_tgt_enable_datamover */
143 NULL, /* it_ini_enable_datamover */
144 NULL, /* it_conn_terminate */
145 idm_so_free_task_rsrc, /* it_free_task_rsrc */
146 idm_so_negotiate_key_values, /* it_negotiate_key_values */
147 idm_so_notice_key_values, /* it_notice_key_values */
148 idm_so_conn_is_capable, /* it_conn_is_capable */
149 idm_so_buf_alloc, /* it_buf_alloc */
150 idm_so_buf_free, /* it_buf_free */
151 idm_so_buf_setup, /* it_buf_setup */
152 idm_so_buf_teardown, /* it_buf_teardown */
153 idm_so_tgt_svc_create, /* it_tgt_svc_create */
154 idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */
155 idm_so_tgt_svc_online, /* it_tgt_svc_online */
156 idm_so_tgt_svc_offline, /* it_tgt_svc_offline */
157 idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */
158 idm_so_tgt_conn_connect, /* it_tgt_conn_connect */
159 idm_so_conn_disconnect, /* it_tgt_conn_disconnect */
160 idm_so_ini_conn_create, /* it_ini_conn_create */
161 idm_so_ini_conn_destroy, /* it_ini_conn_destroy */
162 idm_so_ini_conn_connect, /* it_ini_conn_connect */
163 idm_so_conn_disconnect, /* it_ini_conn_disconnect */
164 idm_so_declare_key_values /* it_declare_key_values */
167 kmutex_t idm_so_timed_socket_mutex;
169 int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
170 int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
173 * idm_so_init()
174 * Sockets transport initialization
176 void
177 idm_so_init(idm_transport_t *it)
179 /* Cache for IDM Data and R2T Transmit PDU's */
180 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
181 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
182 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
184 /* Cache for IDM Receive PDU's */
185 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
186 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
187 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
189 /* 128k buffer cache */
190 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
191 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
193 /* Set the sockets transport ops */
194 it->it_ops = &idm_so_transport_ops;
196 mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
201 * idm_so_fini()
202 * Sockets transport teardown
204 void
205 idm_so_fini(void)
207 kmem_cache_destroy(idm.idm_so_128k_buf_cache);
208 kmem_cache_destroy(idm.idm_sotx_pdu_cache);
209 kmem_cache_destroy(idm.idm_sorx_pdu_cache);
210 mutex_destroy(&idm_so_timed_socket_mutex);
213 ksocket_t
214 idm_socreate(int domain, int type, int protocol)
216 ksocket_t ks;
218 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
219 CRED())) {
220 return (ks);
221 } else {
222 return (NULL);
227 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
228 * reception and transmission. The sonode still exists but its state
229 * gets modified to indicate it is no longer connected. Calls to
230 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
231 * regain control of a thread stuck in idm_sorecv.
233 void
234 idm_soshutdown(ksocket_t so)
236 (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
240 * idm_sodestroy releases all resources associated with a socket previously
241 * created with idm_socreate. The socket must be shutdown using
242 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
243 * otherwise undefined behavior will result.
245 void
246 idm_sodestroy(ksocket_t ks)
248 (void) ksocket_close(ks, CRED());
252 * Function to compare two addresses in sockaddr_storage format
256 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
257 const struct sockaddr_storage *cmp_ss2,
258 boolean_t v4_mapped_as_v4,
259 boolean_t compare_ports)
261 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2;
262 const struct sockaddr_storage *ss1, *ss2;
263 struct in_addr *in1, *in2;
264 struct in6_addr *in61, *in62;
265 int i;
268 * Normalize V4-mapped IPv6 addresses into V4 format if
269 * v4_mapped_as_v4 is B_TRUE.
271 ss1 = cmp_ss1;
272 ss2 = cmp_ss2;
273 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
274 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
275 if (IN6_IS_ADDR_V4MAPPED(in61)) {
276 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
277 mapped_v4_ss1.ss_family = AF_INET;
278 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
279 ((struct sockaddr_in *)ss1)->sin_port;
280 IN6_V4MAPPED_TO_INADDR(in61,
281 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
282 ss1 = &mapped_v4_ss1;
285 ss2 = cmp_ss2;
286 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
287 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
288 if (IN6_IS_ADDR_V4MAPPED(in62)) {
289 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
290 mapped_v4_ss2.ss_family = AF_INET;
291 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
292 ((struct sockaddr_in *)ss2)->sin_port;
293 IN6_V4MAPPED_TO_INADDR(in62,
294 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
295 ss2 = &mapped_v4_ss2;
300 * Compare ports, then address family, then ip address
302 if (compare_ports &&
303 (((struct sockaddr_in *)ss1)->sin_port !=
304 ((struct sockaddr_in *)ss2)->sin_port)) {
305 if (((struct sockaddr_in *)ss1)->sin_port >
306 ((struct sockaddr_in *)ss2)->sin_port)
307 return (1);
308 else
309 return (-1);
313 * ports are the same
315 if (ss1->ss_family != ss2->ss_family) {
316 if (ss1->ss_family == AF_INET)
317 return (1);
318 else
319 return (-1);
323 * address families are the same
325 if (ss1->ss_family == AF_INET) {
326 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
327 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
329 if (in1->s_addr > in2->s_addr)
330 return (1);
331 else if (in1->s_addr < in2->s_addr)
332 return (-1);
333 else
334 return (0);
335 } else if (ss1->ss_family == AF_INET6) {
336 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
337 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
339 for (i = 0; i < 4; i++) {
340 if (in61->s6_addr32[i] > in62->s6_addr32[i])
341 return (1);
342 else if (in61->s6_addr32[i] < in62->s6_addr32[i])
343 return (-1);
345 return (0);
348 return (1);
352 * IP address filter functions to flag addresses that should not
353 * go out to initiators through discovery.
355 static boolean_t
356 idm_v4_addr_okay(struct in_addr *in_addr)
358 in_addr_t addr = ntohl(in_addr->s_addr);
360 if ((INADDR_NONE == addr) ||
361 (IN_MULTICAST(addr)) ||
362 ((addr >> IN_CLASSA_NSHIFT) == 0) ||
363 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
364 return (B_FALSE);
366 return (B_TRUE);
369 static boolean_t
370 idm_v6_addr_okay(struct in6_addr *addr6)
373 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
374 (IN6_IS_ADDR_LOOPBACK(addr6)) ||
375 (IN6_IS_ADDR_MULTICAST(addr6)) ||
376 (IN6_IS_ADDR_V4MAPPED(addr6)) ||
377 (IN6_IS_ADDR_V4COMPAT(addr6)) ||
378 (IN6_IS_ADDR_LINKLOCAL(addr6))) {
379 return (B_FALSE);
381 return (B_TRUE);
385 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
386 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
389 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
391 ksocket_t so4, so6;
392 struct lifnum lifn;
393 struct lifconf lifc;
394 struct lifreq *lp;
395 int rval;
396 int numifs;
397 int bufsize;
398 void *buf;
399 int i, j, n, rc;
400 struct sockaddr_storage ss;
401 struct sockaddr_in *sin;
402 struct sockaddr_in6 *sin6;
403 idm_addr_t *ip;
404 idm_addr_list_t *ipaddr = NULL;
405 int size_ipaddr;
407 *ipaddr_p = NULL;
408 size_ipaddr = 0;
409 buf = NULL;
411 /* create an ipv4 and ipv6 UDP socket */
412 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
413 return (0);
414 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
415 idm_sodestroy(so6);
416 return (0);
420 retry_count:
421 /* snapshot the current number of interfaces */
422 lifn.lifn_family = PF_UNSPEC;
423 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
424 lifn.lifn_count = 0;
425 /* use vp6 for ioctls with unspecified families by default */
426 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
427 != 0) {
428 goto cleanup;
431 numifs = lifn.lifn_count;
432 if (numifs <= 0) {
433 goto cleanup;
436 /* allocate extra room in case more interfaces appear */
437 numifs += 10;
439 /* get the interface names and ip addresses */
440 bufsize = numifs * sizeof (struct lifreq);
441 buf = kmem_alloc(bufsize, KM_SLEEP);
443 lifc.lifc_family = AF_UNSPEC;
444 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
445 lifc.lifc_len = bufsize;
446 lifc.lifc_buf = buf;
447 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
448 if (rc != 0) {
449 goto cleanup;
451 /* if our extra room is used up, try again */
452 if (bufsize <= lifc.lifc_len) {
453 kmem_free(buf, bufsize);
454 buf = NULL;
455 goto retry_count;
457 /* calc actual number of ifconfs */
458 n = lifc.lifc_len / sizeof (struct lifreq);
460 /* get ip address */
461 if (n > 0) {
462 size_ipaddr = sizeof (idm_addr_list_t) +
463 (n - 1) * sizeof (idm_addr_t);
464 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
465 } else {
466 goto cleanup;
470 * Examine the array of interfaces and filter uninteresting ones
472 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
475 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
477 ss = lp->lifr_addr;
479 * fetch the flags using the socket of the correct family
481 switch (ss.ss_family) {
482 case AF_INET:
483 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
484 &rval, CRED());
485 break;
486 case AF_INET6:
487 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
488 &rval, CRED());
489 break;
490 default:
491 continue;
493 if (rc == 0) {
495 * If we got the flags, skip uninteresting
496 * interfaces based on flags
498 if ((lp->lifr_flags & IFF_UP) != IFF_UP)
499 continue;
500 if (lp->lifr_flags &
501 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
502 continue;
505 /* save ip address */
506 ip = &ipaddr->al_addrs[j];
507 switch (ss.ss_family) {
508 case AF_INET:
509 sin = (struct sockaddr_in *)&ss;
510 if (!idm_v4_addr_okay(&sin->sin_addr))
511 continue;
512 ip->a_addr.i_addr.in4 = sin->sin_addr;
513 ip->a_addr.i_insize = sizeof (struct in_addr);
514 break;
515 case AF_INET6:
516 sin6 = (struct sockaddr_in6 *)&ss;
517 if (!idm_v6_addr_okay(&sin6->sin6_addr))
518 continue;
519 ip->a_addr.i_addr.in6 = sin6->sin6_addr;
520 ip->a_addr.i_insize = sizeof (struct in6_addr);
521 break;
522 default:
523 continue;
525 j++;
528 if (j == 0) {
529 /* no valid ifaddr */
530 kmem_free(ipaddr, size_ipaddr);
531 size_ipaddr = 0;
532 ipaddr = NULL;
533 } else {
534 ipaddr->al_out_cnt = j;
538 cleanup:
539 idm_sodestroy(so6);
540 idm_sodestroy(so4);
542 if (buf != NULL)
543 kmem_free(buf, bufsize);
545 *ipaddr_p = ipaddr;
546 return (size_ipaddr);
550 idm_sorecv(ksocket_t so, void *msg, size_t len)
552 iovec_t iov;
554 ASSERT(so != NULL);
555 ASSERT(len != 0);
558 * Fill in iovec and receive data
560 iov.iov_base = msg;
561 iov.iov_len = len;
563 return (idm_iov_sorecv(so, &iov, 1, len));
567 * idm_sosendto - Sends a buffered data on a non-connected socket.
569 * This function puts the data provided on the wire by calling sosendmsg.
570 * It will return only when all the data has been sent or if an error
571 * occurs.
573 * Returns 0 for success, the socket errno value if sosendmsg fails, and
574 * -1 if sosendmsg returns success but uio_resid != 0
577 idm_sosendto(ksocket_t so, void *buff, size_t len,
578 struct sockaddr *name, socklen_t namelen)
580 struct msghdr msg;
581 struct iovec iov[1];
582 int error;
583 size_t sent = 0;
585 iov[0].iov_base = buff;
586 iov[0].iov_len = len;
588 /* Initialization of the message header. */
589 bzero(&msg, sizeof (msg));
590 msg.msg_iov = iov;
591 msg.msg_iovlen = 1;
592 msg.msg_name = name;
593 msg.msg_namelen = namelen;
595 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
596 /* Data sent */
597 if (sent == len) {
598 /* All data sent. Success. */
599 return (0);
600 } else {
601 /* Not all data was sent. Failure */
602 return (-1);
606 /* Send failed */
607 return (error);
611 * idm_iov_sosend - Sends an iovec on a connection.
613 * This function puts the data provided on the wire by calling sosendmsg.
614 * It will return only when all the data has been sent or if an error
615 * occurs.
617 * Returns 0 for success, the socket errno value if sosendmsg fails, and
618 * -1 if sosendmsg returns success but uio_resid != 0
621 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
623 struct msghdr msg;
624 int error;
625 size_t sent = 0;
627 ASSERT(iop != NULL);
629 /* Initialization of the message header. */
630 bzero(&msg, sizeof (msg));
631 msg.msg_iov = iop;
632 msg.msg_iovlen = iovlen;
634 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
635 == 0) {
636 /* Data sent */
637 if (sent == total_len) {
638 /* All data sent. Success. */
639 return (0);
640 } else {
641 /* Not all data was sent. Failure */
642 return (-1);
646 /* Send failed */
647 return (error);
651 * idm_iov_sorecv - Receives an iovec from a connection
653 * This function gets the data asked for from the socket. It will return
654 * only when all the requested data has been retrieved or if an error
655 * occurs.
657 * Returns 0 for success, the socket errno value if sorecvmsg fails, and
658 * -1 if sorecvmsg returns success but uio_resid != 0
661 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
663 struct msghdr msg;
664 int error;
665 size_t recv;
666 int flags;
668 ASSERT(iop != NULL);
670 /* Initialization of the message header. */
671 bzero(&msg, sizeof (msg));
672 msg.msg_iov = iop;
673 msg.msg_iovlen = iovlen;
674 flags = MSG_WAITALL;
676 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
677 == 0) {
678 /* Received data */
679 if (recv == total_len) {
680 /* All requested data received. Success */
681 return (0);
682 } else {
684 * Not all data was received. The connection has
685 * probably failed.
687 return (-1);
691 /* Receive failed */
692 return (error);
695 static void
696 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
698 int conn_abort = 10000;
699 int conn_notify = 2000;
700 int abort = 30000;
702 /* Pre-connect socket options */
703 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
704 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
705 CRED());
706 if (boot_conn == B_FALSE) {
707 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
708 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
709 CRED());
710 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
711 TCP_ABORT_THRESHOLD,
712 (char *)&abort, sizeof (int), CRED());
716 static void
717 idm_set_postconnect_options(ksocket_t ks)
719 const int on = 1;
721 /* Set connect options */
722 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
723 (char *)&idm_so_rcvbuf, sizeof (idm_so_rcvbuf), CRED());
724 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
725 (char *)&idm_so_sndbuf, sizeof (idm_so_sndbuf), CRED());
726 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
727 (char *)&on, sizeof (on), CRED());
/*
 * Assemble a 24-bit big-endian value from the three bytes at 'ptr':
 * ptr[0] is the most significant byte, ptr[2] the least.
 */
static uint32_t
n2h24(const uchar_t *ptr)
{
	uint32_t	value;

	value = (uint32_t)ptr[0] << 16;
	value |= (uint32_t)ptr[1] << 8;
	value |= (uint32_t)ptr[2];

	return (value);
}
736 static boolean_t
737 idm_dataseglenokay(idm_conn_t *ic, idm_pdu_t *pdu)
739 iscsi_hdr_t *bhs;
741 if (ic->ic_conn_type == CONN_TYPE_TGT &&
742 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
743 IDM_CONN_LOG(CE_WARN,
744 "idm_dataseglenokay: exceeded the max data segment length");
745 return (B_FALSE);
748 bhs = pdu->isp_hdr;
750 * Filter out any RFC3720 data-size violations.
752 switch (IDM_PDU_OPCODE(pdu)) {
753 case ISCSI_OP_SCSI_TASK_MGT_MSG:
754 case ISCSI_OP_SCSI_TASK_MGT_RSP:
755 case ISCSI_OP_RTT_RSP:
756 case ISCSI_OP_LOGOUT_CMD:
758 * Data-segment not allowed and additional headers not allowed.
759 * (both must be zero according to the RFC3720.)
761 if (bhs->hlength != 0 || pdu->isp_datalen != 0)
762 return (B_FALSE);
763 break;
764 case ISCSI_OP_NOOP_OUT:
765 case ISCSI_OP_LOGIN_CMD:
766 case ISCSI_OP_TEXT_CMD:
767 case ISCSI_OP_SNACK_CMD:
768 case ISCSI_OP_NOOP_IN:
769 case ISCSI_OP_SCSI_RSP:
770 case ISCSI_OP_LOGIN_RSP:
771 case ISCSI_OP_TEXT_RSP:
772 case ISCSI_OP_SCSI_DATA_RSP:
773 case ISCSI_OP_LOGOUT_RSP:
774 case ISCSI_OP_ASYNC_EVENT:
775 case ISCSI_OP_REJECT_MSG:
777 * Additional headers not allowed.
778 * (must be zero according to RFC3720.)
780 if (bhs->hlength)
781 return (B_FALSE);
782 break;
783 case ISCSI_OP_SCSI_CMD:
785 * See RFC3720, section 10.3
787 * For pure read cmds, data-segment-length must be zero.
788 * For non-final transfers, data-size must be even number of
789 * 4-byte words.
790 * For any transfer, an expected byte count must be provided.
791 * For bidirectional transfers, an additional-header must be
792 * provided (for the read byte-count.)
794 if (pdu->isp_datalen) {
795 if ((bhs->flags & (ISCSI_FLAG_CMD_READ |
796 ISCSI_FLAG_CMD_WRITE)) == ISCSI_FLAG_CMD_READ)
797 return (B_FALSE);
798 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
799 ((pdu->isp_datalen & 0x3) != 0))
800 return (B_FALSE);
802 if (bhs->flags & (ISCSI_FLAG_CMD_READ |
803 ISCSI_FLAG_CMD_WRITE)) {
804 iscsi_scsi_cmd_hdr_t *cmdhdr =
805 (iscsi_scsi_cmd_hdr_t *)bhs;
807 * we're transfering some data, we must have a
808 * byte count
810 if (cmdhdr->data_length == 0)
811 return (B_FALSE);
813 break;
814 case ISCSI_OP_SCSI_DATA:
816 * See RFC3720, section 10.7
818 * Additional headers aren't allowed, and the data-size must
819 * be an even number of 4-byte words (unless the final bit
820 * is set.)
822 if (bhs->hlength)
823 return (B_FALSE);
824 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
825 ((pdu->isp_datalen & 0x3) != 0))
826 return (B_FALSE);
827 break;
828 default:
829 break;
831 return (B_TRUE);
834 static idm_status_t
835 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
837 iscsi_hdr_t *bhs;
838 uint32_t hdr_digest_crc;
839 uint32_t crc_calculated;
840 void *new_hdr;
841 int ahslen = 0;
842 int total_len = 0;
843 int iovlen = 0;
844 struct iovec iov[2];
845 idm_so_conn_t *so_conn;
846 int rc;
848 so_conn = ic->ic_transport_private;
851 * Read BHS
853 bhs = pdu->isp_hdr;
854 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
855 if (rc != IDM_STATUS_SUCCESS) {
856 return (IDM_STATUS_FAIL);
860 * Check actual AHS length against the amount available in the buffer
862 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
863 (bhs->hlength * sizeof (uint32_t));
864 pdu->isp_datalen = n2h24(bhs->dlength);
866 if (!idm_dataseglenokay(ic, pdu)) {
867 IDM_CONN_LOG(CE_WARN,
868 "idm_sorecvhdr: invalid data segment length");
869 return (IDM_STATUS_FAIL);
871 if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
872 /* Allocate a new header segment and change the callback */
873 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
874 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
875 pdu->isp_hdr = new_hdr;
876 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
879 * This callback will restore the expected values after
880 * the RX PDU has been processed.
882 pdu->isp_callback = idm_sorx_addl_pdu_cb;
886 * Setup receipt of additional header and header digest (if enabled).
888 if (bhs->hlength > 0) {
889 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
890 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
891 iov[iovlen].iov_len = ahslen;
892 total_len += iov[iovlen].iov_len;
893 iovlen++;
896 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
897 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
898 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
899 total_len += iov[iovlen].iov_len;
900 iovlen++;
903 if ((iovlen != 0) &&
904 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
905 total_len) != 0)) {
906 return (IDM_STATUS_FAIL);
910 * Validate header digest if enabled
912 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
913 crc_calculated = idm_crc32c(pdu->isp_hdr,
914 sizeof (iscsi_hdr_t) + ahslen);
915 if (crc_calculated != hdr_digest_crc) {
916 /* Invalid Header Digest */
917 return (IDM_STATUS_HEADER_DIGEST);
921 return (0);
925 * idm_so_ini_conn_create()
926 * Allocate the sockets transport connection resources.
928 static idm_status_t
929 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
931 ksocket_t so;
932 idm_so_conn_t *so_conn;
933 idm_status_t idmrc;
935 so = idm_socreate(cr->cr_domain, cr->cr_type,
936 cr->cr_protocol);
937 if (so == NULL) {
938 return (IDM_STATUS_FAIL);
941 /* Bind the socket if configured to do so */
942 if (cr->cr_bound) {
943 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
944 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
945 idm_sodestroy(so);
946 return (IDM_STATUS_FAIL);
950 idmrc = idm_so_conn_create_common(ic, so);
951 if (idmrc != IDM_STATUS_SUCCESS) {
952 idm_soshutdown(so);
953 idm_sodestroy(so);
954 return (IDM_STATUS_FAIL);
957 so_conn = ic->ic_transport_private;
958 /* Set up socket options */
959 idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
961 return (IDM_STATUS_SUCCESS);
965 * idm_so_ini_conn_destroy()
966 * Tear down the sockets transport connection resources.
968 static void
969 idm_so_ini_conn_destroy(idm_conn_t *ic)
971 idm_so_conn_destroy_common(ic);
975 * idm_so_ini_conn_connect()
976 * Establish the connection referred to by the handle previously allocated via
977 * idm_so_ini_conn_create().
979 static idm_status_t
980 idm_so_ini_conn_connect(idm_conn_t *ic)
982 idm_so_conn_t *so_conn;
983 struct sonode *node = NULL;
984 int rc;
985 clock_t lbolt, conn_login_max, conn_login_interval;
986 boolean_t nonblock;
988 so_conn = ic->ic_transport_private;
989 nonblock = ic->ic_conn_params.nonblock_socket;
990 conn_login_max = ic->ic_conn_params.conn_login_max;
991 conn_login_interval = ddi_get_lbolt() +
992 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
994 if (nonblock == B_TRUE) {
995 node = ((struct sonode *)(so_conn->ic_so));
996 /* Set to none block socket mode */
997 idm_so_socket_set_nonblock(node);
998 do {
999 rc = ksocket_connect(so_conn->ic_so,
1000 &ic->ic_ini_dst_addr.sin,
1001 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
1002 CRED());
1003 if (rc == 0 || rc == EISCONN) {
1004 /* socket success or already success */
1005 rc = IDM_STATUS_SUCCESS;
1006 break;
1008 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
1009 (rc == ECONNRESET)) {
1010 /* socket connection timeout or refuse */
1011 break;
1013 lbolt = ddi_get_lbolt();
1014 if (lbolt > conn_login_max) {
1016 * Connection retry timeout,
1017 * failed connect to target.
1019 break;
1021 if (lbolt < conn_login_interval) {
1022 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
1023 /* TCP connect still in progress */
1024 ddi_sleep(IN_PROGRESS_DELAY);
1025 continue;
1026 } else {
1027 delay(conn_login_interval - lbolt);
1030 conn_login_interval = ddi_get_lbolt() +
1031 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
1032 } while (rc != 0);
1033 /* resume to nonblock mode */
1034 if (rc == IDM_STATUS_SUCCESS) {
1035 idm_so_socket_set_block(node);
1037 } else {
1038 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
1039 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
1042 if (rc != 0) {
1043 idm_soshutdown(so_conn->ic_so);
1044 return (IDM_STATUS_FAIL);
1047 idm_so_conn_connect_common(ic);
1049 idm_set_postconnect_options(so_conn->ic_so);
1051 return (IDM_STATUS_SUCCESS);
1054 idm_status_t
1055 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
1057 idm_status_t idmrc;
1059 idm_set_postconnect_options(new_so);
1060 idmrc = idm_so_conn_create_common(ic, new_so);
1062 return (idmrc);
1065 static void
1066 idm_so_tgt_conn_destroy(idm_conn_t *ic)
1068 idm_so_conn_destroy_common(ic);
1072 * idm_so_tgt_conn_connect()
1073 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
1074 * is invoked from the SM as a result of an inbound connection request.
1076 static idm_status_t
1077 idm_so_tgt_conn_connect(idm_conn_t *ic)
1079 idm_so_conn_connect_common(ic);
1081 return (IDM_STATUS_SUCCESS);
1084 static idm_status_t
1085 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
1087 idm_so_conn_t *so_conn;
1089 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1090 so_conn->ic_so = new_so;
1092 ic->ic_transport_private = so_conn;
1093 ic->ic_transport_hdrlen = 0;
1095 /* Set the scoreboarding flag on this connection */
1096 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1097 ic->ic_conn_params.max_recv_dataseglen =
1098 ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1099 ic->ic_conn_params.max_xmit_dataseglen =
1100 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1103 * Initialize tx thread mutex and list
1105 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1106 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1107 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1108 offsetof(idm_pdu_t, idm_tx_link));
1110 return (IDM_STATUS_SUCCESS);
1113 static void
1114 idm_so_conn_destroy_common(idm_conn_t *ic)
1116 idm_so_conn_t *so_conn = ic->ic_transport_private;
1118 ic->ic_transport_private = NULL;
1119 idm_sodestroy(so_conn->ic_so);
1120 list_destroy(&so_conn->ic_tx_list);
1121 mutex_destroy(&so_conn->ic_tx_mutex);
1122 cv_destroy(&so_conn->ic_tx_cv);
1124 kmem_free(so_conn, sizeof (idm_so_conn_t));
1127 static void
1128 idm_so_conn_connect_common(idm_conn_t *ic)
1130 idm_so_conn_t *so_conn;
1131 struct sockaddr_in6 t_addr;
1132 socklen_t t_addrlen = 0;
1134 so_conn = ic->ic_transport_private;
1135 bzero(&t_addr, sizeof (struct sockaddr_in6));
1136 t_addrlen = sizeof (struct sockaddr_in6);
1138 /* Set the local and remote addresses in the idm conn handle */
1139 (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1140 &t_addrlen, CRED());
1141 bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1142 (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1143 &t_addrlen, CRED());
1144 bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1146 mutex_enter(&ic->ic_mutex);
1147 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1148 &p0, TS_RUN, minclsyspri);
1149 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1150 &p0, TS_RUN, minclsyspri);
1152 while (so_conn->ic_rx_thread_did == 0 ||
1153 so_conn->ic_tx_thread_did == 0)
1154 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1155 mutex_exit(&ic->ic_mutex);
1159 * idm_so_conn_disconnect()
1160 * Shutdown the socket connection and stop the thread
1162 static void
1163 idm_so_conn_disconnect(idm_conn_t *ic)
1165 idm_so_conn_t *so_conn;
1167 so_conn = ic->ic_transport_private;
1169 mutex_enter(&ic->ic_mutex);
1170 so_conn->ic_rx_thread_running = B_FALSE;
1171 so_conn->ic_tx_thread_running = B_FALSE;
1172 /* We need to wakeup the TX thread */
1173 mutex_enter(&so_conn->ic_tx_mutex);
1174 cv_signal(&so_conn->ic_tx_cv);
1175 mutex_exit(&so_conn->ic_tx_mutex);
1176 mutex_exit(&ic->ic_mutex);
1178 /* This should wakeup the RX thread if it is sleeping */
1179 idm_soshutdown(so_conn->ic_so);
1181 thread_join(so_conn->ic_tx_thread_did);
1182 thread_join(so_conn->ic_rx_thread_did);
1186 * idm_so_tgt_svc_create()
1187 * Establish a service on an IP address and port. idm_svc_req_t contains
1188 * the service parameters.
1190 /*ARGSUSED*/
1191 static idm_status_t
1192 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1194 idm_so_svc_t *so_svc;
1196 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1198 /* Set the new sockets service in svc handle */
1199 is->is_so_svc = (void *)so_svc;
1201 return (IDM_STATUS_SUCCESS);
1205 * idm_so_tgt_svc_destroy()
1206 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1208 static void
1209 idm_so_tgt_svc_destroy(idm_svc_t *is)
1211 /* the socket will have been torn down; free the service */
1212 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1216 * idm_so_tgt_svc_online()
1217 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1220 static idm_status_t
1221 idm_so_tgt_svc_online(idm_svc_t *is)
1223 idm_so_svc_t *so_svc;
1224 idm_svc_req_t *sr = &is->is_svc_req;
1225 struct sockaddr_in6 sin6_ip;
1226 const uint32_t on = 1;
1228 mutex_enter(&is->is_mutex);
1229 so_svc = (idm_so_svc_t *)is->is_so_svc;
1232 * Try creating an IPv6 socket first
1234 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1235 mutex_exit(&is->is_mutex);
1236 return (IDM_STATUS_FAIL);
1237 } else {
1238 bzero(&sin6_ip, sizeof (sin6_ip));
1239 sin6_ip.sin6_family = AF_INET6;
1240 sin6_ip.sin6_port = htons(sr->sr_port);
1241 sin6_ip.sin6_addr = in6addr_any;
1243 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1244 SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1246 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1247 sizeof (sin6_ip), CRED()) != 0) {
1248 mutex_exit(&is->is_mutex);
1249 idm_sodestroy(so_svc->is_so);
1250 return (IDM_STATUS_FAIL);
1254 idm_set_postconnect_options(so_svc->is_so);
1256 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1257 mutex_exit(&is->is_mutex);
1258 idm_soshutdown(so_svc->is_so);
1259 idm_sodestroy(so_svc->is_so);
1260 return (IDM_STATUS_FAIL);
1263 /* Launch a watch thread */
1264 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1265 is, 0, &p0, TS_RUN, minclsyspri);
1267 if (so_svc->is_thread == NULL) {
1268 /* Failure to launch; teardown the socket */
1269 mutex_exit(&is->is_mutex);
1270 idm_soshutdown(so_svc->is_so);
1271 idm_sodestroy(so_svc->is_so);
1272 return (IDM_STATUS_FAIL);
1274 ksocket_hold(so_svc->is_so);
1275 /* Wait for the port watcher thread to start */
1276 while (!so_svc->is_thread_running)
1277 cv_wait(&is->is_cv, &is->is_mutex);
1278 mutex_exit(&is->is_mutex);
1280 return (IDM_STATUS_SUCCESS);
1284 * idm_so_tgt_svc_offline
1286 * Stop listening on the IP address and port identified by idm_svc_t.
1288 static void
1289 idm_so_tgt_svc_offline(idm_svc_t *is)
1291 idm_so_svc_t *so_svc;
1292 mutex_enter(&is->is_mutex);
1293 so_svc = (idm_so_svc_t *)is->is_so_svc;
1294 so_svc->is_thread_running = B_FALSE;
1295 mutex_exit(&is->is_mutex);
1298 * Teardown socket
1300 idm_sodestroy(so_svc->is_so);
1303 * Now we expect the port watcher thread to terminate
1305 thread_join(so_svc->is_thread_did);
1309 * Watch thread for target service connection establishment.
1311 void
1312 idm_so_svc_port_watcher(void *arg)
1314 idm_svc_t *svc = arg;
1315 ksocket_t new_so;
1316 idm_conn_t *ic;
1317 idm_status_t idmrc;
1318 idm_so_svc_t *so_svc;
1319 int rc;
1320 struct sockaddr_in6 t_addr;
1321 socklen_t t_addrlen;
1323 bzero(&t_addr, sizeof (struct sockaddr_in6));
1324 t_addrlen = sizeof (struct sockaddr_in6);
1325 mutex_enter(&svc->is_mutex);
1327 so_svc = svc->is_so_svc;
1328 so_svc->is_thread_running = B_TRUE;
1329 so_svc->is_thread_did = so_svc->is_thread->t_did;
1331 cv_signal(&svc->is_cv);
1333 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1334 svc->is_svc_req.sr_port);
1336 while (so_svc->is_thread_running) {
1337 mutex_exit(&svc->is_mutex);
1339 if ((rc = ksocket_accept(so_svc->is_so,
1340 (struct sockaddr *)&t_addr, &t_addrlen,
1341 &new_so, CRED())) != 0) {
1342 mutex_enter(&svc->is_mutex);
1343 if (rc != ECONNABORTED && rc != EINTR) {
1344 IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
1345 " ksocket_accept failed %d", rc);
1348 * Unclean shutdown of this thread is not handled
1349 * wait for !is_thread_running.
1351 continue;
1353 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1354 &ic);
1355 if (idmrc != IDM_STATUS_SUCCESS) {
1356 /* Drop connection */
1357 idm_soshutdown(new_so);
1358 idm_sodestroy(new_so);
1359 mutex_enter(&svc->is_mutex);
1360 continue;
1363 idmrc = idm_so_tgt_conn_create(ic, new_so);
1364 if (idmrc != IDM_STATUS_SUCCESS) {
1365 idm_svc_conn_destroy(ic);
1366 idm_soshutdown(new_so);
1367 idm_sodestroy(new_so);
1368 mutex_enter(&svc->is_mutex);
1369 continue;
1373 * Kick the state machine. At CS_S3_XPT_UP the state machine
1374 * will notify the client (target) about the new connection.
1376 idm_conn_event(ic, CE_CONNECT_ACCEPT, (uintptr_t)NULL);
1378 mutex_enter(&svc->is_mutex);
1380 ksocket_rele(so_svc->is_so);
1381 so_svc->is_thread_running = B_FALSE;
1382 mutex_exit(&svc->is_mutex);
1384 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1385 svc->is_svc_req.sr_port);
1387 thread_exit();
1391 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1392 * frees resources associated with the task.
1394 * It's not clear that this should return idm_status_t. What do we do
1395 * if it fails?
1397 static idm_status_t
1398 idm_so_free_task_rsrc(idm_task_t *idt)
1400 idm_buf_t *idb, *next_idb;
1403 * There is nothing to cleanup on initiator connections
1405 if (IDM_CONN_ISINI(idt->idt_ic))
1406 return (IDM_STATUS_SUCCESS);
1409 * If this is a target connection, call idm_buf_rx_from_ini_done for
1410 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1412 * In addition, remove any buffers associated with this task from
1413 * the ic_tx_list. We'll do this by walking the idt_inbufv list, but
1414 * items don't actually get removed from that list (and completion
1415 * routines called) until idm_task_cleanup.
1417 mutex_enter(&idt->idt_mutex);
1419 for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1420 next_idb = list_next(&idt->idt_outbufv, idb);
1421 if (idb->idb_in_transport) {
1423 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1425 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1426 uintptr_t, idb->idb_buf,
1427 uint32_t, idb->idb_bufoffset,
1428 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1429 uint32_t, idb->idb_xfer_len,
1430 int, XFER_BUF_RX_FROM_INI);
1431 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1432 mutex_enter(&idt->idt_mutex);
1436 for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1437 next_idb = list_next(&idt->idt_inbufv, idb);
1439 * We want to remove these items from the tx_list as well,
1440 * but knowing it's in the idt_inbufv list is not a guarantee
1441 * that it's in the tx_list. If it's on the tx list then
1442 * let idm_sotx_thread() clean it up.
1444 if (idb->idb_in_transport && !idb->idb_tx_thread) {
1446 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1448 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1449 uintptr_t, idb->idb_buf,
1450 uint32_t, idb->idb_bufoffset,
1451 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1452 uint32_t, idb->idb_xfer_len,
1453 int, XFER_BUF_TX_TO_INI);
1454 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1455 mutex_enter(&idt->idt_mutex);
1459 mutex_exit(&idt->idt_mutex);
1461 return (IDM_STATUS_SUCCESS);
1465 * idm_so_negotiate_key_values() validates the key values for this connection
1467 /* ARGSUSED */
1468 static kv_status_t
1469 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1470 nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1472 /* All parameters are negotiated at the iscsit level */
1473 return (KV_HANDLED);
1477 * idm_so_notice_key_values() activates the negotiated key values for
1478 * this connection.
1480 static void
1481 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1483 char *nvp_name;
1484 nvpair_t *nvp;
1485 nvpair_t *next_nvp;
1486 int nvrc;
1487 idm_status_t idm_status;
1488 const idm_kv_xlate_t *ikvx;
1489 uint64_t num_val;
1491 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1492 nvp != NULL; nvp = next_nvp) {
1493 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1494 nvp_name = nvpair_name(nvp);
1496 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1497 switch (ikvx->ik_key_id) {
1498 case KI_HEADER_DIGEST:
1499 case KI_DATA_DIGEST:
1500 idm_status = idm_so_handle_digest(it, nvp, ikvx);
1501 ASSERT(idm_status == 0);
1503 /* Remove processed item from negotiated_nvl list */
1504 nvrc = nvlist_remove_all(
1505 negotiated_nvl, ikvx->ik_key_name);
1506 ASSERT(nvrc == 0);
1507 break;
1508 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1510 * Just pass the value down to idm layer.
1511 * No need to remove it from negotiated_nvl list here.
1513 nvrc = nvpair_value_uint64(nvp, &num_val);
1514 ASSERT(nvrc == 0);
1515 it->ic_conn_params.max_xmit_dataseglen =
1516 (uint32_t)num_val;
1517 break;
1518 default:
1519 break;
1525 * idm_so_declare_key_values() declares the key values for this connection
1527 /* ARGSUSED */
1528 static kv_status_t
1529 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1530 nvlist_t *outgoing_nvl)
1532 char *nvp_name;
1533 nvpair_t *nvp;
1534 nvpair_t *next_nvp;
1535 kv_status_t kvrc;
1536 int nvrc = 0;
1537 const idm_kv_xlate_t *ikvx;
1538 uint64_t num_val;
1540 for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1541 nvp != NULL && nvrc == 0; nvp = next_nvp) {
1542 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1543 nvp_name = nvpair_name(nvp);
1545 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1546 switch (ikvx->ik_key_id) {
1547 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1548 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1549 break;
1551 if (outgoing_nvl &&
1552 (nvrc = nvlist_add_uint64(outgoing_nvl,
1553 nvp_name, num_val)) != 0) {
1554 break;
1556 it->ic_conn_params.max_recv_dataseglen =
1557 (uint32_t)num_val;
1558 break;
1559 default:
1560 break;
1563 kvrc = idm_nvstat_to_kvstat(nvrc);
1564 return (kvrc);
1567 static idm_status_t
1568 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1569 const idm_kv_xlate_t *ikvx)
1571 int nvrc;
1572 char *digest_choice_string;
1574 nvrc = nvpair_value_string(digest_choice,
1575 &digest_choice_string);
1576 ASSERT(nvrc == 0);
1577 if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1578 switch (ikvx->ik_key_id) {
1579 case KI_HEADER_DIGEST:
1580 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1581 break;
1582 case KI_DATA_DIGEST:
1583 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1584 break;
1585 default:
1586 ASSERT(0);
1587 break;
1589 } else if (strcasecmp(digest_choice_string, "none") == 0) {
1590 switch (ikvx->ik_key_id) {
1591 case KI_HEADER_DIGEST:
1592 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1593 break;
1594 case KI_DATA_DIGEST:
1595 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1596 break;
1597 default:
1598 ASSERT(0);
1599 break;
1601 } else {
1602 ASSERT(0);
1605 return (IDM_STATUS_SUCCESS);
1610 * idm_so_conn_is_capable() verifies that the passed connection is provided
1611 * for by the sockets interface.
1613 /* ARGSUSED */
1614 static boolean_t
1615 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1617 return (B_TRUE);
1621 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1622 * idm_sorecv_scsidata() function invoked earlier actually reads the data
1623 * off the socket into the appropriate buffers.
1625 static void
1626 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1628 iscsi_data_hdr_t *bhs;
1629 idm_task_t *idt;
1630 idm_buf_t *idb;
1631 uint32_t datasn;
1632 size_t offset;
1633 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1634 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1636 ASSERT(ic != NULL);
1637 ASSERT(pdu != NULL);
1638 ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP);
1640 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1641 datasn = ntohl(bhs->datasn);
1642 offset = ntohl(bhs->offset);
1645 * Look up the task corresponding to the initiator task tag
1646 * to get the buffers affiliated with the task.
1648 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1649 if (idt == NULL) {
1650 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1651 idm_pdu_rx_protocol_error(ic, pdu);
1652 return;
1655 idb = pdu->isp_sorx_buf;
1656 if (idb == NULL) {
1657 IDM_CONN_LOG(CE_WARN,
1658 "idm_so_rx_datain: failed to find buffer");
1659 idm_task_rele(idt);
1660 idm_pdu_rx_protocol_error(ic, pdu);
1661 return;
1665 * DataSN values should be sequential and should not have any gaps or
1666 * repetitions. Check the DataSN with the one stored in the task.
1668 if (datasn == idt->idt_exp_datasn) {
1669 idt->idt_exp_datasn++; /* keep track of DataSN received */
1670 } else {
1671 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1672 idm_task_rele(idt);
1673 idm_pdu_rx_protocol_error(ic, pdu);
1674 return;
1678 * PDUs in a sequence should be in continuously increasing
1679 * address offset
1681 if (offset != idb->idb_exp_offset) {
1682 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1683 idm_task_rele(idt);
1684 idm_pdu_rx_protocol_error(ic, pdu);
1685 return;
1687 /* Expected next relative buffer offset */
1688 idb->idb_exp_offset += n2h24(bhs->dlength);
1689 idt->idt_rx_bytes += n2h24(bhs->dlength);
1691 idm_task_rele(idt);
1694 * For now call scsi_rsp which will process the data rsp
1695 * Revisit, need to provide an explicit client entry point for
1696 * phase collapse completions.
1698 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) &&
1699 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1700 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1703 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1707 * The idm_so_rx_dataout() function is used by the iSCSI target to read
1708 * data from the Data-Out PDU sent by the iSCSI initiator.
1710 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1711 * task to get the buffers associated with the PDU. A PDU might span buffers.
1712 * The data is then read into the respective buffer.
1714 static void
1715 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1718 iscsi_data_hdr_t *bhs;
1719 idm_task_t *idt;
1720 idm_buf_t *idb;
1721 size_t offset;
1723 ASSERT(ic != NULL);
1724 ASSERT(pdu != NULL);
1725 ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA);
1727 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1728 offset = ntohl(bhs->offset);
1731 * Look up the task corresponding to the initiator task tag
1732 * to get the buffers affiliated with the task.
1734 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1735 if (idt == NULL) {
1736 IDM_CONN_LOG(CE_WARN,
1737 "idm_so_rx_dataout: failed to find task");
1738 idm_pdu_rx_protocol_error(ic, pdu);
1739 return;
1742 idb = pdu->isp_sorx_buf;
1743 if (idb == NULL) {
1744 IDM_CONN_LOG(CE_WARN,
1745 "idm_so_rx_dataout: failed to find buffer");
1746 idm_task_rele(idt);
1747 idm_pdu_rx_protocol_error(ic, pdu);
1748 return;
1751 /* Keep track of data transferred - check data offsets */
1752 if (offset != idb->idb_exp_offset) {
1753 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1754 "%ld, %d", offset, idb->idb_exp_offset);
1755 idm_task_rele(idt);
1756 idm_pdu_rx_protocol_error(ic, pdu);
1757 return;
1759 /* Expected next relative offset */
1760 idb->idb_exp_offset += ntoh24(bhs->dlength);
1761 idt->idt_rx_bytes += n2h24(bhs->dlength);
1764 * Call the buffer callback when the transfer is complete
1766 * The connection state machine should only abort tasks after
1767 * shutting down the connection so we are assured that there
1768 * won't be a simultaneous attempt to abort this task at the
1769 * same time as we are processing this PDU (due to a connection
1770 * state change).
1772 if (bhs->flags & ISCSI_FLAG_FINAL) {
1774 * We have gotten the last data-message for the current
1775 * transfer. idb_xfer_len represents the data that the
1776 * command intended to transfer, it does not represent the
1777 * actual number of bytes transferred. If we have not
1778 * transferred the expected number of bytes something is
1779 * wrong.
1781 * We have two options, when there is a mismatch, we can
1782 * regard the transfer as invalid -- or we can modify our
1783 * notion of "xfer_len." In order to be as stringent as
1784 * possible, here we regard this transfer as in error; and
1785 * bail out.
1787 if (idb->idb_buflen == idb->idb_xfer_len &&
1788 idb->idb_buflen !=
1789 (idb->idb_exp_offset - idb->idb_bufoffset)) {
1790 printf("idm_so_rx_dataout: incomplete transfer, "
1791 "protocol err");
1792 IDM_CONN_LOG(CE_NOTE,
1793 "idm_so_rx_dataout: incomplete transfer: %ld, %d",
1794 offset, (int)(idb->idb_exp_offset - offset));
1795 idm_task_rele(idt);
1796 idm_pdu_rx_protocol_error(ic, pdu);
1797 return;
1800 * We only want to call idm_buf_rx_from_ini_done once
1801 * per transfer. It's possible that this task has
1802 * already been aborted in which case
1803 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1804 * for each buffer with idb_in_transport==B_TRUE. To
1805 * close this window and ensure that this doesn't happen,
1806 * we'll clear idb->idb_in_transport now while holding
1807 * the task mutex. This is only really an issue for
1808 * SCSI task abort -- if tasks were being aborted because
1809 * of a connection state change the state machine would
1810 * have already stopped the receive thread.
1812 mutex_enter(&idt->idt_mutex);
1815 * Release the task hold here (obtained in idm_task_find)
1816 * because the task may complete synchronously during
1817 * idm_buf_rx_from_ini_done. Since we still have an active
1818 * buffer we know there is at least one additional hold on idt.
1820 idm_task_rele(idt);
1823 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1825 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1826 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1827 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1828 uint32_t, idb->idb_xfer_len,
1829 int, XFER_BUF_RX_FROM_INI);
1830 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1831 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1832 return;
1835 idm_task_rele(idt);
1836 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1840 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1841 * the R2T PDU sent by the iSCSI target indicating that it is ready to
1842 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1843 * and looks up the task in the task tree using the itt to get the output
1844 * buffers associated the task. The R2T PDU contains the offset of the
1845 * requested data and the data length. This function then constructs a
1846 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1847 * PDU is associated with the R2T by the Target Transfer Tag (ttt).
1850 static void
1851 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1853 idm_task_t *idt;
1854 idm_buf_t *idb;
1855 iscsi_rtt_hdr_t *rtt_hdr;
1856 uint32_t data_offset;
1857 uint32_t data_length;
1859 ASSERT(ic != NULL);
1860 ASSERT(pdu != NULL);
1862 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1863 data_offset = ntohl(rtt_hdr->data_offset);
1864 data_length = ntohl(rtt_hdr->data_length);
1865 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1867 if (idt == NULL) {
1868 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1869 idm_pdu_rx_protocol_error(ic, pdu);
1870 return;
1873 /* Find the buffer bound to the task by the iSCSI initiator */
1874 mutex_enter(&idt->idt_mutex);
1875 idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1876 if (idb == NULL) {
1877 mutex_exit(&idt->idt_mutex);
1878 idm_task_rele(idt);
1879 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1880 idm_pdu_rx_protocol_error(ic, pdu);
1881 return;
1884 /* return buffer contains this data */
1885 if (data_offset + data_length > idb->idb_buflen) {
1886 /* Overflow */
1887 mutex_exit(&idt->idt_mutex);
1888 idm_task_rele(idt);
1889 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1890 "buffer");
1891 idm_pdu_rx_protocol_error(ic, pdu);
1892 return;
1895 idt->idt_r2t_ttt = rtt_hdr->ttt;
1896 idt->idt_exp_datasn = 0;
1898 idm_so_send_rtt_data(ic, idt, idb, data_offset,
1899 ntohl(rtt_hdr->data_length));
1901 * the idt_mutex is released in idm_so_send_rtt_data
1904 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1905 idm_task_rele(idt);
1909 idm_status_t
1910 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1912 uint8_t pad[ISCSI_PAD_WORD_LEN];
1913 int pad_len;
1914 uint32_t data_digest_crc;
1915 uint32_t crc_calculated;
1916 int total_len;
1917 idm_so_conn_t *so_conn;
1919 so_conn = ic->ic_transport_private;
1921 pad_len = ((ISCSI_PAD_WORD_LEN -
1922 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1923 (ISCSI_PAD_WORD_LEN - 1));
1925 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1927 total_len = pdu->isp_datalen;
1929 if (pad_len) {
1930 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad;
1931 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len;
1932 total_len += pad_len;
1933 pdu->isp_iovlen++;
1936 /* setup data digest */
1937 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1938 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1939 (char *)&data_digest_crc;
1940 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1941 sizeof (data_digest_crc);
1942 total_len += sizeof (data_digest_crc);
1943 pdu->isp_iovlen++;
1946 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1948 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1949 pdu->isp_iovlen, total_len) != 0) {
1950 return (IDM_STATUS_IO);
1953 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1954 crc_calculated = idm_crc32c(pdu->isp_data,
1955 pdu->isp_datalen);
1956 if (pad_len) {
1957 crc_calculated = idm_crc32c_continued((char *)&pad,
1958 pad_len, crc_calculated);
1960 if (crc_calculated != data_digest_crc) {
1961 IDM_CONN_LOG(CE_WARN,
1962 "idm_sorecvdata: "
1963 "CRC error: actual 0x%x, calc 0x%x",
1964 data_digest_crc, crc_calculated);
1966 /* Invalid Data Digest */
1967 return (IDM_STATUS_DATA_DIGEST);
1971 return (IDM_STATUS_SUCCESS);
1975 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1976 * Data-type PDU header must be read into the idm_pdu_t structure prior to
1977 * calling this function.
1979 idm_status_t
1980 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1982 iscsi_data_hdr_t *bhs;
1983 idm_task_t *task;
1984 uint32_t offset;
1985 uint8_t opcode;
1986 uint32_t dlength;
1987 list_t *buflst;
1988 uint32_t xfer_bytes;
1989 idm_status_t status;
1991 ASSERT(ic != NULL);
1992 ASSERT(pdu != NULL);
1994 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1996 offset = ntohl(bhs->offset);
1997 opcode = IDM_PDU_OPCODE(pdu);
1998 dlength = n2h24(bhs->dlength);
2000 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
2001 (opcode == ISCSI_OP_SCSI_DATA));
2004 * Successful lookup implicitly gets a "hold" on the task. This
2005 * hold must be released before leaving this function. At one
2006 * point we were caching this task context and retaining the hold
2007 * but it turned out to be very difficult to release the hold properly.
2008 * The task can be aborted and the connection shutdown between this
2009 * call and the subsequent expected call to idm_so_rx_datain/
2010 * idm_so_rx_dataout (in which case those functions are not called).
2011 * Releasing the hold in the PDU callback doesn't work well either
2012 * because the whole task may be completed by then at which point
2013 * it is too late to release the hold -- for better or worse this
2014 * code doesn't wait on the refcnts during normal operation.
2015 * idm_task_find() is very fast and it is not a huge burden if we
2016 * have to do it twice.
2018 task = idm_task_find(ic, bhs->itt, bhs->ttt);
2019 if (task == NULL) {
2020 IDM_CONN_LOG(CE_WARN,
2021 "idm_sorecv_scsidata: could not find task");
2022 return (IDM_STATUS_FAIL);
2025 mutex_enter(&task->idt_mutex);
2026 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
2027 &task->idt_inbufv : &task->idt_outbufv;
2028 pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
2029 mutex_exit(&task->idt_mutex);
2031 if (pdu->isp_sorx_buf == NULL) {
2032 idm_task_rele(task);
2033 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
2034 "buffer for offset %x opcode=%x",
2035 offset, opcode);
2036 return (IDM_STATUS_FAIL);
2039 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
2040 ASSERT(xfer_bytes != 0);
2041 if (xfer_bytes != dlength) {
2042 idm_task_rele(task);
2044 * Buffer overflow, connection error. The PDU data is still
2045 * sitting in the socket so we can't use the connection
2046 * again until that data is drained.
2048 return (IDM_STATUS_FAIL);
2051 status = idm_sorecvdata(ic, pdu);
2053 idm_task_rele(task);
2055 return (status);
2058 static uint32_t
2059 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
2061 uint32_t buf_ro = ro - idb->idb_bufoffset;
2062 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro);
2064 ASSERT(ro >= idb->idb_bufoffset);
2066 pdu->isp_iov[pdu->isp_iovlen].iov_base =
2067 (caddr_t)idb->idb_buf + buf_ro;
2068 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len;
2069 pdu->isp_iovlen++;
2071 return (xfer_len);
2075 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
2077 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
2078 ASSERT(pdu->isp_data != NULL);
2080 pdu->isp_databuflen = pdu->isp_datalen;
2081 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
2082 pdu->isp_iov[0].iov_len = pdu->isp_datalen;
2083 pdu->isp_iovlen = 1;
2085 * Since we are associating a new data buffer with this received
2086 * PDU we need to set a specific callback to free the data
2087 * after the PDU is processed.
2089 pdu->isp_flags |= IDM_PDU_ADDL_DATA;
2090 pdu->isp_callback = idm_sorx_addl_pdu_cb;
2092 return (idm_sorecvdata(ic, pdu));
2095 void
2096 idm_sorx_thread(void *arg)
2098 boolean_t conn_failure = B_FALSE;
2099 idm_conn_t *ic = (idm_conn_t *)arg;
2100 idm_so_conn_t *so_conn;
2101 idm_pdu_t *pdu;
2102 idm_status_t rc;
2104 idm_conn_hold(ic);
2106 mutex_enter(&ic->ic_mutex);
2108 so_conn = ic->ic_transport_private;
2109 so_conn->ic_rx_thread_running = B_TRUE;
2110 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2111 cv_signal(&ic->ic_cv);
2113 while (so_conn->ic_rx_thread_running) {
2114 mutex_exit(&ic->ic_mutex);
2117 * Get PDU with default header size (large enough for
2118 * BHS plus any anticipated AHS). PDU from
2119 * the cache will have all values set correctly
2120 * for sockets RX including callback.
2122 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2123 pdu->isp_ic = ic;
2124 pdu->isp_flags = 0;
2125 pdu->isp_transport_hdrlen = 0;
2127 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2129 * Call idm_pdu_complete so that we call the callback
2130 * and ensure any memory allocated in idm_sorecvhdr
2131 * gets freed up.
2133 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2136 * If ic_rx_thread_running is still set then
2137 * this is some kind of connection problem
2138 * on the socket. In this case we want to
2139 * generate an event. Otherwise some other
2140 * thread closed the socket due to another
2141 * issue in which case we don't need to
2142 * generate an event.
2144 mutex_enter(&ic->ic_mutex);
2145 if (so_conn->ic_rx_thread_running) {
2146 conn_failure = B_TRUE;
2147 so_conn->ic_rx_thread_running = B_FALSE;
2150 continue;
2154 * Header has been read and validated. Now we need
2155 * to read the PDU data payload (if present). SCSI data
2156 * need to be transferred from the socket directly into
2157 * the associated transfer buffer for the SCSI task.
2159 if (pdu->isp_datalen != 0) {
2160 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2161 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2162 rc = idm_sorecv_scsidata(ic, pdu);
2164 * All SCSI errors are fatal to the
2165 * connection right now since we have no
2166 * place to put the data. What we need
2167 * is some kind of sink to dispose of unwanted
2168 * SCSI data. For example an invalid task tag
2169 * should not kill the connection (although
2170 * we may want to drop the connection).
2172 } else {
2174 * Not data PDUs so allocate a buffer for the
2175 * data segment and read the remaining data.
2177 rc = idm_sorecv_nonscsidata(ic, pdu);
2179 if (rc != 0) {
2181 * Call idm_pdu_complete so that we call the
2182 * callback and ensure any memory allocated
2183 * in idm_sorecvhdr gets freed up.
2185 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2188 * If ic_rx_thread_running is still set then
2189 * this is some kind of connection problem
2190 * on the socket. In this case we want to
2191 * generate an event. Otherwise some other
2192 * thread closed the socket due to another
2193 * issue in which case we don't need to
2194 * generate an event.
2196 mutex_enter(&ic->ic_mutex);
2197 if (so_conn->ic_rx_thread_running) {
2198 conn_failure = B_TRUE;
2199 so_conn->ic_rx_thread_running = B_FALSE;
2201 continue;
2206 * Process RX PDU
2208 idm_pdu_rx(ic, pdu);
2210 mutex_enter(&ic->ic_mutex);
2213 mutex_exit(&ic->ic_mutex);
2216 * If we dropped out of the RX processing loop because of
2217 * a socket problem or other connection failure (including
2218 * digest errors) then we need to generate a state machine
2219 * event to shut the connection down.
2220 * If the state machine is already in, for example, INIT_ERROR, this
2221 * event will get dropped, and the TX thread will never be notified
2222 * to shut down. To be safe, we'll just notify it here.
2224 if (conn_failure) {
2225 if (so_conn->ic_tx_thread_running) {
2226 so_conn->ic_tx_thread_running = B_FALSE;
2227 mutex_enter(&so_conn->ic_tx_mutex);
2228 cv_signal(&so_conn->ic_tx_cv);
2229 mutex_exit(&so_conn->ic_tx_mutex);
2232 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2235 idm_conn_rele(ic);
2237 thread_exit();
2241 * idm_so_tx
2243 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2244 * point. By definition, it is supposed to be fast. So, simply queue
2245 * the entry and return. The real work is done by idm_i_so_tx() via
2246 * idm_sotx_thread().
2249 static void
2250 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2252 idm_so_conn_t *so_conn = ic->ic_transport_private;
2254 ASSERT(pdu->isp_ic == ic);
2255 mutex_enter(&so_conn->ic_tx_mutex);
2257 if (!so_conn->ic_tx_thread_running) {
2258 mutex_exit(&so_conn->ic_tx_mutex);
2259 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2260 return;
2263 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2264 cv_signal(&so_conn->ic_tx_cv);
2265 mutex_exit(&so_conn->ic_tx_mutex);
2268 static idm_status_t
2269 idm_i_so_tx(idm_pdu_t *pdu)
2271 idm_conn_t *ic = pdu->isp_ic;
2272 idm_status_t status = IDM_STATUS_SUCCESS;
2273 uint8_t pad[ISCSI_PAD_WORD_LEN];
2274 int pad_len;
2275 uint32_t hdr_digest_crc;
2276 uint32_t data_digest_crc = 0;
2277 int total_len = 0;
2278 int iovlen = 0;
2279 struct iovec iov[6];
2280 idm_so_conn_t *so_conn;
2282 so_conn = ic->ic_transport_private;
2284 /* Setup BHS */
2285 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr;
2286 iov[iovlen].iov_len = pdu->isp_hdrlen;
2287 total_len += iov[iovlen].iov_len;
2288 iovlen++;
2290 /* Setup header digest */
2291 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2292 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2293 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2295 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
2296 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
2297 total_len += iov[iovlen].iov_len;
2298 iovlen++;
2301 /* Setup the data */
2302 if (pdu->isp_datalen) {
2303 idm_task_t *idt;
2304 idm_buf_t *idb;
2305 iscsi_data_hdr_t *ihp;
2306 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2307 /* Write of immediate data */
2308 if (ic->ic_ffp &&
2309 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_CMD ||
2310 IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA)) {
2311 idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2312 if (idt) {
2313 mutex_enter(&idt->idt_mutex);
2314 idb = idm_buf_find(&idt->idt_outbufv, 0);
2315 mutex_exit(&idt->idt_mutex);
2317 * If the initiator call to idm_buf_alloc
2318 * failed then we can get to this point
2319 * without a bound buffer. The associated
2320 * connection failure will clean things up
2321 * later. It would be nice to come up with
2322 * a cleaner way to handle this. In
2323 * particular it seems absurd to look up
2324 * the task and the buffer just to update
2325 * this counter.
2327 if (idb)
2328 idb->idb_xfer_len += pdu->isp_datalen;
2329 idm_task_rele(idt);
2333 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2334 iov[iovlen].iov_len = pdu->isp_datalen;
2335 total_len += iov[iovlen].iov_len;
2336 iovlen++;
2339 /* Setup the data pad if necessary */
2340 pad_len = ((ISCSI_PAD_WORD_LEN -
2341 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2342 (ISCSI_PAD_WORD_LEN - 1));
2344 if (pad_len) {
2345 bzero(pad, sizeof (pad));
2346 iov[iovlen].iov_base = (void *)&pad;
2347 iov[iovlen].iov_len = pad_len;
2348 total_len += iov[iovlen].iov_len;
2349 iovlen++;
2353 * Setup the data digest if enabled. Data-digest is not sent
2354 * for login-phase PDUs.
2356 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2357 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2358 (pdu->isp_datalen || pad_len)) {
2360 * RFC3720/10.2.3: A zero-length Data Segment also
2361 * implies a zero-length data digest.
2363 if (pdu->isp_datalen) {
2364 data_digest_crc = idm_crc32c(pdu->isp_data,
2365 pdu->isp_datalen);
2367 if (pad_len) {
2368 data_digest_crc = idm_crc32c_continued(&pad,
2369 pad_len, data_digest_crc);
2372 iov[iovlen].iov_base = (caddr_t)&data_digest_crc;
2373 iov[iovlen].iov_len = sizeof (data_digest_crc);
2374 total_len += iov[iovlen].iov_len;
2375 iovlen++;
2378 /* Transmit the PDU */
2379 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2380 total_len) != 0) {
2381 /* Set error status */
2382 IDM_CONN_LOG(CE_WARN,
2383 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2384 "data: %p", (void *) so_conn->ic_so, (void *) ic,
2385 (void *) pdu->isp_data);
2386 status = IDM_STATUS_IO;
2390 * Success does not mean that the PDU actually reached the
2391 * remote node since it could get dropped along the way.
2393 idm_pdu_complete(pdu, status);
2395 return (status);
2399 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2400 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2401 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2402 * A target can invoke this function multiple times for a single read command
2403 * (identified by the same ITT) to split the input into several sequences.
2405 * DataSN starts with 0 for the first data PDU of an input command and advances
2406 * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2407 * which is set to 1 for the last data PDU of a sequence.
2408 * If the initiator supports phase collapse, the status bit must be set along
2409 * with the F bit to indicate that the status is shipped together with the last
2410 * Data-In PDU.
2412 * The data PDUs within a sequence will be sent in order with the buffer offset
2413 * in increasing order. i.e. initiator and target must have negotiated the
2414 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2416 * Caller holds idt->idt_mutex
2418 static idm_status_t
2419 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2421 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private;
2422 idm_pdu_t tmppdu;
2424 ASSERT(mutex_owned(&idt->idt_mutex));
2427 * Put the idm_buf_t on the tx queue. It will be transmitted by
2428 * idm_sotx_thread.
2430 mutex_enter(&so_conn->ic_tx_mutex);
2432 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2433 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2434 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2435 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2437 if (!so_conn->ic_tx_thread_running) {
2438 mutex_exit(&so_conn->ic_tx_mutex);
2440 * Don't release idt->idt_mutex since we're supposed to hold
2441 * in when calling idm_buf_tx_to_ini_done
2443 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2444 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2445 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2446 uint32_t, idb->idb_xfer_len,
2447 int, XFER_BUF_TX_TO_INI);
2448 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2449 return (IDM_STATUS_FAIL);
2453 * Build a template for the data PDU headers we will use so that
2454 * the SN values will stay consistent with other PDU's we are
2455 * transmitting like R2T and SCSI status.
2457 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2458 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2459 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2460 ISCSI_OP_SCSI_DATA_RSP);
2461 idb->idb_tx_thread = B_TRUE;
2462 list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2463 cv_signal(&so_conn->ic_tx_cv);
2464 mutex_exit(&so_conn->ic_tx_mutex);
2465 mutex_exit(&idt->idt_mutex);
2468 * Returning success here indicates the transfer was successfully
2469 * dispatched -- it does not mean that the transfer completed
2470 * successfully.
2472 return (IDM_STATUS_SUCCESS);
2476 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2477 * data blocks it is ready to receive from the initiator in response to a WRITE
2478 * SCSI command. The target iSCSI layer passes the information about the desired
2479 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2480 * offset and datalen are passed via the 'idb' argument.
2482 * Scope for Prototype build:
2483 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2484 * negotiated the "InitialR2T" to "Yes".
2486 * Caller holds idt->idt_mutex
2488 static idm_status_t
2489 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2491 idm_pdu_t *pdu;
2492 iscsi_rtt_hdr_t *rtt;
2494 ASSERT(mutex_owned(&idt->idt_mutex));
2496 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2497 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2498 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2499 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2501 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2502 pdu->isp_ic = idt->idt_ic;
2503 pdu->isp_flags = IDM_PDU_SET_STATSN;
2504 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2506 /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2507 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2509 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2510 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2512 rtt->opcode = ISCSI_OP_RTT_RSP;
2513 rtt->flags = ISCSI_FLAG_FINAL;
2514 rtt->data_offset = htonl(idb->idb_bufoffset);
2515 rtt->data_length = htonl(idb->idb_xfer_len);
2516 rtt->rttsn = htonl(idt->idt_exp_rttsn++);
2518 /* Keep track of buffer offsets */
2519 idb->idb_exp_offset = idb->idb_bufoffset;
2520 mutex_exit(&idt->idt_mutex);
2523 * Transmit the PDU.
2525 idm_pdu_tx(pdu);
2527 return (IDM_STATUS_SUCCESS);
2530 static idm_status_t
2531 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2533 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2534 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2535 KM_NOSLEEP);
2536 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2537 } else {
2538 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2539 idb->idb_buf_private = NULL;
2542 if (idb->idb_buf == NULL) {
2543 IDM_CONN_LOG(CE_NOTE,
2544 "idm_so_buf_alloc: failed buffer allocation");
2545 return (IDM_STATUS_FAIL);
2548 return (IDM_STATUS_SUCCESS);
2551 /* ARGSUSED */
2552 static idm_status_t
2553 idm_so_buf_setup(idm_buf_t *idb)
2555 /* Ensure bufalloc'd flag is unset */
2556 idb->idb_bufalloc = B_FALSE;
2558 return (IDM_STATUS_SUCCESS);
2561 /* ARGSUSED */
2562 static void
2563 idm_so_buf_teardown(idm_buf_t *idb)
2565 /* nothing to do here */
2568 static void
2569 idm_so_buf_free(idm_buf_t *idb)
2571 if (idb->idb_buf_private == NULL) {
2572 kmem_free(idb->idb_buf, idb->idb_buflen);
2573 } else {
2574 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2578 static void
2579 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2580 uint32_t offset, uint32_t length)
2582 idm_so_conn_t *so_conn = ic->ic_transport_private;
2583 idm_pdu_t tmppdu;
2584 idm_buf_t *rtt_buf;
2586 ASSERT(mutex_owned(&idt->idt_mutex));
2589 * Allocate a buffer to represent the RTT transfer. We could further
2590 * optimize this by allocating the buffers internally from an rtt
2591 * specific buffer cache since this is socket-specific code but for
2592 * now we will keep it simple.
2594 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2595 if (rtt_buf == NULL) {
2597 * If we're in FFP then the failure was likely a resource
2598 * allocation issue and we should close the connection by
2599 * sending a CE_TRANSPORT_FAIL event.
2601 * If we're not in FFP then idm_buf_alloc will always
2602 * fail and the state is transitioning to "complete" anyway
2603 * so we won't bother to send an event.
2605 mutex_enter(&ic->ic_state_mutex);
2606 if (ic->ic_ffp)
2607 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2608 (uintptr_t)NULL, CT_NONE);
2609 mutex_exit(&ic->ic_state_mutex);
2610 mutex_exit(&idt->idt_mutex);
2611 return;
2614 rtt_buf->idb_buf_cb = NULL;
2615 rtt_buf->idb_cb_arg = NULL;
2616 rtt_buf->idb_bufoffset = offset;
2617 rtt_buf->idb_xfer_len = length;
2618 rtt_buf->idb_ic = idt->idt_ic;
2619 rtt_buf->idb_task_binding = idt;
2622 * The new buffer (if any) represents an additional
2623 * reference on the task
2625 idm_task_hold(idt);
2626 mutex_exit(&idt->idt_mutex);
2629 * Put the idm_buf_t on the tx queue. It will be transmitted by
2630 * idm_sotx_thread.
2632 mutex_enter(&so_conn->ic_tx_mutex);
2634 if (!so_conn->ic_tx_thread_running) {
2635 idm_buf_free(rtt_buf);
2636 mutex_exit(&so_conn->ic_tx_mutex);
2637 idm_task_rele(idt);
2638 return;
2642 * Build a template for the data PDU headers we will use so that
2643 * the SN values will stay consistent with other PDU's we are
2644 * transmitting like R2T and SCSI status.
2646 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2647 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2648 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2649 ISCSI_OP_SCSI_DATA);
2650 rtt_buf->idb_tx_thread = B_TRUE;
2651 rtt_buf->idb_in_transport = B_TRUE;
2652 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2653 cv_signal(&so_conn->ic_tx_cv);
2654 mutex_exit(&so_conn->ic_tx_mutex);
2657 static void
2658 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2661 * Don't worry about status -- we assume any error handling
2662 * is performed by the caller (idm_sotx_thread).
2664 idb->idb_in_transport = B_FALSE;
2665 idm_task_rele(idt);
2666 idm_buf_free(idb);
2669 static idm_status_t
2670 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2671 uint32_t buf_region_offset, uint32_t buf_region_length)
2673 idm_conn_t *ic;
2674 uint32_t max_dataseglen;
2675 size_t remainder, chunk;
2676 uint32_t data_offset = buf_region_offset;
2677 iscsi_data_hdr_t *bhs;
2678 idm_pdu_t *pdu;
2679 idm_status_t tx_status;
2681 ASSERT(mutex_owned(&idt->idt_mutex));
2683 ic = idt->idt_ic;
2685 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2686 remainder = buf_region_length;
2688 while (remainder) {
2689 if (idt->idt_state != TASK_ACTIVE) {
2690 ASSERT((idt->idt_state != TASK_IDLE) &&
2691 (idt->idt_state != TASK_COMPLETE));
2692 return (IDM_STATUS_ABORTED);
2695 /* check to see if we need to chunk the data */
2696 if (remainder > max_dataseglen) {
2697 chunk = max_dataseglen;
2698 } else {
2699 chunk = remainder;
2702 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2703 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2704 pdu->isp_ic = ic;
2705 pdu->isp_flags = 0; /* initialize isp_flags */
2708 * We've already built a build a header template
2709 * to use during the transfer. Use this template so that
2710 * the SN values stay consistent with any unrelated PDU's
2711 * being transmitted.
2713 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2714 sizeof (iscsi_hdr_t));
2717 * Set DataSN, data offset, and flags in BHS
2718 * For the prototype build, A = 0, S = 0, U = 0
2720 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2722 bhs->datasn = htonl(idt->idt_exp_datasn++);
2724 hton24(bhs->dlength, chunk);
2725 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2727 /* setup data */
2728 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset;
2729 pdu->isp_datalen = (uint_t)chunk;
2731 if (chunk == remainder) {
2732 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2733 /* Piggyback the status with the last data PDU */
2734 if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2735 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2736 IDM_PDU_ADVANCE_STATSN;
2737 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2738 (idt, pdu);
2739 idt->idt_flags |=
2740 IDM_TASK_PHASECOLLAPSE_SUCCESS;
2745 remainder -= chunk;
2746 data_offset += chunk;
2748 /* Instrument the data-send DTrace probe. */
2749 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2750 DTRACE_ISCSI_2(data__send,
2751 idm_conn_t *, idt->idt_ic,
2752 iscsi_data_rsp_hdr_t *,
2753 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2757 * Now that we're done working with idt_exp_datasn,
2758 * idt->idt_state and idb->idb_bufoffset we can release
2759 * the task lock -- don't want to hold it across the
2760 * call to idm_i_so_tx since we could block.
2762 mutex_exit(&idt->idt_mutex);
2765 * Transmit the PDU. Call the internal routine directly
2766 * as there is already implicit ordering.
2768 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2769 mutex_enter(&idt->idt_mutex);
2770 return (tx_status);
2773 mutex_enter(&idt->idt_mutex);
2774 idt->idt_tx_bytes += chunk;
2777 return (IDM_STATUS_SUCCESS);
2781 * TX PDU cache
2783 /* ARGSUSED */
2785 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2787 idm_pdu_t *pdu = hdl;
2789 bzero(pdu, sizeof (idm_pdu_t));
2790 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2791 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2792 pdu->isp_callback = idm_sotx_cache_pdu_cb;
2793 pdu->isp_magic = IDM_PDU_MAGIC;
2794 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2796 return (0);
2799 /* ARGSUSED */
2800 void
2801 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2803 /* reset values between use */
2804 pdu->isp_datalen = 0;
2806 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2810 * RX PDU cache
2812 /* ARGSUSED */
2814 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2816 idm_pdu_t *pdu = hdl;
2818 bzero(pdu, sizeof (idm_pdu_t));
2819 pdu->isp_magic = IDM_PDU_MAGIC;
2820 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2821 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2823 return (0);
2826 /* ARGSUSED */
2827 static void
2828 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2830 pdu->isp_iovlen = 0;
2831 pdu->isp_sorx_buf = 0;
2832 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2835 static void
2836 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2839 * We had to modify our cached RX PDU with a longer header buffer
2840 * and/or a longer data buffer. Release the new buffers and fix
2841 * the fields back to what we would expect for a cached RX PDU.
2843 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2844 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2846 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2847 kmem_free(pdu->isp_data, pdu->isp_datalen);
2849 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2850 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2851 pdu->isp_data = NULL;
2852 pdu->isp_datalen = 0;
2853 pdu->isp_sorx_buf = 0;
2854 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2855 idm_sorx_cache_pdu_cb(pdu, status);
2859 * This thread is only active when I/O is queued for transmit
2860 * because the socket is busy.
2862 void
2863 idm_sotx_thread(void *arg)
2865 idm_conn_t *ic = arg;
2866 idm_tx_obj_t *object, *next;
2867 idm_so_conn_t *so_conn;
2868 idm_status_t status = IDM_STATUS_SUCCESS;
2870 idm_conn_hold(ic);
2872 mutex_enter(&ic->ic_mutex);
2873 so_conn = ic->ic_transport_private;
2874 so_conn->ic_tx_thread_running = B_TRUE;
2875 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2876 cv_signal(&ic->ic_cv);
2877 mutex_exit(&ic->ic_mutex);
2879 mutex_enter(&so_conn->ic_tx_mutex);
2881 while (so_conn->ic_tx_thread_running) {
2882 while (list_is_empty(&so_conn->ic_tx_list)) {
2883 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2884 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2885 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2887 if (!so_conn->ic_tx_thread_running) {
2888 goto tx_bail;
2892 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2893 list_remove(&so_conn->ic_tx_list, object);
2894 mutex_exit(&so_conn->ic_tx_mutex);
2896 switch (object->idm_tx_obj_magic) {
2897 case IDM_PDU_MAGIC: {
2898 idm_pdu_t *pdu = (idm_pdu_t *)object;
2899 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2900 idm_pdu_t *, (idm_pdu_t *)object);
2902 if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2903 /* No IDM task */
2904 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2906 status = idm_i_so_tx((idm_pdu_t *)object);
2907 break;
2909 case IDM_BUF_MAGIC: {
2910 idm_buf_t *idb = (idm_buf_t *)object;
2911 idm_task_t *idt = idb->idb_task_binding;
2913 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2914 idm_buf_t *, idb);
2916 mutex_enter(&idt->idt_mutex);
2917 status = idm_so_send_buf_region(idt,
2918 idb, 0, idb->idb_xfer_len);
2921 * TX thread owns the buffer so we expect it to
2922 * be "in transport"
2924 ASSERT(idb->idb_in_transport);
2925 if (IDM_CONN_ISTGT(ic)) {
2927 * idm_buf_tx_to_ini_done releases
2928 * idt->idt_mutex
2930 DTRACE_ISCSI_8(xfer__done,
2931 idm_conn_t *, idt->idt_ic,
2932 uintptr_t, idb->idb_buf,
2933 uint32_t, idb->idb_bufoffset,
2934 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2935 uint32_t, idb->idb_xfer_len,
2936 int, XFER_BUF_TX_TO_INI);
2937 idm_buf_tx_to_ini_done(idt, idb, status);
2938 } else {
2939 idm_so_send_rtt_data_done(idt, idb);
2940 mutex_exit(&idt->idt_mutex);
2942 break;
2945 default:
2946 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2947 "(0x%08x)", object->idm_tx_obj_magic);
2948 status = IDM_STATUS_FAIL;
2951 mutex_enter(&so_conn->ic_tx_mutex);
2953 if (status != IDM_STATUS_SUCCESS) {
2954 so_conn->ic_tx_thread_running = B_FALSE;
2955 idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2960 * Before we leave, we need to abort every item remaining in the
2961 * TX list.
2964 tx_bail:
2965 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2967 while (object != NULL) {
2968 next = list_next(&so_conn->ic_tx_list, object);
2970 list_remove(&so_conn->ic_tx_list, object);
2971 switch (object->idm_tx_obj_magic) {
2972 case IDM_PDU_MAGIC:
2973 idm_pdu_complete((idm_pdu_t *)object,
2974 IDM_STATUS_ABORTED);
2975 break;
2977 case IDM_BUF_MAGIC: {
2978 idm_buf_t *idb = (idm_buf_t *)object;
2979 idm_task_t *idt = idb->idb_task_binding;
2980 mutex_exit(&so_conn->ic_tx_mutex);
2981 mutex_enter(&idt->idt_mutex);
2983 * TX thread owns the buffer so we expect it to
2984 * be "in transport"
2986 ASSERT(idb->idb_in_transport);
2987 if (IDM_CONN_ISTGT(ic)) {
2989 * idm_buf_tx_to_ini_done releases
2990 * idt->idt_mutex
2992 DTRACE_ISCSI_8(xfer__done,
2993 idm_conn_t *, idt->idt_ic,
2994 uintptr_t, idb->idb_buf,
2995 uint32_t, idb->idb_bufoffset,
2996 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2997 uint32_t, idb->idb_xfer_len,
2998 int, XFER_BUF_TX_TO_INI);
2999 idm_buf_tx_to_ini_done(idt, idb,
3000 IDM_STATUS_ABORTED);
3001 } else {
3002 idm_so_send_rtt_data_done(idt, idb);
3003 mutex_exit(&idt->idt_mutex);
3005 mutex_enter(&so_conn->ic_tx_mutex);
3006 break;
3008 default:
3009 IDM_CONN_LOG(CE_WARN,
3010 "idm_sotx_thread: Unexpected magic "
3011 "(0x%08x)", object->idm_tx_obj_magic);
3014 object = next;
3017 mutex_exit(&so_conn->ic_tx_mutex);
3018 idm_conn_rele(ic);
3019 thread_exit();
3020 /*NOTREACHED*/
3023 static void
3024 idm_so_socket_set_nonblock(struct sonode *node)
3026 (void) fop_setfl(node->so_vnode, node->so_flag,
3027 (node->so_state | FNONBLOCK), CRED(), NULL);
3030 static void
3031 idm_so_socket_set_block(struct sonode *node)
3033 (void) fop_setfl(node->so_vnode, node->so_flag,
3034 (node->so_state & (~FNONBLOCK)), CRED(), NULL);
3039 * Called by kernel sockets when the connection has been accepted or
3040 * rejected. In early volo, a "disconnect" callback was sent instead of
3041 * "connectfailed", so we check for both.
3043 /* ARGSUSED */
3044 void
3045 idm_so_timed_socket_connect_cb(ksocket_t ks,
3046 ksocket_callback_event_t ev, void *arg, uintptr_t info)
3048 idm_so_timed_socket_t *itp = arg;
3049 ASSERT(itp != NULL);
3050 ASSERT(ev == KSOCKET_EV_CONNECTED ||
3051 ev == KSOCKET_EV_CONNECTFAILED ||
3052 ev == KSOCKET_EV_DISCONNECTED);
3054 mutex_enter(&idm_so_timed_socket_mutex);
3055 itp->it_callback_called = B_TRUE;
3056 if (ev == KSOCKET_EV_CONNECTED) {
3057 itp->it_socket_error_code = 0;
3058 } else {
3059 /* Make sure the error code is non-zero on error */
3060 if (info == 0)
3061 info = ECONNRESET;
3062 itp->it_socket_error_code = (int)info;
3064 cv_signal(&itp->it_cv);
3065 mutex_exit(&idm_so_timed_socket_mutex);
3069 idm_so_timed_socket_connect(ksocket_t ks,
3070 struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
3072 clock_t conn_login_max;
3073 int rc, nonblocking, rval;
3074 idm_so_timed_socket_t it;
3075 ksocket_callbacks_t ks_cb;
3077 conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
3080 * Set to non-block socket mode, with callback on connect
3081 * Early volo used "disconnected" instead of "connectfailed",
3082 * so set callback to look for both.
3084 bzero(&it, sizeof (it));
3085 ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
3086 KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
3087 ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
3088 ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
3089 ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
3090 cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
3091 rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
3092 if (rc != 0)
3093 return (rc);
3095 /* Set to non-blocking mode */
3096 nonblocking = 1;
3097 rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3098 CRED());
3099 if (rc != 0)
3100 goto cleanup;
3102 bzero(&it, sizeof (it));
3103 for (;;) {
3105 * Warning -- in a loopback scenario, the call to
3106 * the connect_cb can occur inside the call to
3107 * ksocket_connect. Do not hold the mutex around the
3108 * call to ksocket_connect.
3110 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3111 if (rc == 0 || rc == EISCONN) {
3112 /* socket success or already success */
3113 rc = 0;
3114 break;
3116 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3117 break;
3120 /* TCP connect still in progress. See if out of time. */
3121 if (ddi_get_lbolt() > conn_login_max) {
3123 * Connection retry timeout,
3124 * failed connect to target.
3126 rc = ETIMEDOUT;
3127 break;
3131 * TCP connect still in progress. Sleep until callback.
3132 * Do NOT go to sleep if the callback already occurred!
3134 mutex_enter(&idm_so_timed_socket_mutex);
3135 if (!it.it_callback_called) {
3136 (void) cv_timedwait(&it.it_cv,
3137 &idm_so_timed_socket_mutex, conn_login_max);
3139 if (it.it_callback_called) {
3140 rc = it.it_socket_error_code;
3141 mutex_exit(&idm_so_timed_socket_mutex);
3142 break;
3144 /* If timer expires, go call ksocket_connect one last time. */
3145 mutex_exit(&idm_so_timed_socket_mutex);
3148 /* resume blocking mode */
3149 nonblocking = 0;
3150 (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3151 CRED());
3152 cleanup:
3153 (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3154 cv_destroy(&it.it_cv);
3155 if (rc != 0) {
3156 idm_soshutdown(ks);
3158 return (rc);
3162 void
3163 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3165 int dp_addr_size;
3166 struct sockaddr_in *sin;
3167 struct sockaddr_in6 *sin6;
3169 /* Build sockaddr_storage for this portal (idm_addr_t) */
3170 bzero(sa, sizeof (*sa));
3171 dp_addr_size = dportal->a_addr.i_insize;
3172 if (dp_addr_size == sizeof (struct in_addr)) {
3173 /* IPv4 */
3174 sa->ss_family = AF_INET;
3175 sin = (struct sockaddr_in *)sa;
3176 sin->sin_port = htons(dportal->a_port);
3177 bcopy(&dportal->a_addr.i_addr.in4,
3178 &sin->sin_addr, sizeof (struct in_addr));
3179 } else if (dp_addr_size == sizeof (struct in6_addr)) {
3180 /* IPv6 */
3181 sa->ss_family = AF_INET6;
3182 sin6 = (struct sockaddr_in6 *)sa;
3183 sin6->sin6_port = htons(dportal->a_port);
3184 bcopy(&dportal->a_addr.i_addr.in6,
3185 &sin6->sin6_addr, sizeof (struct in6_addr));
3186 } else {
3187 ASSERT(0);
/*
 * idm_sa_ntop
 *
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port (the separator before the port is a dot, not a
 * colon).  This is used in calls to logging functions.  If several calls
 * to idm_sa_ntop are made within the same invocation of a logging
 * function, then each one needs its own buf.
 *
 *	sa	address to format; only AF_INET and AF_INET6 are understood
 *	buf	caller-supplied output buffer
 *	size	size of buf in bytes
 *
 * Always returns buf.  When the family is not recognised, the address
 * cannot be converted, or buf is too small for the longest possible port
 * suffix, buf receives the placeholder "[0].-1" instead.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
	static const char bogus_ip[] = "[0].-1";
	char tmp[INET6_ADDRSTRLEN];
	const void *addr;
	uint16_t port;		/* network byte order */

	/* Locate the address and port for the family at hand */
	switch (sa->ss_family) {
	case AF_INET6: {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *) sa;

		addr = &in6->sin6_addr;
		port = in6->sin6_port;
		break;
	}
	case AF_INET: {
		const struct sockaddr_in *in = (const struct sockaddr_in *) sa;

		addr = &in->sin_addr;
		port = in->sin_port;
		break;
	}
	default:
		goto err;
	}

	/*
	 * A failed conversion leaves tmp unset, so it must not be passed
	 * to strlen(); fall back to the placeholder instead.
	 */
	if (inet_ntop(sa->ss_family, addr, tmp, sizeof (tmp)) == NULL)
		goto err;

	/* sizeof counts the NUL, so this is the worst-case output size */
	if (strlen(tmp) + sizeof ("[].65535") > size)
		goto err;

	(void) snprintf(buf, size, "[%s].%u", tmp, ntohs(port));
	return (buf);

err:
	(void) snprintf(buf, size, "%s", bogus_ip);
	return (buf);
}