4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
36 #include <sys/socket_impl.h>
37 #include <sys/taskq_impl.h>
42 * idm_taskid_table_lock, idm_task_t.idt_mutex
/*
 * CF_* flag bits. Each value occupies a distinct bit so they can be OR-ed
 * together; CF_ERROR uses the top bit of a 32-bit word.
 * NOTE(review): presumably stored in a per-connection flags word -- confirm
 * against the users of these flags.
 */
#define	CF_LOGIN_READY		0x00000001
#define	CF_INITIAL_LOGIN	0x00000002
#define	CF_ERROR		0x80000000
/* Watchdog interval, in seconds. */
#define	IDM_WD_INTERVAL		5
/*
 * Idle time, in seconds, after which the client "keepalive" callback is
 * invoked for a connection.
 */
#define	IDM_TRANSPORT_KEEPALIVE_IDLE_TIMEOUT	20
/*
 * Idle time, in seconds, after which a TRANSPORT_FAIL event is generated
 * for a connection.
 */
#define	IDM_TRANSPORT_FAIL_IDLE_TIMEOUT		30
72 * IDM reference count structure. Audit code is shamelessly adapted
/*
 * Sizing for the reference-count audit trail:
 *   REFCNT_AUDIT_STACK_DEPTH - number of pc_t slots in one record's
 *                              anr_stack array.
 *   REFCNT_AUDIT_BUF_MAX_REC - number of records in anb_records.
 */
#define	REFCNT_AUDIT_STACK_DEPTH	16
#define	REFCNT_AUDIT_BUF_MAX_REC	16
82 pc_t anr_stack
[REFCNT_AUDIT_STACK_DEPTH
];
83 } refcnt_audit_record_t
;
88 refcnt_audit_record_t anb_records
[REFCNT_AUDIT_BUF_MAX_REC
];
91 #define REFCNT_AUDIT(_rf_) { \
92 refcnt_audit_record_t *anr; \
94 anr = (_rf_)->ir_audit_buf.anb_records; \
95 anr += (_rf_)->ir_audit_buf.anb_index; \
96 (_rf_)->ir_audit_buf.anb_index++; \
97 (_rf_)->ir_audit_buf.anb_index &= \
98 (_rf_)->ir_audit_buf.anb_max_index; \
99 anr->anr_refcnt = (_rf_)->ir_refcnt; \
100 anr->anr_depth = getpcstack(anr->anr_stack, \
101 REFCNT_AUDIT_STACK_DEPTH); \
/*
 * Function type for a reference-count callback: takes the referenced
 * object as an opaque pointer and returns nothing. Used through a pointer
 * (idm_refcnt_cb_t *).
 */
typedef void idm_refcnt_cb_t(void *ref_obj);
114 typedef struct idm_refcnt_s
{
116 void *ir_referenced_obj
;
117 idm_refcnt_wait_t ir_waiting
;
120 idm_refcnt_cb_t
*ir_cb
;
121 refcnt_audit_buf_t ir_audit_buf
;
125 * connection parameters - These parameters would be populated at
126 * connection create, or during key-value negotiation at login
128 typedef struct idm_conn_params_s
{
129 uint32_t max_recv_dataseglen
;
130 uint32_t max_xmit_dataseglen
;
131 uint32_t conn_login_max
;
132 uint32_t conn_login_interval
;
133 boolean_t nonblock_socket
;
136 typedef struct idm_svc_s
{
137 list_node_t is_list_node
;
140 kmutex_t is_count_mutex
;
141 kcondvar_t is_count_cv
;
142 idm_refcnt_t is_refcnt
;
144 /* transport-specific service components */
147 idm_svc_req_t is_svc_req
;
/*
 * String lengths (excluding the terminating NUL) for the textual forms of
 * the TSIH and ISID kept in the connection structure.
 *
 * ISCSI_MAX_ISID_LEN is fully parenthesized so that it behaves as a single
 * value inside any enclosing expression (e.g. x / ISCSI_MAX_ISID_LEN).
 */
#define	ISCSI_MAX_TSIH_LEN	6	/* 0x%04x */
#define	ISCSI_MAX_ISID_LEN	(ISCSI_ISID_LEN * 2)
153 typedef struct idm_conn_s
{
154 list_node_t ic_list_node
;
156 idm_refcnt_t ic_refcnt
;
157 idm_svc_t
*ic_svc_binding
; /* Target conn. only */
158 idm_sockaddr_t ic_ini_dst_addr
;
159 struct sockaddr_storage ic_laddr
; /* conn local address */
160 struct sockaddr_storage ic_raddr
; /* conn remote address */
163 * the target_name, initiator_name, initiator session
164 * identifier and target session identifying handle
165 * are only used for target connections.
167 char ic_target_name
[ISCSI_MAX_NAME_LEN
+ 1];
168 char ic_initiator_name
[ISCSI_MAX_NAME_LEN
+ 1];
169 char ic_tsih
[ISCSI_MAX_TSIH_LEN
+ 1];
170 char ic_isid
[ISCSI_MAX_ISID_LEN
+ 1];
171 idm_conn_state_t ic_state
;
172 idm_conn_state_t ic_last_state
;
173 sm_audit_buf_t ic_state_audit
;
174 kmutex_t ic_state_mutex
;
175 kcondvar_t ic_state_cv
;
176 uint32_t ic_state_flags
;
177 timeout_id_t ic_state_timeout
;
178 struct idm_conn_s
*ic_reinstate_conn
; /* For conn reinst. */
179 struct idm_conn_s
*ic_logout_conn
; /* For other conn logout */
180 taskq_t
*ic_state_taskq
;
182 boolean_t ic_login_info_valid
;
183 boolean_t ic_rdma_extensions
;
184 uint16_t ic_login_cid
;
188 idm_status_t ic_conn_sm_status
;
191 boolean_t ic_keepalive
;
192 uint32_t ic_internal_cid
;
194 uint32_t ic_conn_flags
;
195 idm_conn_type_t ic_conn_type
;
196 idm_conn_ops_t ic_conn_ops
;
197 idm_transport_ops_t
*ic_transport_ops
;
198 idm_transport_type_t ic_transport_type
;
199 int ic_transport_hdrlen
;
200 void *ic_transport_private
;
201 idm_conn_param_t ic_conn_params
;
203 * Save client callback to interpose idm callback
205 idm_pdu_cb_t
*ic_client_callback
;
206 clock_t ic_timestamp
;
/*
 * IDM_CONN_* flag bits (one bit each, combinable with bitwise OR).
 * NOTE(review): presumably the values held in ic_conn_flags -- confirm.
 */
#define	IDM_CONN_HEADER_DIGEST		0x00000001
#define	IDM_CONN_DATA_DIGEST		0x00000002
#define	IDM_CONN_USE_SCOREBOARD		0x00000004
/*
 * Connection-type predicates: non-zero when the connection's ic_conn_type
 * equals CONN_TYPE_INI / CONN_TYPE_TGT respectively. The argument is a
 * pointer to the connection and is evaluated exactly once.
 */
#define	IDM_CONN_ISINI(conn)	((conn)->ic_conn_type == CONN_TYPE_INI)
#define	IDM_CONN_ISTGT(conn)	((conn)->ic_conn_type == CONN_TYPE_TGT)
217 * An IDM target task can transfer data using multiple buffers. The task
218 * will maintain a list of buffers, and each buffer will contain the relative
219 * offset of the transfer and a pointer to the next buffer in the list.
221 * Note on client private data:
222 * idt_private is intended to be a pointer to some sort of client-
225 * idt_client_handle is a more generic client-private piece of data that can
226 * be used by the client for the express purpose of task lookup. The driving
227 * use case for this is for the client to store the initiator task tag for
228 * a given task so that it may be more easily retrieved for task management.
230 * The key take away here is that clients should never call
231 * idm_task_find_by_handle in the performance path.
233 * An initiator will require only one buffer per task, the offset will be 0.
236 typedef struct idm_task_s
{
237 idm_conn_t
*idt_ic
; /* Associated connection */
238 /* connection type is in idt_ic->ic_conn_type */
240 void *idt_private
; /* Client private data */
241 uintptr_t idt_client_handle
; /* Client private */
242 uint32_t idt_tt
; /* Task tag */
243 uint32_t idt_r2t_ttt
; /* R2T Target Task tag */
244 idm_task_state_t idt_state
;
245 idm_refcnt_t idt_refcnt
;
250 int idt_tx_to_ini_start
;
251 int idt_tx_to_ini_done
;
252 int idt_rx_from_ini_start
;
253 int idt_rx_from_ini_done
;
254 int idt_tx_bytes
; /* IDM_CONN_USE_SCOREBOARD */
255 int idt_rx_bytes
; /* IDM_CONN_USE_SCOREBOARD */
257 uint32_t idt_exp_datasn
; /* expected datasn */
258 uint32_t idt_exp_rttsn
; /* expected rttsn */
259 list_t idt_inbufv
; /* chunks of IN buffers */
260 list_t idt_outbufv
; /* chunks of OUT buffers */
/* Transport header, which describes this task's remote tagged buffer */
265 int idt_transport_hdrlen
;
266 void *idt_transport_hdr
;
267 uint32_t idt_flags
; /* phase collapse */
/*
 * Constructor/destructor pair for task objects. The task is passed as an
 * opaque pointer (task_void) together with a caller-supplied argument.
 * NOTE(review): the (buf, arg, flags) / (buf, arg) shapes look like
 * kmem cache callbacks -- confirm at the kmem_cache_create() call site.
 */
int idm_task_constructor(void *task_void, void *arg, int flags);
void idm_task_destructor(void *task_void, void *arg);
/* Task ID limit and the magic number stamped into buffer objects. */
#define	IDM_TASKIDS_MAX		16384
#define	IDM_BUF_MAGIC		0x49425546	/* "IBUF" */

/* Phase-collapse flag bits (see idt_flags). */
#define	IDM_TASK_PHASECOLLAPSE_REQ	0x00000001 /* request phase collapse */
#define	IDM_TASK_PHASECOLLAPSE_SUCCESS	0x00000002 /* phase collapse success */
279 /* Protect with task mutex */
280 typedef struct idm_buf_s
{
281 uint32_t idb_magic
; /* "IBUF" */
/*
 * Note: idm_tx_link *must* be the second element in this structure for
 * proper TX PDU ordering.
 */
287 list_node_t idm_tx_link
; /* link in a list of TX objects */
289 list_node_t idb_buflink
; /* link in a multi-buffer data xfer */
290 idm_conn_t
*idb_ic
; /* Associated connection */
291 void *idb_buf
; /* data */
292 uint64_t idb_buflen
; /* length of buffer */
293 size_t idb_bufoffset
; /* offset in a multi-buffer xfer */
294 boolean_t idb_bufalloc
; /* true if alloc'd in idm_buf_alloc */
296 * DataPDUInOrder=Yes, so to track that the PDUs in a sequence are sent
297 * in continuously increasing address order, check that offsets for a
298 * single buffer xfer are in order.
300 uint32_t idb_exp_offset
;
301 size_t idb_xfer_len
; /* Current requested xfer len */
302 void *idb_buf_private
; /* transport-specific buf handle */
303 void *idb_reg_private
; /* transport-specific reg handle */
304 void *idb_bufptr
; /* transport-specific bcopy pointer */
305 boolean_t idb_bufbcopy
; /* true if bcopy required */
307 idm_buf_cb_t
*idb_buf_cb
; /* Data Completion Notify, tgt only */
308 void *idb_cb_arg
; /* Client private data */
309 idm_task_t
*idb_task_binding
;
310 timespec_t idb_xfer_start
;
311 timespec_t idb_xfer_done
;
312 boolean_t idb_in_transport
;
313 boolean_t idb_tx_thread
; /* Sockets only */
314 iscsi_hdr_t idb_data_hdr_tmpl
; /* Sockets only */
315 idm_status_t idb_status
;
322 } idm_bufpat_check_type_t
;
/*
 * BUFPAT_MATCH - non-zero when the buffer-pattern record pointed to by
 * bc_bufpat refers to buffer bc_idb and carries the IDM_BUF_MAGIC value.
 *
 * The record is taken from the bc_bufpat argument; the earlier body ignored
 * that parameter and dereferenced whatever variable named "bufpat" happened
 * to be in the caller's scope. Callers that pass a variable named bufpat
 * see no change. bc_bufpat is expanded twice, so it must not have side
 * effects.
 */
#define	BUFPAT_MATCH(bc_bufpat, bc_idb)				\
	(((bc_bufpat)->bufpat_idb == (bc_idb)) &&		\
	((bc_bufpat)->bufpat_bufmagic == IDM_BUF_MAGIC))
328 typedef struct idm_bufpat_s
{
330 uint32_t bufpat_bufmagic
;
331 uint32_t bufpat_offset
;
/* Number of iovec entries in a PDU's isp_iov array. */
#define	PDU_MAX_IOVLEN		12
/* Magic number stamped into PDU objects. */
#define	IDM_PDU_MAGIC		0x49504455	/* "IPDU" */
337 typedef struct idm_pdu_s
{
338 uint32_t isp_magic
; /* "IPDU" */
341 * Internal - Order is vital. idm_tx_link *must* be the second
342 * element in this structure for proper TX PDU ordering.
344 list_node_t idm_tx_link
;
346 list_node_t isp_client_lnd
;
348 idm_conn_t
*isp_ic
; /* Must be set */
349 iscsi_hdr_t
*isp_hdr
;
354 /* Transport header */
355 void *isp_transport_hdr
;
356 uint32_t isp_transport_hdrlen
;
357 void *isp_transport_private
;
360 * isp_data is used for sending SCSI status, NOP, text, scsi and
361 * non-scsi data. Data is received using isp_iov and isp_iovlen
362 * to support data over multiple buffers.
365 idm_pdu_cb_t
*isp_callback
;
366 idm_status_t isp_status
;
369 * The following four elements are only used in
370 * idm_sorecv_scsidata() currently.
372 struct iovec isp_iov
[PDU_MAX_IOVLEN
];
374 idm_buf_t
*isp_sorx_buf
;
376 /* Implementation data for idm_pdu_alloc and sorx PDU cache */
378 uint_t isp_hdrbuflen
;
379 uint_t isp_databuflen
;
380 hrtime_t isp_queue_time
;
382 /* Taskq dispatching state for deferred PDU */
383 taskq_ent_t isp_tqent
;
387 * This "generic" object is used when removing an item from the ic_tx_list
388 * in order to determine whether it's an idm_pdu_t or an idm_buf_t
392 uint32_t idm_tx_obj_magic
;
394 * idm_tx_link *must* be the second element in this structure.
396 list_node_t idm_tx_link
;
/*
 * IDM_PDU_OPCODE - opcode of a PDU: the opcode byte of the header that
 * isp_hdr points to, masked with ISCSI_OPCODE_MASK. The argument is a
 * pointer to the PDU; isp_hdr must be non-NULL.
 */
#define	IDM_PDU_OPCODE(pdup)	\
	((pdup)->isp_hdr->opcode & ISCSI_OPCODE_MASK)
/* IDM_PDU_* flag bits (one bit each, combinable with bitwise OR). */
#define	IDM_PDU_ALLOC		0x00000001
#define	IDM_PDU_ADDL_HDR	0x00000002
#define	IDM_PDU_ADDL_DATA	0x00000004
#define	IDM_PDU_LOGIN_TX	0x00000008
#define	IDM_PDU_SET_STATSN	0x00000010
#define	IDM_PDU_ADVANCE_STATSN	0x00000020
/*
 * Header sizing for the sorx PDU cache.
 *
 * IDM_SORX_CACHE_AHSLEN adds 3 to each of the two AHS lengths, sums them
 * and divides by sizeof (uint32_t); IDM_SORX_CACHE_HDRLEN adds that result
 * to sizeof (iscsi_hdr_t).
 * NOTE(review): the division yields a count of 32-bit words, yet it is
 * added to a byte count in IDM_SORX_CACHE_HDRLEN -- confirm the intended
 * units before relying on this value.
 */
#define	OSD_EXT_CDB_AHSLEN	(200 - 15)
#define	BIDI_AHS_LENGTH		5
#define	IDM_SORX_CACHE_AHSLEN						\
	(((OSD_EXT_CDB_AHSLEN + 3) + (BIDI_AHS_LENGTH + 3)) /		\
	sizeof (uint32_t))
#define	IDM_SORX_CACHE_HDRLEN	\
	(sizeof (iscsi_hdr_t) + IDM_SORX_CACHE_AHSLEN)
/*
 * ID pool constants.
 *
 * IDM_IDPOOL_MAX_SIZE is parenthesized so that it expands as a single value
 * inside any enclosing expression (e.g. n / IDM_IDPOOL_MAX_SIZE).
 */
#define	IDM_IDPOOL_MAGIC	0x4944504C	/* IDPL */
#define	IDM_IDPOOL_MIN_SIZE	64	/* Number of IDs to begin with */
#define	IDM_IDPOOL_MAX_SIZE	(64 * 1024)
425 typedef struct idm_idpool
{
434 uint32_t id_free_counter
;
435 uint32_t id_max_free_counter
;
439 * Global IDM state structure
442 kmutex_t idm_global_mutex
;
443 taskq_t
*idm_global_taskq
;
444 kthread_t
*idm_wd_thread
;
445 kt_did_t idm_wd_thread_did
;
446 boolean_t idm_wd_thread_running
;
447 kcondvar_t idm_wd_cv
;
448 list_t idm_tgt_svc_list
;
449 kcondvar_t idm_tgt_svc_cv
;
450 list_t idm_tgt_conn_list
;
451 int idm_tgt_conn_count
;
452 list_t idm_ini_conn_list
;
453 kmem_cache_t
*idm_buf_cache
;
454 kmem_cache_t
*idm_task_cache
;
455 krwlock_t idm_taskid_table_lock
;
456 idm_task_t
**idm_taskid_table
;
457 uint32_t idm_taskid_next
;
458 uint32_t idm_taskid_max
;
459 idm_idpool_t idm_conn_id_pool
;
460 kmem_cache_t
*idm_sotx_pdu_cache
;
461 kmem_cache_t
*idm_sorx_pdu_cache
;
462 kmem_cache_t
*idm_so_128k_buf_cache
;
465 idm_global_t idm
; /* Global state */
468 idm_idpool_create(idm_idpool_t
*pool
);
471 idm_idpool_destroy(idm_idpool_t
*pool
);
474 idm_idpool_alloc(idm_idpool_t
*pool
, uint16_t *id
);
477 idm_idpool_free(idm_idpool_t
*pool
, uint16_t id
);
480 idm_pdu_rx(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
483 idm_pdu_tx_forward(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
486 idm_pdu_rx_forward_ffp(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
489 idm_pdu_rx_forward(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
492 idm_pdu_tx_protocol_error(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
495 idm_pdu_rx_protocol_error(idm_conn_t
*ic
, idm_pdu_t
*pdu
);
497 void idm_parse_login_rsp(idm_conn_t
*ic
, idm_pdu_t
*logout_req_pdu
,
500 void idm_parse_logout_req(idm_conn_t
*ic
, idm_pdu_t
*logout_req_pdu
,
503 void idm_parse_logout_rsp(idm_conn_t
*ic
, idm_pdu_t
*login_rsp_pdu
,
506 idm_status_t
idm_svc_conn_create(idm_svc_t
*is
, idm_transport_type_t type
,
507 idm_conn_t
**ic_result
);
509 void idm_svc_conn_destroy(idm_conn_t
*ic
);
511 idm_status_t
idm_ini_conn_finish(idm_conn_t
*ic
);
513 idm_status_t
idm_tgt_conn_finish(idm_conn_t
*ic
);
515 idm_conn_t
*idm_conn_create_common(idm_conn_type_t conn_type
,
516 idm_transport_type_t tt
, idm_conn_ops_t
*conn_ops
);
518 void idm_conn_destroy_common(idm_conn_t
*ic
);
520 void idm_conn_close(idm_conn_t
*ic
);
/*
 * Connection-ID allocation and release, and the CRC32C routine.
 * NOTE(review): idm_crc32c presumably checksums `length` bytes starting at
 * `address` -- confirm against the definition.
 */
uint32_t idm_cid_alloc(void);
void idm_cid_free(uint32_t cid);
uint32_t idm_crc32c(void *address, unsigned long length);
528 uint32_t idm_crc32c_continued(void *address
, unsigned long length
,
531 void idm_listbuf_insert(list_t
*lst
, idm_buf_t
*buf
);
533 idm_conn_t
*idm_lookup_conn(uint8_t *isid
, uint16_t tsih
, uint16_t cid
);
539 #endif /* _IDM_IMPL_H_ */