// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics TCP target.
 * Copyright (c) 2018 Lightbits Labs. All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/key.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/inet.h>
#include <linux/llist.h>
#include <crypto/hash.h>
#include <trace/events/sock.h>

#include "nvmet.h"

#define NVMET_TCP_DEF_INLINE_DATA_SIZE	(4 * PAGE_SIZE)
#define NVMET_TCP_MAXH2CDATA		0x400000 /* 4M arbitrary limit */
#define NVMET_TCP_BACKLOG		128
static int param_store_val(const char *str, int *val, int min, int max)
{
	int ret, new_val;

	ret = kstrtoint(str, 10, &new_val);
	if (ret)
		return -EINVAL;

	if (new_val < min || new_val > max)
		return -EINVAL;

	*val = new_val;
	return 0;
}

static int set_params(const char *str, const struct kernel_param *kp)
{
	return param_store_val(str, kp->arg, 0, INT_MAX);
}

static const struct kernel_param_ops set_param_ops = {
	.set	= set_params,
	.get	= param_get_int,
};
/* Define the socket priority to use for connections where it is desirable
 * that the NIC consider performing optimized packet processing or filtering.
 * A non-zero value is sufficient to indicate general consideration of any
 * possible optimization. Making it a module param allows for alternative
 * values that may be unique to some NIC implementations.
 */
static int so_priority;
device_param_cb(so_priority, &set_param_ops, &so_priority, 0644);
MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority: Default 0");

/* Define a time period (in usecs) that io_work() shall sample an activated
 * queue before determining it to be idle. This optional module behavior
 * can enable NIC solutions that support socket optimized packet processing
 * using advanced interrupt moderation techniques.
 */
static int idle_poll_period_usecs;
device_param_cb(idle_poll_period_usecs, &set_param_ops,
		&idle_poll_period_usecs, 0644);
MODULE_PARM_DESC(idle_poll_period_usecs,
		"nvmet tcp io_work poll till idle time period in usecs: Default 0");
#ifdef CONFIG_NVME_TARGET_TCP_TLS
/*
 * TLS handshake timeout
 */
static int tls_handshake_timeout = 10;
module_param(tls_handshake_timeout, int, 0644);
MODULE_PARM_DESC(tls_handshake_timeout,
		 "nvme TLS handshake timeout in seconds (default 10)");
#endif

#define NVMET_TCP_RECV_BUDGET		8
#define NVMET_TCP_SEND_BUDGET		8
#define NVMET_TCP_IO_WORK_BUDGET	64
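/*
 * The RECV/SEND budgets bound how many receive and send operations io_work()
 * attempts per pass, and the IO_WORK budget bounds the combined number of
 * operations processed in one io_work() invocation before the worker
 * requeues itself, so a single busy queue cannot monopolize the workqueue.
 */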
enum nvmet_tcp_send_state {
	NVMET_TCP_SEND_DATA_PDU,
	NVMET_TCP_SEND_DATA,
	NVMET_TCP_SEND_R2T,
	NVMET_TCP_SEND_DDGST,
	NVMET_TCP_SEND_RESPONSE
};

enum nvmet_tcp_recv_state {
	NVMET_TCP_RECV_PDU,
	NVMET_TCP_RECV_DATA,
	NVMET_TCP_RECV_DDGST,
	NVMET_TCP_RECV_ERR,
};

enum {
	NVMET_TCP_F_INIT_FAILED = (1 << 0),
};

struct nvmet_tcp_cmd {
	struct nvmet_tcp_queue		*queue;
	struct nvmet_req		req;

	struct nvme_tcp_cmd_pdu		*cmd_pdu;
	struct nvme_tcp_rsp_pdu		*rsp_pdu;
	struct nvme_tcp_data_pdu	*data_pdu;
	struct nvme_tcp_r2t_pdu		*r2t_pdu;

	u32				rbytes_done;
	u32				wbytes_done;

	u32				pdu_len;
	u32				pdu_recv;
	int				sg_idx;
	char				recv_cbuf[CMSG_LEN(sizeof(char))];
	struct msghdr			recv_msg;
	struct bio_vec			*iov;
	u32				flags;

	struct list_head		entry;
	struct llist_node		lentry;
	u32				offset;
	struct scatterlist		*cur_sg;
	enum nvmet_tcp_send_state	state;

	__le32				exp_ddgst;
	__le32				recv_ddgst;
};

enum nvmet_tcp_queue_state {
	NVMET_TCP_Q_CONNECTING,
	NVMET_TCP_Q_TLS_HANDSHAKE,
	NVMET_TCP_Q_LIVE,
	NVMET_TCP_Q_DISCONNECTING,
	NVMET_TCP_Q_FAILED,
};
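/*
 * Queue lifecycle: a queue starts in CONNECTING (or TLS_HANDSHAKE when the
 * port is configured for TLS 1.3), moves to LIVE once the ICReq/ICResp
 * exchange completes, and ends up in DISCONNECTING on teardown or FAILED if
 * initialization (e.g. the ICResp send or the TLS handshake) fails.
 */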
struct nvmet_tcp_queue {
	struct socket		*sock;
	struct nvmet_tcp_port	*port;
	struct work_struct	io_work;
	struct nvmet_cq		nvme_cq;
	struct nvmet_sq		nvme_sq;
	struct kref		kref;

	/* send state */
	struct nvmet_tcp_cmd	*cmds;
	unsigned int		nr_cmds;
	struct list_head	free_list;
	struct llist_head	resp_list;
	struct list_head	resp_send_list;
	int			send_list_len;
	struct nvmet_tcp_cmd	*snd_cmd;

	/* recv state */
	int			offset;
	int			left;
	enum nvmet_tcp_recv_state rcv_state;
	struct nvmet_tcp_cmd	*cmd;
	union nvme_tcp_pdu	pdu;

	/* digest state */
	bool			hdr_digest;
	bool			data_digest;
	struct ahash_request	*snd_hash;
	struct ahash_request	*rcv_hash;

	/* TLS state */
	key_serial_t		tls_pskid;
	struct delayed_work	tls_handshake_tmo_work;

	unsigned long		poll_end;

	spinlock_t		state_lock;
	enum nvmet_tcp_queue_state state;

	struct sockaddr_storage	sockaddr;
	struct sockaddr_storage	sockaddr_peer;
	struct work_struct	release_work;

	int			idx;
	struct list_head	queue_list;

	struct nvmet_tcp_cmd	connect;

	struct page_frag_cache	pf_cache;

	void (*data_ready)(struct sock *);
	void (*state_change)(struct sock *);
	void (*write_space)(struct sock *);
};

struct nvmet_tcp_port {
	struct socket		*sock;
	struct work_struct	accept_work;
	struct nvmet_port	*nport;
	struct sockaddr_storage	addr;
	void (*data_ready)(struct sock *);
};
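/*
 * One nvmet_tcp_port exists per listening socket (created in
 * nvmet_tcp_add_port()), while one nvmet_tcp_queue is allocated per accepted
 * connection; the port's saved data_ready callback is restored on the child
 * socket before a TLS handshake upcall is started.
 */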
static DEFINE_IDA(nvmet_tcp_queue_ida);
static LIST_HEAD(nvmet_tcp_queue_list);
static DEFINE_MUTEX(nvmet_tcp_queue_mutex);

static struct workqueue_struct *nvmet_tcp_wq;
static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);
static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
		struct nvmet_tcp_cmd *cmd)
{
	if (unlikely(!queue->nr_cmds)) {
		/* We didn't allocate cmds yet, send 0xffff */
		return USHRT_MAX;
	}

	return cmd - queue->cmds;
}

static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd)
{
	return nvme_is_write(cmd->req.cmd) &&
		cmd->rbytes_done < cmd->req.transfer_len;
}

static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd)
{
	return nvmet_tcp_has_data_in(cmd) && !cmd->req.cqe->status;
}

static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd)
{
	return !nvme_is_write(cmd->req.cmd) &&
		cmd->req.transfer_len > 0 &&
		!cmd->req.cqe->status;
}

static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd)
{
	return nvme_is_write(cmd->req.cmd) && cmd->pdu_len &&
		!cmd->rbytes_done;
}
static inline struct nvmet_tcp_cmd *
nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmd;

	cmd = list_first_entry_or_null(&queue->free_list,
				struct nvmet_tcp_cmd, entry);
	if (!cmd)
		return NULL;
	list_del_init(&cmd->entry);

	cmd->rbytes_done = cmd->wbytes_done = 0;
	return cmd;
}

static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd)
{
	if (unlikely(cmd == &cmd->queue->connect))
		return;

	list_add_tail(&cmd->entry, &cmd->queue->free_list);
}

static inline int queue_cpu(struct nvmet_tcp_queue *queue)
{
	return queue->sock->sk->sk_incoming_cpu;
}
static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue)
{
	return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue)
{
	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}
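/*
 * Header and data digests are 4-byte CRC32C values (NVME_TCP_DIGEST_LENGTH);
 * the helpers above return the extra length to reserve only when the
 * corresponding digest was negotiated in the ICReq/ICResp exchange.
 */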
static inline void nvmet_tcp_hdgst(struct ahash_request *hash,
		void *pdu, size_t len)
{
	struct scatterlist sg;

	sg_init_one(&sg, pdu, len);
	ahash_request_set_crypt(hash, &sg, pdu + len, len);
	crypto_ahash_digest(hash);
}
static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue,
	void *pdu, size_t len)
{
	struct nvme_tcp_hdr *hdr = pdu;
	__le32 recv_digest;
	__le32 exp_digest;

	if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
		pr_err("queue %d: header digest enabled but no header digest\n",
			queue->idx);
		return -EPROTO;
	}

	recv_digest = *(__le32 *)(pdu + hdr->hlen);
	nvmet_tcp_hdgst(queue->rcv_hash, pdu, len);
	exp_digest = *(__le32 *)(pdu + hdr->hlen);
	if (recv_digest != exp_digest) {
		pr_err("queue %d: header digest error: recv %#x expected %#x\n",
			queue->idx, le32_to_cpu(recv_digest),
			le32_to_cpu(exp_digest));
		return -EPROTO;
	}

	return 0;
}
static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
{
	struct nvme_tcp_hdr *hdr = pdu;
	u8 digest_len = nvmet_tcp_hdgst_len(queue);
	u32 len;

	len = le32_to_cpu(hdr->plen) - hdr->hlen -
		(hdr->flags & NVME_TCP_F_HDGST ? digest_len : 0);

	if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
		pr_err("queue %d: data digest flag is cleared\n", queue->idx);
		return -EPROTO;
	}

	return 0;
}
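/*
 * Both digest checks above report a mismatch between the PDU flags and the
 * negotiated digest settings as a protocol error; the receive path treats
 * this as fatal and shuts the connection down.
 */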
/* If cmd buffers are NULL, no operation is performed */
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd)
{
	kfree(cmd->iov);
	sgl_free(cmd->req.sg);
	cmd->iov = NULL;
	cmd->req.sg = NULL;
}
static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{
	struct bio_vec *iov = cmd->iov;
	struct scatterlist *sg;
	u32 length, offset, sg_offset;
	int nr_pages;

	length = cmd->pdu_len;
	nr_pages = DIV_ROUND_UP(length, PAGE_SIZE);
	offset = cmd->rbytes_done;
	cmd->sg_idx = offset / PAGE_SIZE;
	sg_offset = offset % PAGE_SIZE;
	sg = &cmd->req.sg[cmd->sg_idx];

	while (length) {
		u32 iov_len = min_t(u32, length, sg->length - sg_offset);

		bvec_set_page(iov, sg_page(sg), iov_len,
				sg->offset + sg_offset);

		length -= iov_len;
		sg = sg_next(sg);
		iov++;
		sg_offset = 0;
	}

	iov_iter_bvec(&cmd->recv_msg.msg_iter, ITER_DEST, cmd->iov,
		nr_pages, cmd->pdu_len);
}
static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
{
	queue->rcv_state = NVMET_TCP_RECV_ERR;
	if (queue->nvme_sq.ctrl)
		nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
	else
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}

static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
{
	queue->rcv_state = NVMET_TCP_RECV_ERR;
	if (status == -EPIPE || status == -ECONNRESET)
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	else
		nvmet_tcp_fatal_error(queue);
}
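/*
 * nvmet_tcp_socket_error() distinguishes an orderly peer disconnect
 * (-EPIPE/-ECONNRESET), which only needs a local socket shutdown, from other
 * errors, which are escalated to a controller-level fatal error.
 */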
static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
	struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
	u32 len = le32_to_cpu(sgl->length);

	if (!len)
		return 0;

	if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
			  NVME_SGL_FMT_OFFSET)) {
		if (!nvme_is_write(cmd->req.cmd))
			return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;

		if (len > cmd->req.port->inline_data_size)
			return NVME_SC_SGL_INVALID_OFFSET | NVME_STATUS_DNR;
		cmd->pdu_len = len;
	}
	cmd->req.transfer_len += len;

	cmd->req.sg = sgl_alloc(len, GFP_KERNEL, &cmd->req.sg_cnt);
	if (!cmd->req.sg)
		return NVME_SC_INTERNAL;
	cmd->cur_sg = cmd->req.sg;

	if (nvmet_tcp_has_data_in(cmd)) {
		cmd->iov = kmalloc_array(cmd->req.sg_cnt,
				sizeof(*cmd->iov), GFP_KERNEL);
		if (!cmd->iov)
			goto err;
	}

	return 0;
err:
	nvmet_tcp_free_cmd_buffers(cmd);
	return NVME_SC_INTERNAL;
}
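/*
 * nvmet_tcp_map_data() interprets the command's SGL descriptor: an offset
 * (inline) descriptor is only legal for writes that fit within the port's
 * inline_data_size, while any transfer is backed by an sgl_alloc()'d
 * scatterlist plus, for host-to-controller data, a bio_vec array for the
 * receive path.
 */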
static void nvmet_tcp_calc_ddgst(struct ahash_request *hash,
		struct nvmet_tcp_cmd *cmd)
{
	ahash_request_set_crypt(hash, cmd->req.sg,
		(void *)&cmd->exp_ddgst, cmd->req.transfer_len);
	crypto_ahash_digest(hash);
}
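/*
 * nvmet_tcp_calc_ddgst() runs the CRC32C ahash over the command's full
 * scatterlist and stores the result in cmd->exp_ddgst, which is either sent
 * after C2HData or compared against the digest received after H2CData.
 */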
453 static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd
*cmd
)
455 struct nvme_tcp_data_pdu
*pdu
= cmd
->data_pdu
;
456 struct nvmet_tcp_queue
*queue
= cmd
->queue
;
457 u8 hdgst
= nvmet_tcp_hdgst_len(cmd
->queue
);
458 u8 ddgst
= nvmet_tcp_ddgst_len(cmd
->queue
);
461 cmd
->state
= NVMET_TCP_SEND_DATA_PDU
;
463 pdu
->hdr
.type
= nvme_tcp_c2h_data
;
464 pdu
->hdr
.flags
= NVME_TCP_F_DATA_LAST
| (queue
->nvme_sq
.sqhd_disabled
?
465 NVME_TCP_F_DATA_SUCCESS
: 0);
466 pdu
->hdr
.hlen
= sizeof(*pdu
);
467 pdu
->hdr
.pdo
= pdu
->hdr
.hlen
+ hdgst
;
469 cpu_to_le32(pdu
->hdr
.hlen
+ hdgst
+
470 cmd
->req
.transfer_len
+ ddgst
);
471 pdu
->command_id
= cmd
->req
.cqe
->command_id
;
472 pdu
->data_length
= cpu_to_le32(cmd
->req
.transfer_len
);
473 pdu
->data_offset
= cpu_to_le32(cmd
->wbytes_done
);
475 if (queue
->data_digest
) {
476 pdu
->hdr
.flags
|= NVME_TCP_F_DDGST
;
477 nvmet_tcp_calc_ddgst(queue
->snd_hash
, cmd
);
480 if (cmd
->queue
->hdr_digest
) {
481 pdu
->hdr
.flags
|= NVME_TCP_F_HDGST
;
482 nvmet_tcp_hdgst(queue
->snd_hash
, pdu
, sizeof(*pdu
));
486 static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd
*cmd
)
488 struct nvme_tcp_r2t_pdu
*pdu
= cmd
->r2t_pdu
;
489 struct nvmet_tcp_queue
*queue
= cmd
->queue
;
490 u8 hdgst
= nvmet_tcp_hdgst_len(cmd
->queue
);
493 cmd
->state
= NVMET_TCP_SEND_R2T
;
495 pdu
->hdr
.type
= nvme_tcp_r2t
;
497 pdu
->hdr
.hlen
= sizeof(*pdu
);
499 pdu
->hdr
.plen
= cpu_to_le32(pdu
->hdr
.hlen
+ hdgst
);
501 pdu
->command_id
= cmd
->req
.cmd
->common
.command_id
;
502 pdu
->ttag
= nvmet_tcp_cmd_tag(cmd
->queue
, cmd
);
503 pdu
->r2t_length
= cpu_to_le32(cmd
->req
.transfer_len
- cmd
->rbytes_done
);
504 pdu
->r2t_offset
= cpu_to_le32(cmd
->rbytes_done
);
505 if (cmd
->queue
->hdr_digest
) {
506 pdu
->hdr
.flags
|= NVME_TCP_F_HDGST
;
507 nvmet_tcp_hdgst(queue
->snd_hash
, pdu
, sizeof(*pdu
));
511 static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd
*cmd
)
513 struct nvme_tcp_rsp_pdu
*pdu
= cmd
->rsp_pdu
;
514 struct nvmet_tcp_queue
*queue
= cmd
->queue
;
515 u8 hdgst
= nvmet_tcp_hdgst_len(cmd
->queue
);
518 cmd
->state
= NVMET_TCP_SEND_RESPONSE
;
520 pdu
->hdr
.type
= nvme_tcp_rsp
;
522 pdu
->hdr
.hlen
= sizeof(*pdu
);
524 pdu
->hdr
.plen
= cpu_to_le32(pdu
->hdr
.hlen
+ hdgst
);
525 if (cmd
->queue
->hdr_digest
) {
526 pdu
->hdr
.flags
|= NVME_TCP_F_HDGST
;
527 nvmet_tcp_hdgst(queue
->snd_hash
, pdu
, sizeof(*pdu
));
531 static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue
*queue
)
533 struct llist_node
*node
;
534 struct nvmet_tcp_cmd
*cmd
;
536 for (node
= llist_del_all(&queue
->resp_list
); node
; node
= node
->next
) {
537 cmd
= llist_entry(node
, struct nvmet_tcp_cmd
, lentry
);
538 list_add(&cmd
->entry
, &queue
->resp_send_list
);
539 queue
->send_list_len
++;
543 static struct nvmet_tcp_cmd
*nvmet_tcp_fetch_cmd(struct nvmet_tcp_queue
*queue
)
545 queue
->snd_cmd
= list_first_entry_or_null(&queue
->resp_send_list
,
546 struct nvmet_tcp_cmd
, entry
);
547 if (!queue
->snd_cmd
) {
548 nvmet_tcp_process_resp_list(queue
);
550 list_first_entry_or_null(&queue
->resp_send_list
,
551 struct nvmet_tcp_cmd
, entry
);
552 if (unlikely(!queue
->snd_cmd
))
556 list_del_init(&queue
->snd_cmd
->entry
);
557 queue
->send_list_len
--;
559 if (nvmet_tcp_need_data_out(queue
->snd_cmd
))
560 nvmet_setup_c2h_data_pdu(queue
->snd_cmd
);
561 else if (nvmet_tcp_need_data_in(queue
->snd_cmd
))
562 nvmet_setup_r2t_pdu(queue
->snd_cmd
);
564 nvmet_setup_response_pdu(queue
->snd_cmd
);
566 return queue
->snd_cmd
;
569 static void nvmet_tcp_queue_response(struct nvmet_req
*req
)
571 struct nvmet_tcp_cmd
*cmd
=
572 container_of(req
, struct nvmet_tcp_cmd
, req
);
573 struct nvmet_tcp_queue
*queue
= cmd
->queue
;
574 struct nvme_sgl_desc
*sgl
;
577 if (unlikely(cmd
== queue
->cmd
)) {
578 sgl
= &cmd
->req
.cmd
->common
.dptr
.sgl
;
579 len
= le32_to_cpu(sgl
->length
);
582 * Wait for inline data before processing the response.
583 * Avoid using helpers, this might happen before
584 * nvmet_req_init is completed.
586 if (queue
->rcv_state
== NVMET_TCP_RECV_PDU
&&
587 len
&& len
<= cmd
->req
.port
->inline_data_size
&&
588 nvme_is_write(cmd
->req
.cmd
))
592 llist_add(&cmd
->lentry
, &queue
->resp_list
);
593 queue_work_on(queue_cpu(queue
), nvmet_tcp_wq
, &cmd
->queue
->io_work
);
596 static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd
*cmd
)
598 if (unlikely(cmd
->flags
& NVMET_TCP_F_INIT_FAILED
))
599 nvmet_tcp_queue_response(&cmd
->req
);
601 cmd
->req
.execute(&cmd
->req
);
604 static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd
*cmd
)
606 struct msghdr msg
= {
607 .msg_flags
= MSG_DONTWAIT
| MSG_MORE
| MSG_SPLICE_PAGES
,
610 u8 hdgst
= nvmet_tcp_hdgst_len(cmd
->queue
);
611 int left
= sizeof(*cmd
->data_pdu
) - cmd
->offset
+ hdgst
;
614 bvec_set_virt(&bvec
, (void *)cmd
->data_pdu
+ cmd
->offset
, left
);
615 iov_iter_bvec(&msg
.msg_iter
, ITER_SOURCE
, &bvec
, 1, left
);
616 ret
= sock_sendmsg(cmd
->queue
->sock
, &msg
);
626 cmd
->state
= NVMET_TCP_SEND_DATA
;
631 static int nvmet_try_send_data(struct nvmet_tcp_cmd
*cmd
, bool last_in_batch
)
633 struct nvmet_tcp_queue
*queue
= cmd
->queue
;
636 while (cmd
->cur_sg
) {
637 struct msghdr msg
= {
638 .msg_flags
= MSG_DONTWAIT
| MSG_SPLICE_PAGES
,
640 struct page
*page
= sg_page(cmd
->cur_sg
);
642 u32 left
= cmd
->cur_sg
->length
- cmd
->offset
;
644 if ((!last_in_batch
&& cmd
->queue
->send_list_len
) ||
645 cmd
->wbytes_done
+ left
< cmd
->req
.transfer_len
||
646 queue
->data_digest
|| !queue
->nvme_sq
.sqhd_disabled
)
647 msg
.msg_flags
|= MSG_MORE
;
649 bvec_set_page(&bvec
, page
, left
, cmd
->offset
);
650 iov_iter_bvec(&msg
.msg_iter
, ITER_SOURCE
, &bvec
, 1, left
);
651 ret
= sock_sendmsg(cmd
->queue
->sock
, &msg
);
656 cmd
->wbytes_done
+= ret
;
659 if (cmd
->offset
== cmd
->cur_sg
->length
) {
660 cmd
->cur_sg
= sg_next(cmd
->cur_sg
);
665 if (queue
->data_digest
) {
666 cmd
->state
= NVMET_TCP_SEND_DDGST
;
669 if (queue
->nvme_sq
.sqhd_disabled
) {
670 cmd
->queue
->snd_cmd
= NULL
;
671 nvmet_tcp_put_cmd(cmd
);
673 nvmet_setup_response_pdu(cmd
);
677 if (queue
->nvme_sq
.sqhd_disabled
)
678 nvmet_tcp_free_cmd_buffers(cmd
);
684 static int nvmet_try_send_response(struct nvmet_tcp_cmd
*cmd
,
687 struct msghdr msg
= { .msg_flags
= MSG_DONTWAIT
| MSG_SPLICE_PAGES
, };
689 u8 hdgst
= nvmet_tcp_hdgst_len(cmd
->queue
);
690 int left
= sizeof(*cmd
->rsp_pdu
) - cmd
->offset
+ hdgst
;
693 if (!last_in_batch
&& cmd
->queue
->send_list_len
)
694 msg
.msg_flags
|= MSG_MORE
;
696 msg
.msg_flags
|= MSG_EOR
;
698 bvec_set_virt(&bvec
, (void *)cmd
->rsp_pdu
+ cmd
->offset
, left
);
699 iov_iter_bvec(&msg
.msg_iter
, ITER_SOURCE
, &bvec
, 1, left
);
700 ret
= sock_sendmsg(cmd
->queue
->sock
, &msg
);
709 nvmet_tcp_free_cmd_buffers(cmd
);
710 cmd
->queue
->snd_cmd
= NULL
;
711 nvmet_tcp_put_cmd(cmd
);
715 static int nvmet_try_send_r2t(struct nvmet_tcp_cmd
*cmd
, bool last_in_batch
)
717 struct msghdr msg
= { .msg_flags
= MSG_DONTWAIT
| MSG_SPLICE_PAGES
, };
719 u8 hdgst
= nvmet_tcp_hdgst_len(cmd
->queue
);
720 int left
= sizeof(*cmd
->r2t_pdu
) - cmd
->offset
+ hdgst
;
723 if (!last_in_batch
&& cmd
->queue
->send_list_len
)
724 msg
.msg_flags
|= MSG_MORE
;
726 msg
.msg_flags
|= MSG_EOR
;
728 bvec_set_virt(&bvec
, (void *)cmd
->r2t_pdu
+ cmd
->offset
, left
);
729 iov_iter_bvec(&msg
.msg_iter
, ITER_SOURCE
, &bvec
, 1, left
);
730 ret
= sock_sendmsg(cmd
->queue
->sock
, &msg
);
739 cmd
->queue
->snd_cmd
= NULL
;
743 static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd
*cmd
, bool last_in_batch
)
745 struct nvmet_tcp_queue
*queue
= cmd
->queue
;
746 int left
= NVME_TCP_DIGEST_LENGTH
- cmd
->offset
;
747 struct msghdr msg
= { .msg_flags
= MSG_DONTWAIT
};
749 .iov_base
= (u8
*)&cmd
->exp_ddgst
+ cmd
->offset
,
754 if (!last_in_batch
&& cmd
->queue
->send_list_len
)
755 msg
.msg_flags
|= MSG_MORE
;
757 msg
.msg_flags
|= MSG_EOR
;
759 ret
= kernel_sendmsg(queue
->sock
, &msg
, &iov
, 1, iov
.iov_len
);
760 if (unlikely(ret
<= 0))
769 if (queue
->nvme_sq
.sqhd_disabled
) {
770 cmd
->queue
->snd_cmd
= NULL
;
771 nvmet_tcp_put_cmd(cmd
);
773 nvmet_setup_response_pdu(cmd
);
778 static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue
*queue
,
781 struct nvmet_tcp_cmd
*cmd
= queue
->snd_cmd
;
784 if (!cmd
|| queue
->state
== NVMET_TCP_Q_DISCONNECTING
) {
785 cmd
= nvmet_tcp_fetch_cmd(queue
);
790 if (cmd
->state
== NVMET_TCP_SEND_DATA_PDU
) {
791 ret
= nvmet_try_send_data_pdu(cmd
);
796 if (cmd
->state
== NVMET_TCP_SEND_DATA
) {
797 ret
= nvmet_try_send_data(cmd
, last_in_batch
);
802 if (cmd
->state
== NVMET_TCP_SEND_DDGST
) {
803 ret
= nvmet_try_send_ddgst(cmd
, last_in_batch
);
808 if (cmd
->state
== NVMET_TCP_SEND_R2T
) {
809 ret
= nvmet_try_send_r2t(cmd
, last_in_batch
);
814 if (cmd
->state
== NVMET_TCP_SEND_RESPONSE
)
815 ret
= nvmet_try_send_response(cmd
, last_in_batch
);
827 static int nvmet_tcp_try_send(struct nvmet_tcp_queue
*queue
,
828 int budget
, int *sends
)
832 for (i
= 0; i
< budget
; i
++) {
833 ret
= nvmet_tcp_try_send_one(queue
, i
== budget
- 1);
834 if (unlikely(ret
< 0)) {
835 nvmet_tcp_socket_error(queue
, ret
);
837 } else if (ret
== 0) {
846 static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue
*queue
)
849 queue
->left
= sizeof(struct nvme_tcp_hdr
);
851 queue
->rcv_state
= NVMET_TCP_RECV_PDU
;
854 static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue
*queue
)
856 struct crypto_ahash
*tfm
= crypto_ahash_reqtfm(queue
->rcv_hash
);
858 ahash_request_free(queue
->rcv_hash
);
859 ahash_request_free(queue
->snd_hash
);
860 crypto_free_ahash(tfm
);
863 static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue
*queue
)
865 struct crypto_ahash
*tfm
;
867 tfm
= crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC
);
871 queue
->snd_hash
= ahash_request_alloc(tfm
, GFP_KERNEL
);
872 if (!queue
->snd_hash
)
874 ahash_request_set_callback(queue
->snd_hash
, 0, NULL
, NULL
);
876 queue
->rcv_hash
= ahash_request_alloc(tfm
, GFP_KERNEL
);
877 if (!queue
->rcv_hash
)
879 ahash_request_set_callback(queue
->rcv_hash
, 0, NULL
, NULL
);
883 ahash_request_free(queue
->snd_hash
);
885 crypto_free_ahash(tfm
);
890 static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue
*queue
)
892 struct nvme_tcp_icreq_pdu
*icreq
= &queue
->pdu
.icreq
;
893 struct nvme_tcp_icresp_pdu
*icresp
= &queue
->pdu
.icresp
;
894 struct msghdr msg
= {};
898 if (le32_to_cpu(icreq
->hdr
.plen
) != sizeof(struct nvme_tcp_icreq_pdu
)) {
899 pr_err("bad nvme-tcp pdu length (%d)\n",
900 le32_to_cpu(icreq
->hdr
.plen
));
901 nvmet_tcp_fatal_error(queue
);
905 if (icreq
->pfv
!= NVME_TCP_PFV_1_0
) {
906 pr_err("queue %d: bad pfv %d\n", queue
->idx
, icreq
->pfv
);
910 if (icreq
->hpda
!= 0) {
911 pr_err("queue %d: unsupported hpda %d\n", queue
->idx
,
916 queue
->hdr_digest
= !!(icreq
->digest
& NVME_TCP_HDR_DIGEST_ENABLE
);
917 queue
->data_digest
= !!(icreq
->digest
& NVME_TCP_DATA_DIGEST_ENABLE
);
918 if (queue
->hdr_digest
|| queue
->data_digest
) {
919 ret
= nvmet_tcp_alloc_crypto(queue
);
924 memset(icresp
, 0, sizeof(*icresp
));
925 icresp
->hdr
.type
= nvme_tcp_icresp
;
926 icresp
->hdr
.hlen
= sizeof(*icresp
);
928 icresp
->hdr
.plen
= cpu_to_le32(icresp
->hdr
.hlen
);
929 icresp
->pfv
= cpu_to_le16(NVME_TCP_PFV_1_0
);
930 icresp
->maxdata
= cpu_to_le32(NVMET_TCP_MAXH2CDATA
);
932 if (queue
->hdr_digest
)
933 icresp
->digest
|= NVME_TCP_HDR_DIGEST_ENABLE
;
934 if (queue
->data_digest
)
935 icresp
->digest
|= NVME_TCP_DATA_DIGEST_ENABLE
;
937 iov
.iov_base
= icresp
;
938 iov
.iov_len
= sizeof(*icresp
);
939 ret
= kernel_sendmsg(queue
->sock
, &msg
, &iov
, 1, iov
.iov_len
);
941 queue
->state
= NVMET_TCP_Q_FAILED
;
942 return ret
; /* queue removal will cleanup */
945 queue
->state
= NVMET_TCP_Q_LIVE
;
946 nvmet_prepare_receive_pdu(queue
);
950 static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue
*queue
,
951 struct nvmet_tcp_cmd
*cmd
, struct nvmet_req
*req
)
953 size_t data_len
= le32_to_cpu(req
->cmd
->common
.dptr
.sgl
.length
);
957 * This command has not been processed yet, hence we are trying to
958 * figure out if there is still pending data left to receive. If
959 * we don't, we can simply prepare for the next pdu and bail out,
960 * otherwise we will need to prepare a buffer and receive the
961 * stale data before continuing forward.
963 if (!nvme_is_write(cmd
->req
.cmd
) || !data_len
||
964 data_len
> cmd
->req
.port
->inline_data_size
) {
965 nvmet_prepare_receive_pdu(queue
);
969 ret
= nvmet_tcp_map_data(cmd
);
971 pr_err("queue %d: failed to map data\n", queue
->idx
);
972 nvmet_tcp_fatal_error(queue
);
976 queue
->rcv_state
= NVMET_TCP_RECV_DATA
;
977 nvmet_tcp_build_pdu_iovec(cmd
);
978 cmd
->flags
|= NVMET_TCP_F_INIT_FAILED
;
981 static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue
*queue
)
983 struct nvme_tcp_data_pdu
*data
= &queue
->pdu
.data
;
984 struct nvmet_tcp_cmd
*cmd
;
985 unsigned int exp_data_len
;
987 if (likely(queue
->nr_cmds
)) {
988 if (unlikely(data
->ttag
>= queue
->nr_cmds
)) {
989 pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n",
990 queue
->idx
, data
->ttag
, queue
->nr_cmds
);
993 cmd
= &queue
->cmds
[data
->ttag
];
995 cmd
= &queue
->connect
;
998 if (le32_to_cpu(data
->data_offset
) != cmd
->rbytes_done
) {
999 pr_err("ttag %u unexpected data offset %u (expected %u)\n",
1000 data
->ttag
, le32_to_cpu(data
->data_offset
),
1005 exp_data_len
= le32_to_cpu(data
->hdr
.plen
) -
1006 nvmet_tcp_hdgst_len(queue
) -
1007 nvmet_tcp_ddgst_len(queue
) -
1010 cmd
->pdu_len
= le32_to_cpu(data
->data_length
);
1011 if (unlikely(cmd
->pdu_len
!= exp_data_len
||
1012 cmd
->pdu_len
== 0 ||
1013 cmd
->pdu_len
> NVMET_TCP_MAXH2CDATA
)) {
1014 pr_err("H2CData PDU len %u is invalid\n", cmd
->pdu_len
);
1018 nvmet_tcp_build_pdu_iovec(cmd
);
1020 queue
->rcv_state
= NVMET_TCP_RECV_DATA
;
1025 /* FIXME: use proper transport errors */
1026 nvmet_tcp_fatal_error(queue
);
1030 static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue
*queue
)
1032 struct nvme_tcp_hdr
*hdr
= &queue
->pdu
.cmd
.hdr
;
1033 struct nvme_command
*nvme_cmd
= &queue
->pdu
.cmd
.cmd
;
1034 struct nvmet_req
*req
;
1037 if (unlikely(queue
->state
== NVMET_TCP_Q_CONNECTING
)) {
1038 if (hdr
->type
!= nvme_tcp_icreq
) {
1039 pr_err("unexpected pdu type (%d) before icreq\n",
1041 nvmet_tcp_fatal_error(queue
);
1044 return nvmet_tcp_handle_icreq(queue
);
1047 if (unlikely(hdr
->type
== nvme_tcp_icreq
)) {
1048 pr_err("queue %d: received icreq pdu in state %d\n",
1049 queue
->idx
, queue
->state
);
1050 nvmet_tcp_fatal_error(queue
);
1054 if (hdr
->type
== nvme_tcp_h2c_data
) {
1055 ret
= nvmet_tcp_handle_h2c_data_pdu(queue
);
1061 queue
->cmd
= nvmet_tcp_get_cmd(queue
);
1062 if (unlikely(!queue
->cmd
)) {
1063 /* This should never happen */
1064 pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d",
1065 queue
->idx
, queue
->nr_cmds
, queue
->send_list_len
,
1066 nvme_cmd
->common
.opcode
);
1067 nvmet_tcp_fatal_error(queue
);
1071 req
= &queue
->cmd
->req
;
1072 memcpy(req
->cmd
, nvme_cmd
, sizeof(*nvme_cmd
));
1074 if (unlikely(!nvmet_req_init(req
, &queue
->nvme_cq
,
1075 &queue
->nvme_sq
, &nvmet_tcp_ops
))) {
1076 pr_err("failed cmd %p id %d opcode %d, data_len: %d\n",
1077 req
->cmd
, req
->cmd
->common
.command_id
,
1078 req
->cmd
->common
.opcode
,
1079 le32_to_cpu(req
->cmd
->common
.dptr
.sgl
.length
));
1081 nvmet_tcp_handle_req_failure(queue
, queue
->cmd
, req
);
1085 ret
= nvmet_tcp_map_data(queue
->cmd
);
1086 if (unlikely(ret
)) {
1087 pr_err("queue %d: failed to map data\n", queue
->idx
);
1088 if (nvmet_tcp_has_inline_data(queue
->cmd
))
1089 nvmet_tcp_fatal_error(queue
);
1091 nvmet_req_complete(req
, ret
);
1096 if (nvmet_tcp_need_data_in(queue
->cmd
)) {
1097 if (nvmet_tcp_has_inline_data(queue
->cmd
)) {
1098 queue
->rcv_state
= NVMET_TCP_RECV_DATA
;
1099 nvmet_tcp_build_pdu_iovec(queue
->cmd
);
1103 nvmet_tcp_queue_response(&queue
->cmd
->req
);
1107 queue
->cmd
->req
.execute(&queue
->cmd
->req
);
1109 nvmet_prepare_receive_pdu(queue
);
1113 static const u8 nvme_tcp_pdu_sizes
[] = {
1114 [nvme_tcp_icreq
] = sizeof(struct nvme_tcp_icreq_pdu
),
1115 [nvme_tcp_cmd
] = sizeof(struct nvme_tcp_cmd_pdu
),
1116 [nvme_tcp_h2c_data
] = sizeof(struct nvme_tcp_data_pdu
),
1119 static inline u8
nvmet_tcp_pdu_size(u8 type
)
1123 return (idx
< ARRAY_SIZE(nvme_tcp_pdu_sizes
) &&
1124 nvme_tcp_pdu_sizes
[idx
]) ?
1125 nvme_tcp_pdu_sizes
[idx
] : 0;
1128 static inline bool nvmet_tcp_pdu_valid(u8 type
)
1131 case nvme_tcp_icreq
:
1133 case nvme_tcp_h2c_data
:
1141 static int nvmet_tcp_tls_record_ok(struct nvmet_tcp_queue
*queue
,
1142 struct msghdr
*msg
, char *cbuf
)
1144 struct cmsghdr
*cmsg
= (struct cmsghdr
*)cbuf
;
1145 u8 ctype
, level
, description
;
1148 ctype
= tls_get_record_type(queue
->sock
->sk
, cmsg
);
1152 case TLS_RECORD_TYPE_DATA
:
1154 case TLS_RECORD_TYPE_ALERT
:
1155 tls_alert_recv(queue
->sock
->sk
, msg
, &level
, &description
);
1156 if (level
== TLS_ALERT_LEVEL_FATAL
) {
1157 pr_err("queue %d: TLS Alert desc %u\n",
1158 queue
->idx
, description
);
1161 pr_warn("queue %d: TLS Alert desc %u\n",
1162 queue
->idx
, description
);
1167 /* discard this record type */
1168 pr_err("queue %d: TLS record %d unhandled\n",
1176 static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue
*queue
)
1178 struct nvme_tcp_hdr
*hdr
= &queue
->pdu
.cmd
.hdr
;
1181 char cbuf
[CMSG_LEN(sizeof(char))] = {};
1182 struct msghdr msg
= { .msg_flags
= MSG_DONTWAIT
};
1185 iov
.iov_base
= (void *)&queue
->pdu
+ queue
->offset
;
1186 iov
.iov_len
= queue
->left
;
1187 if (queue
->tls_pskid
) {
1188 msg
.msg_control
= cbuf
;
1189 msg
.msg_controllen
= sizeof(cbuf
);
1191 len
= kernel_recvmsg(queue
->sock
, &msg
, &iov
, 1,
1192 iov
.iov_len
, msg
.msg_flags
);
1193 if (unlikely(len
< 0))
1195 if (queue
->tls_pskid
) {
1196 ret
= nvmet_tcp_tls_record_ok(queue
, &msg
, cbuf
);
1201 queue
->offset
+= len
;
1206 if (queue
->offset
== sizeof(struct nvme_tcp_hdr
)) {
1207 u8 hdgst
= nvmet_tcp_hdgst_len(queue
);
1209 if (unlikely(!nvmet_tcp_pdu_valid(hdr
->type
))) {
1210 pr_err("unexpected pdu type %d\n", hdr
->type
);
1211 nvmet_tcp_fatal_error(queue
);
1215 if (unlikely(hdr
->hlen
!= nvmet_tcp_pdu_size(hdr
->type
))) {
1216 pr_err("pdu %d bad hlen %d\n", hdr
->type
, hdr
->hlen
);
1220 queue
->left
= hdr
->hlen
- queue
->offset
+ hdgst
;
1224 if (queue
->hdr_digest
&&
1225 nvmet_tcp_verify_hdgst(queue
, &queue
->pdu
, hdr
->hlen
)) {
1226 nvmet_tcp_fatal_error(queue
); /* fatal */
1230 if (queue
->data_digest
&&
1231 nvmet_tcp_check_ddgst(queue
, &queue
->pdu
)) {
1232 nvmet_tcp_fatal_error(queue
); /* fatal */
1236 return nvmet_tcp_done_recv_pdu(queue
);
1239 static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd
*cmd
)
1241 struct nvmet_tcp_queue
*queue
= cmd
->queue
;
1243 nvmet_tcp_calc_ddgst(queue
->rcv_hash
, cmd
);
1245 queue
->left
= NVME_TCP_DIGEST_LENGTH
;
1246 queue
->rcv_state
= NVMET_TCP_RECV_DDGST
;
1249 static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue
*queue
)
1251 struct nvmet_tcp_cmd
*cmd
= queue
->cmd
;
1254 while (msg_data_left(&cmd
->recv_msg
)) {
1255 len
= sock_recvmsg(cmd
->queue
->sock
, &cmd
->recv_msg
,
1256 cmd
->recv_msg
.msg_flags
);
1259 if (queue
->tls_pskid
) {
1260 ret
= nvmet_tcp_tls_record_ok(cmd
->queue
,
1261 &cmd
->recv_msg
, cmd
->recv_cbuf
);
1266 cmd
->pdu_recv
+= len
;
1267 cmd
->rbytes_done
+= len
;
1270 if (queue
->data_digest
) {
1271 nvmet_tcp_prep_recv_ddgst(cmd
);
1275 if (cmd
->rbytes_done
== cmd
->req
.transfer_len
)
1276 nvmet_tcp_execute_request(cmd
);
1278 nvmet_prepare_receive_pdu(queue
);
1282 static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue
*queue
)
1284 struct nvmet_tcp_cmd
*cmd
= queue
->cmd
;
1286 char cbuf
[CMSG_LEN(sizeof(char))] = {};
1287 struct msghdr msg
= { .msg_flags
= MSG_DONTWAIT
};
1289 .iov_base
= (void *)&cmd
->recv_ddgst
+ queue
->offset
,
1290 .iov_len
= queue
->left
1293 if (queue
->tls_pskid
) {
1294 msg
.msg_control
= cbuf
;
1295 msg
.msg_controllen
= sizeof(cbuf
);
1297 len
= kernel_recvmsg(queue
->sock
, &msg
, &iov
, 1,
1298 iov
.iov_len
, msg
.msg_flags
);
1299 if (unlikely(len
< 0))
1301 if (queue
->tls_pskid
) {
1302 ret
= nvmet_tcp_tls_record_ok(queue
, &msg
, cbuf
);
1307 queue
->offset
+= len
;
1312 if (queue
->data_digest
&& cmd
->exp_ddgst
!= cmd
->recv_ddgst
) {
1313 pr_err("queue %d: cmd %d pdu (%d) data digest error: recv %#x expected %#x\n",
1314 queue
->idx
, cmd
->req
.cmd
->common
.command_id
,
1315 queue
->pdu
.cmd
.hdr
.type
, le32_to_cpu(cmd
->recv_ddgst
),
1316 le32_to_cpu(cmd
->exp_ddgst
));
1317 nvmet_req_uninit(&cmd
->req
);
1318 nvmet_tcp_free_cmd_buffers(cmd
);
1319 nvmet_tcp_fatal_error(queue
);
1324 if (cmd
->rbytes_done
== cmd
->req
.transfer_len
)
1325 nvmet_tcp_execute_request(cmd
);
1329 nvmet_prepare_receive_pdu(queue
);
1333 static int nvmet_tcp_try_recv_one(struct nvmet_tcp_queue
*queue
)
1337 if (unlikely(queue
->rcv_state
== NVMET_TCP_RECV_ERR
))
1340 if (queue
->rcv_state
== NVMET_TCP_RECV_PDU
) {
1341 result
= nvmet_tcp_try_recv_pdu(queue
);
1346 if (queue
->rcv_state
== NVMET_TCP_RECV_DATA
) {
1347 result
= nvmet_tcp_try_recv_data(queue
);
1352 if (queue
->rcv_state
== NVMET_TCP_RECV_DDGST
) {
1353 result
= nvmet_tcp_try_recv_ddgst(queue
);
1360 if (result
== -EAGAIN
)
1367 static int nvmet_tcp_try_recv(struct nvmet_tcp_queue
*queue
,
1368 int budget
, int *recvs
)
1372 for (i
= 0; i
< budget
; i
++) {
1373 ret
= nvmet_tcp_try_recv_one(queue
);
1374 if (unlikely(ret
< 0)) {
1375 nvmet_tcp_socket_error(queue
, ret
);
1377 } else if (ret
== 0) {
1386 static void nvmet_tcp_release_queue(struct kref
*kref
)
1388 struct nvmet_tcp_queue
*queue
=
1389 container_of(kref
, struct nvmet_tcp_queue
, kref
);
1391 WARN_ON(queue
->state
!= NVMET_TCP_Q_DISCONNECTING
);
1392 queue_work(nvmet_wq
, &queue
->release_work
);
1395 static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue
*queue
)
1397 spin_lock_bh(&queue
->state_lock
);
1398 if (queue
->state
== NVMET_TCP_Q_TLS_HANDSHAKE
) {
1399 /* Socket closed during handshake */
1400 tls_handshake_cancel(queue
->sock
->sk
);
1402 if (queue
->state
!= NVMET_TCP_Q_DISCONNECTING
) {
1403 queue
->state
= NVMET_TCP_Q_DISCONNECTING
;
1404 kref_put(&queue
->kref
, nvmet_tcp_release_queue
);
1406 spin_unlock_bh(&queue
->state_lock
);
1409 static inline void nvmet_tcp_arm_queue_deadline(struct nvmet_tcp_queue
*queue
)
1411 queue
->poll_end
= jiffies
+ usecs_to_jiffies(idle_poll_period_usecs
);
1414 static bool nvmet_tcp_check_queue_deadline(struct nvmet_tcp_queue
*queue
,
1417 if (!idle_poll_period_usecs
)
1421 nvmet_tcp_arm_queue_deadline(queue
);
1423 return !time_after(jiffies
, queue
->poll_end
);
1426 static void nvmet_tcp_io_work(struct work_struct
*w
)
1428 struct nvmet_tcp_queue
*queue
=
1429 container_of(w
, struct nvmet_tcp_queue
, io_work
);
1436 ret
= nvmet_tcp_try_recv(queue
, NVMET_TCP_RECV_BUDGET
, &ops
);
1442 ret
= nvmet_tcp_try_send(queue
, NVMET_TCP_SEND_BUDGET
, &ops
);
1448 } while (pending
&& ops
< NVMET_TCP_IO_WORK_BUDGET
);
1451 * Requeue the worker if idle deadline period is in progress or any
1452 * ops activity was recorded during the do-while loop above.
1454 if (nvmet_tcp_check_queue_deadline(queue
, ops
) || pending
)
1455 queue_work_on(queue_cpu(queue
), nvmet_tcp_wq
, &queue
->io_work
);
1458 static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue
*queue
,
1459 struct nvmet_tcp_cmd
*c
)
1461 u8 hdgst
= nvmet_tcp_hdgst_len(queue
);
1464 c
->req
.port
= queue
->port
->nport
;
1466 c
->cmd_pdu
= page_frag_alloc(&queue
->pf_cache
,
1467 sizeof(*c
->cmd_pdu
) + hdgst
, GFP_KERNEL
| __GFP_ZERO
);
1470 c
->req
.cmd
= &c
->cmd_pdu
->cmd
;
1472 c
->rsp_pdu
= page_frag_alloc(&queue
->pf_cache
,
1473 sizeof(*c
->rsp_pdu
) + hdgst
, GFP_KERNEL
| __GFP_ZERO
);
1476 c
->req
.cqe
= &c
->rsp_pdu
->cqe
;
1478 c
->data_pdu
= page_frag_alloc(&queue
->pf_cache
,
1479 sizeof(*c
->data_pdu
) + hdgst
, GFP_KERNEL
| __GFP_ZERO
);
1483 c
->r2t_pdu
= page_frag_alloc(&queue
->pf_cache
,
1484 sizeof(*c
->r2t_pdu
) + hdgst
, GFP_KERNEL
| __GFP_ZERO
);
1488 if (queue
->state
== NVMET_TCP_Q_TLS_HANDSHAKE
) {
1489 c
->recv_msg
.msg_control
= c
->recv_cbuf
;
1490 c
->recv_msg
.msg_controllen
= sizeof(c
->recv_cbuf
);
1492 c
->recv_msg
.msg_flags
= MSG_DONTWAIT
| MSG_NOSIGNAL
;
1494 list_add_tail(&c
->entry
, &queue
->free_list
);
1498 page_frag_free(c
->data_pdu
);
1500 page_frag_free(c
->rsp_pdu
);
1502 page_frag_free(c
->cmd_pdu
);
1506 static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd
*c
)
1508 page_frag_free(c
->r2t_pdu
);
1509 page_frag_free(c
->data_pdu
);
1510 page_frag_free(c
->rsp_pdu
);
1511 page_frag_free(c
->cmd_pdu
);
1514 static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue
*queue
)
1516 struct nvmet_tcp_cmd
*cmds
;
1517 int i
, ret
= -EINVAL
, nr_cmds
= queue
->nr_cmds
;
1519 cmds
= kcalloc(nr_cmds
, sizeof(struct nvmet_tcp_cmd
), GFP_KERNEL
);
1523 for (i
= 0; i
< nr_cmds
; i
++) {
1524 ret
= nvmet_tcp_alloc_cmd(queue
, cmds
+ i
);
1534 nvmet_tcp_free_cmd(cmds
+ i
);
1540 static void nvmet_tcp_free_cmds(struct nvmet_tcp_queue
*queue
)
1542 struct nvmet_tcp_cmd
*cmds
= queue
->cmds
;
1545 for (i
= 0; i
< queue
->nr_cmds
; i
++)
1546 nvmet_tcp_free_cmd(cmds
+ i
);
1548 nvmet_tcp_free_cmd(&queue
->connect
);
1552 static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue
*queue
)
1554 struct socket
*sock
= queue
->sock
;
1556 write_lock_bh(&sock
->sk
->sk_callback_lock
);
1557 sock
->sk
->sk_data_ready
= queue
->data_ready
;
1558 sock
->sk
->sk_state_change
= queue
->state_change
;
1559 sock
->sk
->sk_write_space
= queue
->write_space
;
1560 sock
->sk
->sk_user_data
= NULL
;
1561 write_unlock_bh(&sock
->sk
->sk_callback_lock
);
1564 static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue
*queue
)
1566 struct nvmet_tcp_cmd
*cmd
= queue
->cmds
;
1569 for (i
= 0; i
< queue
->nr_cmds
; i
++, cmd
++) {
1570 if (nvmet_tcp_need_data_in(cmd
))
1571 nvmet_req_uninit(&cmd
->req
);
1574 if (!queue
->nr_cmds
&& nvmet_tcp_need_data_in(&queue
->connect
)) {
1575 /* failed in connect */
1576 nvmet_req_uninit(&queue
->connect
.req
);
1580 static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue
*queue
)
1582 struct nvmet_tcp_cmd
*cmd
= queue
->cmds
;
1585 for (i
= 0; i
< queue
->nr_cmds
; i
++, cmd
++)
1586 nvmet_tcp_free_cmd_buffers(cmd
);
1587 nvmet_tcp_free_cmd_buffers(&queue
->connect
);
1590 static void nvmet_tcp_release_queue_work(struct work_struct
*w
)
1592 struct nvmet_tcp_queue
*queue
=
1593 container_of(w
, struct nvmet_tcp_queue
, release_work
);
1595 mutex_lock(&nvmet_tcp_queue_mutex
);
1596 list_del_init(&queue
->queue_list
);
1597 mutex_unlock(&nvmet_tcp_queue_mutex
);
1599 nvmet_tcp_restore_socket_callbacks(queue
);
1600 cancel_delayed_work_sync(&queue
->tls_handshake_tmo_work
);
1601 cancel_work_sync(&queue
->io_work
);
1602 /* stop accepting incoming data */
1603 queue
->rcv_state
= NVMET_TCP_RECV_ERR
;
1605 nvmet_tcp_uninit_data_in_cmds(queue
);
1606 nvmet_sq_destroy(&queue
->nvme_sq
);
1607 cancel_work_sync(&queue
->io_work
);
1608 nvmet_tcp_free_cmd_data_in_buffers(queue
);
1609 /* ->sock will be released by fput() */
1610 fput(queue
->sock
->file
);
1611 nvmet_tcp_free_cmds(queue
);
1612 if (queue
->hdr_digest
|| queue
->data_digest
)
1613 nvmet_tcp_free_crypto(queue
);
1614 ida_free(&nvmet_tcp_queue_ida
, queue
->idx
);
1615 page_frag_cache_drain(&queue
->pf_cache
);
1619 static void nvmet_tcp_data_ready(struct sock
*sk
)
1621 struct nvmet_tcp_queue
*queue
;
1623 trace_sk_data_ready(sk
);
1625 read_lock_bh(&sk
->sk_callback_lock
);
1626 queue
= sk
->sk_user_data
;
1627 if (likely(queue
)) {
1628 if (queue
->data_ready
)
1629 queue
->data_ready(sk
);
1630 if (queue
->state
!= NVMET_TCP_Q_TLS_HANDSHAKE
)
1631 queue_work_on(queue_cpu(queue
), nvmet_tcp_wq
,
1634 read_unlock_bh(&sk
->sk_callback_lock
);
1637 static void nvmet_tcp_write_space(struct sock
*sk
)
1639 struct nvmet_tcp_queue
*queue
;
1641 read_lock_bh(&sk
->sk_callback_lock
);
1642 queue
= sk
->sk_user_data
;
1643 if (unlikely(!queue
))
1646 if (unlikely(queue
->state
== NVMET_TCP_Q_CONNECTING
)) {
1647 queue
->write_space(sk
);
1651 if (sk_stream_is_writeable(sk
)) {
1652 clear_bit(SOCK_NOSPACE
, &sk
->sk_socket
->flags
);
1653 queue_work_on(queue_cpu(queue
), nvmet_tcp_wq
, &queue
->io_work
);
1656 read_unlock_bh(&sk
->sk_callback_lock
);
1659 static void nvmet_tcp_state_change(struct sock
*sk
)
1661 struct nvmet_tcp_queue
*queue
;
1663 read_lock_bh(&sk
->sk_callback_lock
);
1664 queue
= sk
->sk_user_data
;
1668 switch (sk
->sk_state
) {
1673 case TCP_CLOSE_WAIT
:
1676 nvmet_tcp_schedule_release_queue(queue
);
1679 pr_warn("queue %d unhandled state %d\n",
1680 queue
->idx
, sk
->sk_state
);
1683 read_unlock_bh(&sk
->sk_callback_lock
);
1686 static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue
*queue
)
1688 struct socket
*sock
= queue
->sock
;
1689 struct inet_sock
*inet
= inet_sk(sock
->sk
);
1692 ret
= kernel_getsockname(sock
,
1693 (struct sockaddr
*)&queue
->sockaddr
);
1697 ret
= kernel_getpeername(sock
,
1698 (struct sockaddr
*)&queue
->sockaddr_peer
);
1703 * Cleanup whatever is sitting in the TCP transmit queue on socket
1704 * close. This is done to prevent stale data from being sent should
1705 * the network connection be restored before TCP times out.
1707 sock_no_linger(sock
->sk
);
1709 if (so_priority
> 0)
1710 sock_set_priority(sock
->sk
, so_priority
);
1712 /* Set socket type of service */
1713 if (inet
->rcv_tos
> 0)
1714 ip_sock_set_tos(sock
->sk
, inet
->rcv_tos
);
1717 write_lock_bh(&sock
->sk
->sk_callback_lock
);
1718 if (sock
->sk
->sk_state
!= TCP_ESTABLISHED
) {
1720 * If the socket is already closing, don't even start
1725 sock
->sk
->sk_user_data
= queue
;
1726 queue
->data_ready
= sock
->sk
->sk_data_ready
;
1727 sock
->sk
->sk_data_ready
= nvmet_tcp_data_ready
;
1728 queue
->state_change
= sock
->sk
->sk_state_change
;
1729 sock
->sk
->sk_state_change
= nvmet_tcp_state_change
;
1730 queue
->write_space
= sock
->sk
->sk_write_space
;
1731 sock
->sk
->sk_write_space
= nvmet_tcp_write_space
;
1732 if (idle_poll_period_usecs
)
1733 nvmet_tcp_arm_queue_deadline(queue
);
1734 queue_work_on(queue_cpu(queue
), nvmet_tcp_wq
, &queue
->io_work
);
1736 write_unlock_bh(&sock
->sk
->sk_callback_lock
);
1741 #ifdef CONFIG_NVME_TARGET_TCP_TLS
1742 static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue
*queue
)
1744 struct nvme_tcp_hdr
*hdr
= &queue
->pdu
.cmd
.hdr
;
1747 .iov_base
= (u8
*)&queue
->pdu
+ queue
->offset
,
1748 .iov_len
= sizeof(struct nvme_tcp_hdr
),
1750 char cbuf
[CMSG_LEN(sizeof(char))] = {};
1751 struct msghdr msg
= {
1752 .msg_control
= cbuf
,
1753 .msg_controllen
= sizeof(cbuf
),
1754 .msg_flags
= MSG_PEEK
,
1757 if (nvmet_port_secure_channel_required(queue
->port
->nport
))
1760 len
= kernel_recvmsg(queue
->sock
, &msg
, &iov
, 1,
1761 iov
.iov_len
, msg
.msg_flags
);
1762 if (unlikely(len
< 0)) {
1763 pr_debug("queue %d: peek error %d\n",
1768 ret
= nvmet_tcp_tls_record_ok(queue
, &msg
, cbuf
);
1772 if (len
< sizeof(struct nvme_tcp_hdr
)) {
1773 pr_debug("queue %d: short read, %d bytes missing\n",
1774 queue
->idx
, (int)iov
.iov_len
- len
);
1777 pr_debug("queue %d: hdr type %d hlen %d plen %d size %d\n",
1778 queue
->idx
, hdr
->type
, hdr
->hlen
, hdr
->plen
,
1779 (int)sizeof(struct nvme_tcp_icreq_pdu
));
1780 if (hdr
->type
== nvme_tcp_icreq
&&
1781 hdr
->hlen
== sizeof(struct nvme_tcp_icreq_pdu
) &&
1782 hdr
->plen
== cpu_to_le32(sizeof(struct nvme_tcp_icreq_pdu
))) {
1783 pr_debug("queue %d: icreq detected\n",
1790 static void nvmet_tcp_tls_handshake_done(void *data
, int status
,
1791 key_serial_t peerid
)
1793 struct nvmet_tcp_queue
*queue
= data
;
1795 pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
1796 queue
->idx
, peerid
, status
);
1797 spin_lock_bh(&queue
->state_lock
);
1798 if (WARN_ON(queue
->state
!= NVMET_TCP_Q_TLS_HANDSHAKE
)) {
1799 spin_unlock_bh(&queue
->state_lock
);
1803 queue
->tls_pskid
= peerid
;
1804 queue
->state
= NVMET_TCP_Q_CONNECTING
;
1806 queue
->state
= NVMET_TCP_Q_FAILED
;
1807 spin_unlock_bh(&queue
->state_lock
);
1809 cancel_delayed_work_sync(&queue
->tls_handshake_tmo_work
);
1811 nvmet_tcp_schedule_release_queue(queue
);
1813 nvmet_tcp_set_queue_sock(queue
);
1814 kref_put(&queue
->kref
, nvmet_tcp_release_queue
);
1817 static void nvmet_tcp_tls_handshake_timeout(struct work_struct
*w
)
1819 struct nvmet_tcp_queue
*queue
= container_of(to_delayed_work(w
),
1820 struct nvmet_tcp_queue
, tls_handshake_tmo_work
);
1822 pr_warn("queue %d: TLS handshake timeout\n", queue
->idx
);
1824 * If tls_handshake_cancel() fails we've lost the race with
1825 * nvmet_tcp_tls_handshake_done() */
1826 if (!tls_handshake_cancel(queue
->sock
->sk
))
1828 spin_lock_bh(&queue
->state_lock
);
1829 if (WARN_ON(queue
->state
!= NVMET_TCP_Q_TLS_HANDSHAKE
)) {
1830 spin_unlock_bh(&queue
->state_lock
);
1833 queue
->state
= NVMET_TCP_Q_FAILED
;
1834 spin_unlock_bh(&queue
->state_lock
);
1835 nvmet_tcp_schedule_release_queue(queue
);
1836 kref_put(&queue
->kref
, nvmet_tcp_release_queue
);
1839 static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue
*queue
)
1841 int ret
= -EOPNOTSUPP
;
1842 struct tls_handshake_args args
;
1844 if (queue
->state
!= NVMET_TCP_Q_TLS_HANDSHAKE
) {
1845 pr_warn("cannot start TLS in state %d\n", queue
->state
);
1849 kref_get(&queue
->kref
);
1850 pr_debug("queue %d: TLS ServerHello\n", queue
->idx
);
1851 memset(&args
, 0, sizeof(args
));
1852 args
.ta_sock
= queue
->sock
;
1853 args
.ta_done
= nvmet_tcp_tls_handshake_done
;
1854 args
.ta_data
= queue
;
1855 args
.ta_keyring
= key_serial(queue
->port
->nport
->keyring
);
1856 args
.ta_timeout_ms
= tls_handshake_timeout
* 1000;
1858 ret
= tls_server_hello_psk(&args
, GFP_KERNEL
);
1860 kref_put(&queue
->kref
, nvmet_tcp_release_queue
);
1861 pr_err("failed to start TLS, err=%d\n", ret
);
1863 queue_delayed_work(nvmet_wq
, &queue
->tls_handshake_tmo_work
,
1864 tls_handshake_timeout
* HZ
);
1869 static void nvmet_tcp_tls_handshake_timeout(struct work_struct
*w
) {}
1872 static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port
*port
,
1873 struct socket
*newsock
)
1875 struct nvmet_tcp_queue
*queue
;
1876 struct file
*sock_file
= NULL
;
1879 queue
= kzalloc(sizeof(*queue
), GFP_KERNEL
);
1885 INIT_WORK(&queue
->release_work
, nvmet_tcp_release_queue_work
);
1886 INIT_WORK(&queue
->io_work
, nvmet_tcp_io_work
);
1887 kref_init(&queue
->kref
);
1888 queue
->sock
= newsock
;
1891 spin_lock_init(&queue
->state_lock
);
1892 if (queue
->port
->nport
->disc_addr
.tsas
.tcp
.sectype
==
1893 NVMF_TCP_SECTYPE_TLS13
)
1894 queue
->state
= NVMET_TCP_Q_TLS_HANDSHAKE
;
1896 queue
->state
= NVMET_TCP_Q_CONNECTING
;
1897 INIT_LIST_HEAD(&queue
->free_list
);
1898 init_llist_head(&queue
->resp_list
);
1899 INIT_LIST_HEAD(&queue
->resp_send_list
);
1901 sock_file
= sock_alloc_file(queue
->sock
, O_CLOEXEC
, NULL
);
1902 if (IS_ERR(sock_file
)) {
1903 ret
= PTR_ERR(sock_file
);
1904 goto out_free_queue
;
1907 queue
->idx
= ida_alloc(&nvmet_tcp_queue_ida
, GFP_KERNEL
);
1908 if (queue
->idx
< 0) {
1913 ret
= nvmet_tcp_alloc_cmd(queue
, &queue
->connect
);
1915 goto out_ida_remove
;
1917 ret
= nvmet_sq_init(&queue
->nvme_sq
);
1919 goto out_free_connect
;
1921 nvmet_prepare_receive_pdu(queue
);
1923 mutex_lock(&nvmet_tcp_queue_mutex
);
1924 list_add_tail(&queue
->queue_list
, &nvmet_tcp_queue_list
);
1925 mutex_unlock(&nvmet_tcp_queue_mutex
);
1927 INIT_DELAYED_WORK(&queue
->tls_handshake_tmo_work
,
1928 nvmet_tcp_tls_handshake_timeout
);
1929 #ifdef CONFIG_NVME_TARGET_TCP_TLS
1930 if (queue
->state
== NVMET_TCP_Q_TLS_HANDSHAKE
) {
1931 struct sock
*sk
= queue
->sock
->sk
;
1933 /* Restore the default callbacks before starting upcall */
1934 read_lock_bh(&sk
->sk_callback_lock
);
1935 sk
->sk_user_data
= NULL
;
1936 sk
->sk_data_ready
= port
->data_ready
;
1937 read_unlock_bh(&sk
->sk_callback_lock
);
1938 if (!nvmet_tcp_try_peek_pdu(queue
)) {
1939 if (!nvmet_tcp_tls_handshake(queue
))
1941 /* TLS handshake failed, terminate the connection */
1942 goto out_destroy_sq
;
1944 /* Not a TLS connection, continue with normal processing */
1945 queue
->state
= NVMET_TCP_Q_CONNECTING
;
1949 ret
= nvmet_tcp_set_queue_sock(queue
);
1951 goto out_destroy_sq
;
1955 mutex_lock(&nvmet_tcp_queue_mutex
);
1956 list_del_init(&queue
->queue_list
);
1957 mutex_unlock(&nvmet_tcp_queue_mutex
);
1958 nvmet_sq_destroy(&queue
->nvme_sq
);
1960 nvmet_tcp_free_cmd(&queue
->connect
);
1962 ida_free(&nvmet_tcp_queue_ida
, queue
->idx
);
1964 fput(queue
->sock
->file
);
1968 pr_err("failed to allocate queue, error %d\n", ret
);
1970 sock_release(newsock
);
1973 static void nvmet_tcp_accept_work(struct work_struct
*w
)
1975 struct nvmet_tcp_port
*port
=
1976 container_of(w
, struct nvmet_tcp_port
, accept_work
);
1977 struct socket
*newsock
;
1981 ret
= kernel_accept(port
->sock
, &newsock
, O_NONBLOCK
);
1984 pr_warn("failed to accept err=%d\n", ret
);
1987 nvmet_tcp_alloc_queue(port
, newsock
);
1991 static void nvmet_tcp_listen_data_ready(struct sock
*sk
)
1993 struct nvmet_tcp_port
*port
;
1995 trace_sk_data_ready(sk
);
1997 read_lock_bh(&sk
->sk_callback_lock
);
1998 port
= sk
->sk_user_data
;
2002 if (sk
->sk_state
== TCP_LISTEN
)
2003 queue_work(nvmet_wq
, &port
->accept_work
);
2005 read_unlock_bh(&sk
->sk_callback_lock
);
2008 static int nvmet_tcp_add_port(struct nvmet_port
*nport
)
2010 struct nvmet_tcp_port
*port
;
2011 __kernel_sa_family_t af
;
2014 port
= kzalloc(sizeof(*port
), GFP_KERNEL
);
2018 switch (nport
->disc_addr
.adrfam
) {
2019 case NVMF_ADDR_FAMILY_IP4
:
2022 case NVMF_ADDR_FAMILY_IP6
:
2026 pr_err("address family %d not supported\n",
2027 nport
->disc_addr
.adrfam
);
2032 ret
= inet_pton_with_scope(&init_net
, af
, nport
->disc_addr
.traddr
,
2033 nport
->disc_addr
.trsvcid
, &port
->addr
);
2035 pr_err("malformed ip/port passed: %s:%s\n",
2036 nport
->disc_addr
.traddr
, nport
->disc_addr
.trsvcid
);
2040 port
->nport
= nport
;
2041 INIT_WORK(&port
->accept_work
, nvmet_tcp_accept_work
);
2042 if (port
->nport
->inline_data_size
< 0)
2043 port
->nport
->inline_data_size
= NVMET_TCP_DEF_INLINE_DATA_SIZE
;
2045 ret
= sock_create(port
->addr
.ss_family
, SOCK_STREAM
,
2046 IPPROTO_TCP
, &port
->sock
);
2048 pr_err("failed to create a socket\n");
2052 port
->sock
->sk
->sk_user_data
= port
;
2053 port
->data_ready
= port
->sock
->sk
->sk_data_ready
;
2054 port
->sock
->sk
->sk_data_ready
= nvmet_tcp_listen_data_ready
;
2055 sock_set_reuseaddr(port
->sock
->sk
);
2056 tcp_sock_set_nodelay(port
->sock
->sk
);
2057 if (so_priority
> 0)
2058 sock_set_priority(port
->sock
->sk
, so_priority
);
2060 ret
= kernel_bind(port
->sock
, (struct sockaddr
*)&port
->addr
,
2061 sizeof(port
->addr
));
2063 pr_err("failed to bind port socket %d\n", ret
);
2067 ret
= kernel_listen(port
->sock
, NVMET_TCP_BACKLOG
);
2069 pr_err("failed to listen %d on port sock\n", ret
);
2074 pr_info("enabling port %d (%pISpc)\n",
2075 le16_to_cpu(nport
->disc_addr
.portid
), &port
->addr
);
2080 sock_release(port
->sock
);
2086 static void nvmet_tcp_destroy_port_queues(struct nvmet_tcp_port
*port
)
2088 struct nvmet_tcp_queue
*queue
;
2090 mutex_lock(&nvmet_tcp_queue_mutex
);
2091 list_for_each_entry(queue
, &nvmet_tcp_queue_list
, queue_list
)
2092 if (queue
->port
== port
)
2093 kernel_sock_shutdown(queue
->sock
, SHUT_RDWR
);
2094 mutex_unlock(&nvmet_tcp_queue_mutex
);
2097 static void nvmet_tcp_remove_port(struct nvmet_port
*nport
)
2099 struct nvmet_tcp_port
*port
= nport
->priv
;
2101 write_lock_bh(&port
->sock
->sk
->sk_callback_lock
);
2102 port
->sock
->sk
->sk_data_ready
= port
->data_ready
;
2103 port
->sock
->sk
->sk_user_data
= NULL
;
2104 write_unlock_bh(&port
->sock
->sk
->sk_callback_lock
);
2105 cancel_work_sync(&port
->accept_work
);
2107 * Destroy the remaining queues, which are not belong to any
2110 nvmet_tcp_destroy_port_queues(port
);
2112 sock_release(port
->sock
);
2116 static void nvmet_tcp_delete_ctrl(struct nvmet_ctrl
*ctrl
)
2118 struct nvmet_tcp_queue
*queue
;
2120 mutex_lock(&nvmet_tcp_queue_mutex
);
2121 list_for_each_entry(queue
, &nvmet_tcp_queue_list
, queue_list
)
2122 if (queue
->nvme_sq
.ctrl
== ctrl
)
2123 kernel_sock_shutdown(queue
->sock
, SHUT_RDWR
);
2124 mutex_unlock(&nvmet_tcp_queue_mutex
);
2127 static u16
nvmet_tcp_install_queue(struct nvmet_sq
*sq
)
2129 struct nvmet_tcp_queue
*queue
=
2130 container_of(sq
, struct nvmet_tcp_queue
, nvme_sq
);
2133 struct nvmet_tcp_queue
*q
;
2136 /* Check for pending controller teardown */
2137 mutex_lock(&nvmet_tcp_queue_mutex
);
2138 list_for_each_entry(q
, &nvmet_tcp_queue_list
, queue_list
) {
2139 if (q
->nvme_sq
.ctrl
== sq
->ctrl
&&
2140 q
->state
== NVMET_TCP_Q_DISCONNECTING
)
2143 mutex_unlock(&nvmet_tcp_queue_mutex
);
2144 if (pending
> NVMET_TCP_BACKLOG
)
2145 return NVME_SC_CONNECT_CTRL_BUSY
;
2148 queue
->nr_cmds
= sq
->size
* 2;
2149 if (nvmet_tcp_alloc_cmds(queue
)) {
2151 return NVME_SC_INTERNAL
;
2156 static void nvmet_tcp_disc_port_addr(struct nvmet_req
*req
,
2157 struct nvmet_port
*nport
, char *traddr
)
2159 struct nvmet_tcp_port
*port
= nport
->priv
;
2161 if (inet_addr_is_any((struct sockaddr
*)&port
->addr
)) {
2162 struct nvmet_tcp_cmd
*cmd
=
2163 container_of(req
, struct nvmet_tcp_cmd
, req
);
2164 struct nvmet_tcp_queue
*queue
= cmd
->queue
;
2166 sprintf(traddr
, "%pISc", (struct sockaddr
*)&queue
->sockaddr
);
2168 memcpy(traddr
, nport
->disc_addr
.traddr
, NVMF_TRADDR_SIZE
);
2172 static ssize_t
nvmet_tcp_host_port_addr(struct nvmet_ctrl
*ctrl
,
2173 char *traddr
, size_t traddr_len
)
2175 struct nvmet_sq
*sq
= ctrl
->sqs
[0];
2176 struct nvmet_tcp_queue
*queue
=
2177 container_of(sq
, struct nvmet_tcp_queue
, nvme_sq
);
2179 if (queue
->sockaddr_peer
.ss_family
== AF_UNSPEC
)
2181 return snprintf(traddr
, traddr_len
, "%pISc",
2182 (struct sockaddr
*)&queue
->sockaddr_peer
);
static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
	.owner			= THIS_MODULE,
	.type			= NVMF_TRTYPE_TCP,
	.add_port		= nvmet_tcp_add_port,
	.remove_port		= nvmet_tcp_remove_port,
	.queue_response		= nvmet_tcp_queue_response,
	.delete_ctrl		= nvmet_tcp_delete_ctrl,
	.install_queue		= nvmet_tcp_install_queue,
	.disc_traddr		= nvmet_tcp_disc_port_addr,
	.host_traddr		= nvmet_tcp_host_port_addr,
};
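/*
 * These ops hook the TCP transport into the nvmet core; nvmet_tcp_init()
 * registers them via nvmet_register_transport(), and the NVMF_TRTYPE_TCP type
 * matches the "nvmet-transport-3" module alias below.
 */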
static int __init nvmet_tcp_init(void)
{
	int ret;

	nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq",
				WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (!nvmet_tcp_wq)
		return -ENOMEM;

	ret = nvmet_register_transport(&nvmet_tcp_ops);
	if (ret)
		goto err;

	return 0;
err:
	destroy_workqueue(nvmet_tcp_wq);
	return ret;
}

static void __exit nvmet_tcp_exit(void)
{
	struct nvmet_tcp_queue *queue;

	nvmet_unregister_transport(&nvmet_tcp_ops);

	flush_workqueue(nvmet_wq);
	mutex_lock(&nvmet_tcp_queue_mutex);
	list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	mutex_unlock(&nvmet_tcp_queue_mutex);
	flush_workqueue(nvmet_wq);

	destroy_workqueue(nvmet_tcp_wq);
	ida_destroy(&nvmet_tcp_queue_ida);
}

module_init(nvmet_tcp_init);
module_exit(nvmet_tcp_exit);

MODULE_DESCRIPTION("NVMe target TCP transport driver");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */
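/*
 * Illustrative configuration (standard nvmet configfs usage, not defined in
 * this file): a TCP port is created under /sys/kernel/config/nvmet/ports/<n>/
 * with addr_trtype=tcp, addr_adrfam=ipv4, addr_traddr and addr_trsvcid
 * (conventionally 4420); enabling that port ends up calling
 * nvmet_tcp_add_port() above.
 */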