2 * NVMe I/O command implementation.
3 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/blkdev.h>
16 #include <linux/module.h>
/*
 * nvmet_bdev_ns_enable() - open the block device backing a namespace.
 * @ns: namespace whose device_path names the device to open.
 *
 * Opens ns->device_path for reading and writing and stores the result in
 * ns->bdev.  A failed open is logged unless the error is -ENOTBLK.  After
 * the error check, ns->size is read from the device inode and
 * ns->blksize_shift is derived from the device's logical block size.
 *
 * NOTE(review): the return statements and any cleanup of ns->bdev on the
 * error path are not visible in this listing; presumably the open error is
 * returned on failure and 0 on success - confirm against the full file.
 */
19 int nvmet_bdev_ns_enable(struct nvmet_ns
*ns
)
23 ns
->bdev
= blkdev_get_by_path(ns
->device_path
,
24 FMODE_READ
| FMODE_WRITE
, NULL
);
25 if (IS_ERR(ns
->bdev
)) {
26 ret
= PTR_ERR(ns
->bdev
);
/*
 * -ENOTBLK is deliberately not reported; presumably the path may name
 * something other than a block device that is handled elsewhere -
 * TODO confirm with the caller.
 */
27 if (ret
!= -ENOTBLK
) {
28 pr_err("failed to open block device %s: (%ld)\n",
29 ns
->device_path
, PTR_ERR(ns
->bdev
));
/* Cache the device size and log2 of its logical block size. */
34 ns
->size
= i_size_read(ns
->bdev
->bd_inode
);
35 ns
->blksize_shift
= blksize_bits(bdev_logical_block_size(ns
->bdev
));
/*
 * nvmet_bdev_ns_disable() - release the block device backing a namespace.
 * @ns: namespace whose ns->bdev is released.
 *
 * Drops ns->bdev with the same read/write mode flags that
 * nvmet_bdev_ns_enable() passes when opening it.
 *
 * NOTE(review): any guard around the blkdev_put() call and any reset of
 * ns->bdev are not visible in this listing - confirm against the full file.
 */
39 void nvmet_bdev_ns_disable(struct nvmet_ns
*ns
)
42 blkdev_put(ns
->bdev
, FMODE_WRITE
| FMODE_READ
);
/*
 * blk_to_nvme_status() - translate a block layer status into an NVMe status.
 * @req: request being completed; req->error_loc and req->error_slba are
 *	 filled in here for the failure cases.
 * @blk_sts: block layer status to translate.
 *
 * The status starts out as NVME_SC_SUCCESS and BLK_STS_OK is tested first as
 * the likely case.  Each failure mapping below picks an NVMe status code and
 * records in req->error_loc the offset of the command field the error is
 * attributed to (length, slba, nsid or opcode).  The final switch on the
 * command opcode records the command's starting LBA in req->error_slba.
 *
 * The result is used by callers as the status handed to
 * nvmet_req_complete().
 *
 * NOTE(review): the case labels that select each mapping, and the return
 * statements, are not visible in this listing - confirm which blk_status_t
 * value leads to which NVMe status against the full file.
 */
47 static u16
blk_to_nvme_status(struct nvmet_req
*req
, blk_status_t blk_sts
)
49 u16 status
= NVME_SC_SUCCESS
;
51 if (likely(blk_sts
== BLK_STS_OK
))
54 * Right now there exists M : 1 mapping between block layer error
55 * to the NVMe status code (see nvme_error_status()). For consistency,
56 * when we reverse map we use most appropriate NVMe Status code from
57 * the group of the NVMe staus codes used in the nvme_error_status().
/* Capacity exceeded: attributed to the length field of the rw command. */
61 status
= NVME_SC_CAP_EXCEEDED
| NVME_SC_DNR
;
62 req
->error_loc
= offsetof(struct nvme_rw_command
, length
);
/* LBA out of range: attributed to the starting LBA field. */
65 status
= NVME_SC_LBA_RANGE
| NVME_SC_DNR
;
66 req
->error_loc
= offsetof(struct nvme_rw_command
, slba
);
/*
 * Opcode-related failure: the NVMe status chosen depends on which command
 * was being executed.
 */
69 req
->error_loc
= offsetof(struct nvme_common_command
, opcode
);
70 switch (req
->cmd
->common
.opcode
) {
72 case nvme_cmd_write_zeroes
:
73 status
= NVME_SC_ONCS_NOT_SUPPORTED
| NVME_SC_DNR
;
76 status
= NVME_SC_INVALID_OPCODE
| NVME_SC_DNR
;
/* Access denied: the only mapping here that does not set NVME_SC_DNR. */
80 status
= NVME_SC_ACCESS_DENIED
;
81 req
->error_loc
= offsetof(struct nvme_rw_command
, nsid
);
/* Internal error, attributed to the opcode field. */
86 status
= NVME_SC_INTERNAL
| NVME_SC_DNR
;
87 req
->error_loc
= offsetof(struct nvme_common_command
, opcode
);
/* Record the starting LBA of the failed command, per command layout. */
90 switch (req
->cmd
->common
.opcode
) {
93 req
->error_slba
= le64_to_cpu(req
->cmd
->rw
.slba
);
95 case nvme_cmd_write_zeroes
:
97 le64_to_cpu(req
->cmd
->write_zeroes
.slba
);
/*
 * nvmet_bio_done() - bio completion callback for requests issued here.
 * @bio: completed bio; bio->bi_private carries the owning nvmet_req.
 *
 * Installed as bi_end_io by the execute handlers in this file.  Completes
 * the request with the NVMe status translated from bio->bi_status.
 *
 * NOTE(review): the statement guarded by the final test is not visible in
 * this listing; presumably a bio other than the request's embedded
 * inline_bio is released there - confirm against the full file.
 */
105 static void nvmet_bio_done(struct bio
*bio
)
107 struct nvmet_req
*req
= bio
->bi_private
;
109 nvmet_req_complete(req
, blk_to_nvme_status(req
, bio
->bi_status
));
/* The inline bio is embedded in the request, so it is treated differently. */
110 if (bio
!= &req
->b
.inline_bio
)
/*
 * nvmet_bdev_execute_rw() - execute an NVMe read or write on the block device.
 * @req: request carrying the rw command and its scatterlist (req->sg,
 *	 req->sg_cnt).
 *
 * Builds one or more bios covering the request's scatterlist, starting at
 * the sector derived from the command's starting LBA.  Completion is
 * reported through nvmet_bio_done(), which is installed on the first bio.
 *
 * NOTE(review): several statements are not visible in this listing - the
 * condition guarding the early nvmet_req_complete(req, 0), the assignments
 * to op, the read branch, the body of the FUA test, the comparison that
 * terminates the bio_add_page() loop, and the bio submission calls.
 * Confirm against the full file before relying on the details.
 */
114 static void nvmet_bdev_execute_rw(struct nvmet_req
*req
)
116 int sg_cnt
= req
->sg_cnt
;
118 struct scatterlist
*sg
;
120 int op
, op_flags
= 0, i
;
123 nvmet_req_complete(req
, 0);
/* Writes are issued with REQ_SYNC | REQ_IDLE; FUA in the command is checked. */
127 if (req
->cmd
->rw
.opcode
== nvme_cmd_write
) {
129 op_flags
= REQ_SYNC
| REQ_IDLE
;
130 if (req
->cmd
->rw
.control
& cpu_to_le16(NVME_RW_FUA
))
/* Disable merging when the first scatterlist page is PCI P2PDMA memory. */
136 if (is_pci_p2pdma_page(sg_page(req
->sg
)))
137 op_flags
|= REQ_NOMERGE
;
/* Convert the starting LBA from logical blocks to 512-byte sectors. */
139 sector
= le64_to_cpu(req
->cmd
->rw
.slba
);
140 sector
<<= (req
->ns
->blksize_shift
- 9);
/*
 * Transfers up to NVMET_MAX_INLINE_DATA_LEN use the bio and bvec array
 * embedded in the request; otherwise a bio is allocated.
 */
142 if (req
->data_len
<= NVMET_MAX_INLINE_DATA_LEN
) {
143 bio
= &req
->b
.inline_bio
;
144 bio_init(bio
, req
->inline_bvec
, ARRAY_SIZE(req
->inline_bvec
));
146 bio
= bio_alloc(GFP_KERNEL
, min(sg_cnt
, BIO_MAX_PAGES
));
148 bio_set_dev(bio
, req
->ns
->bdev
);
149 bio
->bi_iter
.bi_sector
= sector
;
150 bio
->bi_private
= req
;
151 bio
->bi_end_io
= nvmet_bio_done
;
152 bio_set_op_attrs(bio
, op
, op_flags
);
/*
 * Add every scatterlist entry to the current bio.  Inside the while loop a
 * new bio is allocated at the current sector and chained to the previous
 * one.
 */
154 for_each_sg(req
->sg
, sg
, req
->sg_cnt
, i
) {
155 while (bio_add_page(bio
, sg_page(sg
), sg
->length
, sg
->offset
)
157 struct bio
*prev
= bio
;
159 bio
= bio_alloc(GFP_KERNEL
, min(sg_cnt
, BIO_MAX_PAGES
));
160 bio_set_dev(bio
, req
->ns
->bdev
);
161 bio
->bi_iter
.bi_sector
= sector
;
162 bio_set_op_attrs(bio
, op
, op_flags
);
164 bio_chain(bio
, prev
);
/* Advance the running sector by this entry's length in 512-byte units. */
168 sector
+= sg
->length
>> 9;
/*
 * nvmet_bdev_execute_flush() - execute an NVMe flush on the block device.
 * @req: request to execute; its embedded inline bio is used.
 *
 * Prepares the request's inline bio as an empty write with REQ_PREFLUSH set,
 * targeting the namespace's block device, with nvmet_bio_done() as the
 * completion callback.
 *
 * NOTE(review): the call that submits the bio is not visible in this
 * listing - confirm against the full file.
 */
175 static void nvmet_bdev_execute_flush(struct nvmet_req
*req
)
177 struct bio
*bio
= &req
->b
.inline_bio
;
179 bio_init(bio
, req
->inline_bvec
, ARRAY_SIZE(req
->inline_bvec
));
180 bio_set_dev(bio
, req
->ns
->bdev
);
181 bio
->bi_private
= req
;
182 bio
->bi_end_io
= nvmet_bio_done
;
183 bio
->bi_opf
= REQ_OP_WRITE
| REQ_PREFLUSH
;
/*
 * nvmet_bdev_flush() - flush the namespace's block device via
 * blkdev_issue_flush().
 * @req: request whose namespace block device is flushed.
 *
 * Return: NVME_SC_INTERNAL | NVME_SC_DNR when blkdev_issue_flush() reports
 * an error.
 *
 * NOTE(review): the success return is not visible in this listing;
 * presumably 0 - confirm against the full file.
 */
188 u16
nvmet_bdev_flush(struct nvmet_req
*req
)
190 if (blkdev_issue_flush(req
->ns
->bdev
, GFP_KERNEL
, NULL
))
191 return NVME_SC_INTERNAL
| NVME_SC_DNR
;
/*
 * nvmet_bdev_discard_range() - queue a discard for one DSM range.
 * @req: request the range belongs to.
 * @range: range descriptor; slba and nlb are little-endian on the wire.
 * @bio: passed in by the caller so discards can be accumulated across calls.
 *
 * The range's starting LBA and block count are converted from logical
 * blocks to 512-byte sectors before being handed to
 * __blkdev_issue_discard().  The range's starting LBA is recorded in
 * req->error_slba and the errno is translated to an NVMe status.
 *
 * NOTE(review): the remaining __blkdev_issue_discard() arguments, the
 * condition guarding the error_slba assignment and translated return, and
 * the success return are not visible in this listing - confirm against the
 * full file.
 */
195 static u16
nvmet_bdev_discard_range(struct nvmet_req
*req
,
196 struct nvme_dsm_range
*range
, struct bio
**bio
)
198 struct nvmet_ns
*ns
= req
->ns
;
201 ret
= __blkdev_issue_discard(ns
->bdev
,
202 le64_to_cpu(range
->slba
) << (ns
->blksize_shift
- 9),
203 le32_to_cpu(range
->nlb
) << (ns
->blksize_shift
- 9),
207 req
->error_slba
= le64_to_cpu(range
->slba
);
209 return blk_to_nvme_status(req
, errno_to_blk_status(ret
));
/*
 * nvmet_bdev_execute_discard() - execute the deallocate part of a DSM command.
 * @req: request carrying the DSM command and its range list.
 *
 * Copies each range descriptor out of the request's SGL and passes it to
 * nvmet_bdev_discard_range(), accumulating the work in a single bio pointer.
 * When a bio was produced, it is set up to complete the request through
 * nvmet_bio_done(); otherwise the request is completed directly with the
 * collected status.
 *
 * NOTE(review): the status checks inside the loop, the conditions selecting
 * between the bio path and direct completion, the condition under which
 * bi_status is forced to BLK_STS_IOERR, and the bio submission are not
 * visible in this listing - confirm against the full file.
 */
212 static void nvmet_bdev_execute_discard(struct nvmet_req
*req
)
214 struct nvme_dsm_range range
;
215 struct bio
*bio
= NULL
;
/*
 * The bound is inclusive: dsm.nr is treated as a zero-based count, matching
 * the (nr + 1) * sizeof(struct nvme_dsm_range) data length computed in
 * nvmet_bdev_parse_io_cmd().
 */
219 for (i
= 0; i
<= le32_to_cpu(req
->cmd
->dsm
.nr
); i
++) {
220 status
= nvmet_copy_from_sgl(req
, i
* sizeof(range
), &range
,
225 status
= nvmet_bdev_discard_range(req
, &range
, &bio
);
231 bio
->bi_private
= req
;
232 bio
->bi_end_io
= nvmet_bio_done
;
234 bio
->bi_status
= BLK_STS_IOERR
;
240 nvmet_req_complete(req
, status
);
/*
 * nvmet_bdev_execute_dsm() - dispatch a Dataset Management command.
 * @req: request carrying the DSM command.
 *
 * Switches on the command's attributes field.  One case hands the request
 * to nvmet_bdev_execute_discard(); the integral-read and integral-write
 * hints (NVME_DSMGMT_IDR, NVME_DSMGMT_IDW) are not implemented and the
 * request is completed with status 0.
 *
 * NOTE(review): the case label that selects the discard path and any
 * default label are not visible in this listing - confirm against the full
 * file.
 */
244 static void nvmet_bdev_execute_dsm(struct nvmet_req
*req
)
246 switch (le32_to_cpu(req
->cmd
->dsm
.attributes
)) {
248 nvmet_bdev_execute_discard(req
);
250 case NVME_DSMGMT_IDR
:
251 case NVME_DSMGMT_IDW
:
253 /* Not supported yet */
254 nvmet_req_complete(req
, 0);
/*
 * nvmet_bdev_execute_write_zeroes() - execute an NVMe Write Zeroes command.
 * @req: request carrying the write_zeroes command.
 *
 * Converts the command's starting LBA and block count to 512-byte sectors
 * and asks the block layer to zero that range via __blkdev_issue_zeroout().
 * The errno is translated into an NVMe status.  When a bio was produced it
 * is set up to complete the request through nvmet_bio_done(); otherwise the
 * request is completed directly with the translated status.
 *
 * NOTE(review): the conditions selecting between the bio path and direct
 * completion, and the bio submission, are not visible in this listing -
 * confirm against the full file.
 */
259 static void nvmet_bdev_execute_write_zeroes(struct nvmet_req
*req
)
261 struct nvme_write_zeroes_cmd
*write_zeroes
= &req
->cmd
->write_zeroes
;
262 struct bio
*bio
= NULL
;
263 u16 status
= NVME_SC_SUCCESS
;
268 sector
= le64_to_cpu(write_zeroes
->slba
) <<
269 (req
->ns
->blksize_shift
- 9);
/*
 * The length field is used as a zero-based block count, hence the + 1; it
 * is widened to sector_t before the shift.
 */
270 nr_sector
= (((sector_t
)le16_to_cpu(write_zeroes
->length
) + 1) <<
271 (req
->ns
->blksize_shift
- 9));
273 ret
= __blkdev_issue_zeroout(req
->ns
->bdev
, sector
, nr_sector
,
274 GFP_KERNEL
, &bio
, 0);
275 status
= blk_to_nvme_status(req
, errno_to_blk_status(ret
));
277 bio
->bi_private
= req
;
278 bio
->bi_end_io
= nvmet_bio_done
;
281 nvmet_req_complete(req
, status
);
/*
 * nvmet_bdev_parse_io_cmd() - select the execute handler for an I/O command.
 * @req: request whose command opcode is inspected; req->execute and, for
 *	 commands that transfer data, req->data_len are filled in.
 *
 * Handlers assigned here: nvmet_bdev_execute_rw (data_len from
 * nvmet_rw_len()), nvmet_bdev_execute_flush, nvmet_bdev_execute_dsm
 * (data_len covers nr + 1 range descriptors, i.e. dsm.nr is zero-based) and
 * nvmet_bdev_execute_write_zeroes.
 *
 * Return: NVME_SC_INVALID_OPCODE | NVME_SC_DNR for an unhandled opcode,
 * after logging it and pointing req->error_loc at the opcode field.
 *
 * NOTE(review): most case labels, the data_len assignments for flush and
 * write zeroes (if any), and the success returns are not visible in this
 * listing - confirm against the full file.
 */
285 u16
nvmet_bdev_parse_io_cmd(struct nvmet_req
*req
)
287 struct nvme_command
*cmd
= req
->cmd
;
289 switch (cmd
->common
.opcode
) {
292 req
->execute
= nvmet_bdev_execute_rw
;
293 req
->data_len
= nvmet_rw_len(req
);
296 req
->execute
= nvmet_bdev_execute_flush
;
300 req
->execute
= nvmet_bdev_execute_dsm
;
301 req
->data_len
= (le32_to_cpu(cmd
->dsm
.nr
) + 1) *
302 sizeof(struct nvme_dsm_range
);
304 case nvme_cmd_write_zeroes
:
305 req
->execute
= nvmet_bdev_execute_write_zeroes
;
308 pr_err("unhandled cmd %d on qid %d\n", cmd
->common
.opcode
,
310 req
->error_loc
= offsetof(struct nvme_common_command
, opcode
);
311 return NVME_SC_INVALID_OPCODE
| NVME_SC_DNR
;