1 // SPDX-License-Identifier: GPL-2.0
3 * NVMe I/O command implementation.
4 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 #include <linux/blkdev.h>
8 #include <linux/module.h>
/*
 * nvmet_bdev_set_limits() - derive namespace identify fields from the
 * request-queue limits of a block device.
 * @bdev: block device whose queue limits are read.
 * @id:   namespace identify structure that is filled in.
 *
 * The statements visible here compute the number of logical blocks per
 * physical block, and set id->npdg and id->nows from the discard
 * granularity and the optimal I/O size, each divided by the logical block
 * size and passed through to0based().
 *
 * NOTE(review): the leading numbers and the gaps between them suggest this
 * listing is an incomplete extraction of the original file; statements that
 * are not shown here are deliberately not described.
 */
11 void nvmet_bdev_set_limits(struct block_device
*bdev
, struct nvme_id_ns
*id
)
13 const struct queue_limits
*ql
= &bdev_get_queue(bdev
)->limits
;
14 /* Number of logical blocks per physical block. */
15 const u32 lpp
= ql
->physical_block_size
/ ql
->logical_block_size
;
16 /* Logical blocks per physical block, 0's based. */
17 const __le16 lpp0b
= to0based(lpp
);
20 * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
21 * NAWUPF, and NACWU are defined for this namespace and should be
22 * used by the host for this namespace instead of the AWUN, AWUPF,
23 * and ACWU fields in the Identify Controller data structure. If
24 * any of these fields are zero that means that the corresponding
25 * field from the identify controller data structure should be used.
33 * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
34 * NOWS are defined for this namespace and should be used by
35 * the host for I/O optimization.
38 /* NPWG = Namespace Preferred Write Granularity. 0's based */
40 /* NPWA = Namespace Preferred Write Alignment. 0's based */
42 /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
43 id
->npdg
= to0based(ql
->discard_granularity
/ ql
->logical_block_size
);
44 /* NPDA = Namespace Preferred Deallocate Alignment */
46 /* NOWS = Namespace Optimal Write Size */
47 id
->nows
= to0based(ql
->io_opt
/ ql
->logical_block_size
);
/*
 * nvmet_bdev_ns_enable() - open the block device that backs a namespace.
 * @ns: namespace; ns->device_path names the device to open.
 *
 * Opens ns->device_path for reading and writing via blkdev_get_by_path()
 * and stores the result in ns->bdev.  When that result is an error pointer
 * the error code is extracted, and it is logged unless it is -ENOTBLK.
 * The visible tail of the function records the device size (from the
 * bdev inode) and the logical-block-size shift in @ns.
 *
 * NOTE(review): the return statements are not part of this listing;
 * presumably 0 on success and the negative errno on failure - confirm
 * against the complete source.
 */
50 int nvmet_bdev_ns_enable(struct nvmet_ns
*ns
)
54 ns
->bdev
= blkdev_get_by_path(ns
->device_path
,
55 FMODE_READ
| FMODE_WRITE
, NULL
);
56 if (IS_ERR(ns
->bdev
)) {
57 ret
= PTR_ERR(ns
->bdev
);
/* -ENOTBLK is the one error that is not reported in the log. */
58 if (ret
!= -ENOTBLK
) {
59 pr_err("failed to open block device %s: (%ld)\n",
60 ns
->device_path
, PTR_ERR(ns
->bdev
));
/* Cache the size and the log2 of the logical block size. */
65 ns
->size
= i_size_read(ns
->bdev
->bd_inode
);
66 ns
->blksize_shift
= blksize_bits(bdev_logical_block_size(ns
->bdev
));
/*
 * nvmet_bdev_ns_disable() - release the block device of a namespace.
 * @ns: namespace whose ns->bdev is released.
 *
 * Calls blkdev_put() with the same read/write mode bits that
 * nvmet_bdev_ns_enable() passes when opening the device.
 *
 * NOTE(review): any guard around the call or reset of ns->bdev is not
 * visible in this listing.
 */
70 void nvmet_bdev_ns_disable(struct nvmet_ns
*ns
)
73 blkdev_put(ns
->bdev
, FMODE_WRITE
| FMODE_READ
);
/*
 * blk_to_nvme_status() - translate a block layer status into an NVMe
 * status code and record error details in the request.
 * @req:     request; req->error_loc and req->error_slba are written.
 * @blk_sts: completion status reported by the block layer.
 *
 * The status starts out as NVME_SC_SUCCESS and BLK_STS_OK is tested first
 * as the likely case.  For other values the visible assignments pick an
 * NVMe status and set req->error_loc to the offset of the command field
 * associated with the error.  A later switch on the command opcode stores
 * the starting LBA of the command in req->error_slba.
 *
 * NOTE(review): the case labels that select between the status
 * assignments are not part of this listing, so the exact mapping from
 * blk_status_t values cannot be documented from here.
 * NOTE(review): the orphaned comment text below spells "status" as
 * "staus"; it is left untouched because its comment delimiters are missing
 * from this listing.
 */
78 static u16
blk_to_nvme_status(struct nvmet_req
*req
, blk_status_t blk_sts
)
80 u16 status
= NVME_SC_SUCCESS
;
82 if (likely(blk_sts
== BLK_STS_OK
))
85 * Right now there exists M : 1 mapping between block layer error
86 * to the NVMe status code (see nvme_error_status()). For consistency,
87 * when we reverse map we use most appropriate NVMe Status code from
88 * the group of the NVMe staus codes used in the nvme_error_status().
92 status
= NVME_SC_CAP_EXCEEDED
| NVME_SC_DNR
;
93 req
->error_loc
= offsetof(struct nvme_rw_command
, length
);
96 status
= NVME_SC_LBA_RANGE
| NVME_SC_DNR
;
97 req
->error_loc
= offsetof(struct nvme_rw_command
, slba
);
100 req
->error_loc
= offsetof(struct nvme_common_command
, opcode
);
/* The status chosen here depends on which command was issued. */
101 switch (req
->cmd
->common
.opcode
) {
103 case nvme_cmd_write_zeroes
:
104 status
= NVME_SC_ONCS_NOT_SUPPORTED
| NVME_SC_DNR
;
107 status
= NVME_SC_INVALID_OPCODE
| NVME_SC_DNR
;
/* Unlike the other statuses visible here, this one does not set NVME_SC_DNR. */
111 status
= NVME_SC_ACCESS_DENIED
;
112 req
->error_loc
= offsetof(struct nvme_rw_command
, nsid
);
117 status
= NVME_SC_INTERNAL
| NVME_SC_DNR
;
118 req
->error_loc
= offsetof(struct nvme_common_command
, opcode
);
/* Record the starting LBA of the command in req->error_slba. */
121 switch (req
->cmd
->common
.opcode
) {
124 req
->error_slba
= le64_to_cpu(req
->cmd
->rw
.slba
);
126 case nvme_cmd_write_zeroes
:
128 le64_to_cpu(req
->cmd
->write_zeroes
.slba
);
/*
 * nvmet_bio_done() - bio completion handler.
 * @bio: completed bio; bio->bi_private holds the owning nvmet request.
 *
 * Completes the request with the NVMe status that blk_to_nvme_status()
 * derives from bio->bi_status, then checks whether @bio is the bio that
 * is embedded in the request.
 *
 * NOTE(review): the statement guarded by that check is not visible;
 * presumably it drops the reference on a separately allocated bio -
 * confirm against the complete source.
 */
136 static void nvmet_bio_done(struct bio
*bio
)
138 struct nvmet_req
*req
= bio
->bi_private
;
140 nvmet_req_complete(req
, blk_to_nvme_status(req
, bio
->bi_status
));
141 if (bio
!= &req
->b
.inline_bio
)
/*
 * nvmet_bdev_execute_rw() - execute an NVMe read or write command against
 * the backing block device.
 * @req: request carrying the command, its scatterlist and its namespace.
 *
 * Visible steps:
 *  - validate the transfer length with nvmet_check_data_len();
 *  - pick the operation flags (writes use REQ_OP_WRITE | REQ_SYNC |
 *    REQ_IDLE, and the FUA control bit and P2P DMA pages are tested);
 *  - turn the starting LBA into a sector number;
 *  - use the bio embedded in the request when the transfer length does not
 *    exceed NVMET_MAX_INLINE_DATA_LEN; a bio_alloc() call is also visible;
 *  - walk the scatterlist under a block plug, adding each entry to the
 *    current bio and chaining a newly allocated bio to the previous one
 *    inside the bio_add_page() loop.
 *
 * NOTE(review): several statements (declarations, the read-side flags, the
 * bio submission calls and the loop condition on bio_add_page()) are not
 * part of this listing and are therefore not described.
 */
145 static void nvmet_bdev_execute_rw(struct nvmet_req
*req
)
147 int sg_cnt
= req
->sg_cnt
;
149 struct scatterlist
*sg
;
150 struct blk_plug plug
;
154 if (!nvmet_check_data_len(req
, nvmet_rw_len(req
)))
158 nvmet_req_complete(req
, 0);
162 if (req
->cmd
->rw
.opcode
== nvme_cmd_write
) {
163 op
= REQ_OP_WRITE
| REQ_SYNC
| REQ_IDLE
;
164 if (req
->cmd
->rw
.control
& cpu_to_le16(NVME_RW_FUA
))
170 if (is_pci_p2pdma_page(sg_page(req
->sg
)))
/* Starting LBA, rescaled below from logical blocks to 512-byte units. */
173 sector
= le64_to_cpu(req
->cmd
->rw
.slba
);
174 sector
<<= (req
->ns
->blksize_shift
- 9);
176 if (req
->transfer_len
<= NVMET_MAX_INLINE_DATA_LEN
) {
177 bio
= &req
->b
.inline_bio
;
178 bio_init(bio
, req
->inline_bvec
, ARRAY_SIZE(req
->inline_bvec
));
180 bio
= bio_alloc(GFP_KERNEL
, min(sg_cnt
, BIO_MAX_PAGES
));
182 bio_set_dev(bio
, req
->ns
->bdev
);
183 bio
->bi_iter
.bi_sector
= sector
;
/* nvmet_bio_done() finds the request again through bi_private. */
184 bio
->bi_private
= req
;
185 bio
->bi_end_io
= nvmet_bio_done
;
188 blk_start_plug(&plug
);
189 for_each_sg(req
->sg
, sg
, req
->sg_cnt
, i
) {
190 while (bio_add_page(bio
, sg_page(sg
), sg
->length
, sg
->offset
)
192 struct bio
*prev
= bio
;
/* Start a new bio at the current sector and chain it to the previous one. */
194 bio
= bio_alloc(GFP_KERNEL
, min(sg_cnt
, BIO_MAX_PAGES
));
195 bio_set_dev(bio
, req
->ns
->bdev
);
196 bio
->bi_iter
.bi_sector
= sector
;
199 bio_chain(bio
, prev
);
/* Advance by the length of this scatterlist entry in 512-byte units. */
203 sector
+= sg
->length
>> 9;
208 blk_finish_plug(&plug
);
/*
 * nvmet_bdev_execute_flush() - execute an NVMe flush command.
 * @req: request to execute; its embedded bio is used.
 *
 * Checks that the command carries no data, then initialises the bio
 * embedded in the request, points it at the namespace's block device,
 * installs nvmet_bio_done() as the completion handler and marks it as a
 * write with REQ_PREFLUSH set.
 *
 * NOTE(review): the call that submits the bio is not part of this listing.
 */
211 static void nvmet_bdev_execute_flush(struct nvmet_req
*req
)
213 struct bio
*bio
= &req
->b
.inline_bio
;
215 if (!nvmet_check_data_len(req
, 0))
218 bio_init(bio
, req
->inline_bvec
, ARRAY_SIZE(req
->inline_bvec
));
219 bio_set_dev(bio
, req
->ns
->bdev
);
220 bio
->bi_private
= req
;
221 bio
->bi_end_io
= nvmet_bio_done
;
222 bio
->bi_opf
= REQ_OP_WRITE
| REQ_PREFLUSH
;
/*
 * nvmet_bdev_flush() - flush the block device of a request's namespace
 * through blkdev_issue_flush().
 * @req: request whose namespace's block device is flushed.
 *
 * Return: NVME_SC_INTERNAL | NVME_SC_DNR when blkdev_issue_flush() returns
 * non-zero.  NOTE(review): the success return is not visible in this
 * listing; presumably 0 - confirm against the complete source.
 */
227 u16
nvmet_bdev_flush(struct nvmet_req
*req
)
229 if (blkdev_issue_flush(req
->ns
->bdev
, GFP_KERNEL
, NULL
))
230 return NVME_SC_INTERNAL
| NVME_SC_DNR
;
/*
 * nvmet_bdev_discard_range() - issue a discard for one DSM range.
 * @req:   request the range belongs to; req->error_slba is set on failure.
 * @range: range descriptor; its slba and nlb fields are little-endian.
 * @bio:   bio pointer handed in by the caller.  NOTE(review): how it is
 *         passed on to __blkdev_issue_discard() is not visible here.
 *
 * The start and length of the range are shifted by (blksize_shift - 9)
 * before being passed to __blkdev_issue_discard().
 *
 * NOTE(review): the nlb value is shifted without a visible widening cast,
 * whereas the write-zeroes path in this file casts to sector_t before
 * shifting; verify that large ranges cannot overflow here.
 *
 * Return: NVME_SC_SUCCESS when the call succeeds or fails with
 * -EOPNOTSUPP, otherwise the NVMe status produced by
 * errno_to_nvme_status().
 */
234 static u16
nvmet_bdev_discard_range(struct nvmet_req
*req
,
235 struct nvme_dsm_range
*range
, struct bio
**bio
)
237 struct nvmet_ns
*ns
= req
->ns
;
240 ret
= __blkdev_issue_discard(ns
->bdev
,
241 le64_to_cpu(range
->slba
) << (ns
->blksize_shift
- 9),
242 le32_to_cpu(range
->nlb
) << (ns
->blksize_shift
- 9),
/* -EOPNOTSUPP is tolerated and reported to the caller as success. */
244 if (ret
&& ret
!= -EOPNOTSUPP
) {
245 req
->error_slba
= le64_to_cpu(range
->slba
);
246 return errno_to_nvme_status(req
, ret
);
248 return NVME_SC_SUCCESS
;
/*
 * nvmet_bdev_execute_discard() - execute the deallocate part of a DSM
 * command.
 * @req: request carrying the DSM command and its range list.
 *
 * Iterates over the ranges of the command, copying each descriptor out of
 * the request's scatterlist with nvmet_copy_from_sgl() and handing it to
 * nvmet_bdev_discard_range().  A bio is set up to complete through
 * nvmet_bio_done(), and a direct nvmet_req_complete() call with the
 * accumulated status is also visible.
 *
 * NOTE(review): the loop bound is inclusive (i <= nr), which presumably
 * reflects a 0's based range count in the command - confirm against the
 * NVMe specification.  The conditions that choose between bio completion
 * and direct completion are not part of this listing.
 */
251 static void nvmet_bdev_execute_discard(struct nvmet_req
*req
)
253 struct nvme_dsm_range range
;
254 struct bio
*bio
= NULL
;
258 for (i
= 0; i
<= le32_to_cpu(req
->cmd
->dsm
.nr
); i
++) {
/* Descriptor i starts at byte offset i * sizeof(range) in the payload. */
259 status
= nvmet_copy_from_sgl(req
, i
* sizeof(range
), &range
,
264 status
= nvmet_bdev_discard_range(req
, &range
, &bio
);
270 bio
->bi_private
= req
;
271 bio
->bi_end_io
= nvmet_bio_done
;
277 nvmet_req_complete(req
, status
);
/*
 * nvmet_bdev_execute_dsm() - execute an NVMe Dataset Management command.
 * @req: request carrying the DSM command.
 *
 * Validates the payload length against nvmet_dsm_len() and then switches
 * on the attributes field of the command.  One branch runs
 * nvmet_bdev_execute_discard(); the branch covering NVME_DSMGMT_IDR and
 * NVME_DSMGMT_IDW is marked as not supported yet and completes the request
 * with status 0.
 *
 * NOTE(review): the case label in front of the discard call and any
 * default label are not part of this listing.
 */
281 static void nvmet_bdev_execute_dsm(struct nvmet_req
*req
)
283 if (!nvmet_check_data_len(req
, nvmet_dsm_len(req
)))
286 switch (le32_to_cpu(req
->cmd
->dsm
.attributes
)) {
288 nvmet_bdev_execute_discard(req
);
290 case NVME_DSMGMT_IDR
:
291 case NVME_DSMGMT_IDW
:
293 /* Not supported yet */
294 nvmet_req_complete(req
, 0);
/*
 * nvmet_bdev_execute_write_zeroes() - execute an NVMe Write Zeroes
 * command.
 * @req: request carrying the write-zeroes command.
 *
 * Checks that the command carries no data, converts the starting LBA and
 * the block count of the command into the values passed to
 * __blkdev_issue_zeroout(), and issues that call.  A bio is set up to
 * complete through nvmet_bio_done(), and a direct completion with the
 * status derived from the return code via errno_to_nvme_status() is also
 * visible.
 *
 * NOTE(review): the condition that chooses between bio completion and
 * direct completion is not part of this listing.
 */
299 static void nvmet_bdev_execute_write_zeroes(struct nvmet_req
*req
)
301 struct nvme_write_zeroes_cmd
*write_zeroes
= &req
->cmd
->write_zeroes
;
302 struct bio
*bio
= NULL
;
307 if (!nvmet_check_data_len(req
, 0))
310 sector
= le64_to_cpu(write_zeroes
->slba
) <<
311 (req
->ns
->blksize_shift
- 9);
/* One is added to the length field and the value is widened to sector_t before shifting. */
312 nr_sector
= (((sector_t
)le16_to_cpu(write_zeroes
->length
) + 1) <<
313 (req
->ns
->blksize_shift
- 9));
315 ret
= __blkdev_issue_zeroout(req
->ns
->bdev
, sector
, nr_sector
,
316 GFP_KERNEL
, &bio
, 0);
318 bio
->bi_private
= req
;
319 bio
->bi_end_io
= nvmet_bio_done
;
322 nvmet_req_complete(req
, errno_to_nvme_status(req
, ret
));
/*
 * nvmet_bdev_parse_io_cmd() - select the execute handler for an I/O
 * command.
 * @req: request whose command opcode is examined; req->execute is set.
 *
 * Switches on the command opcode and installs the matching handler from
 * this file (read/write, flush, DSM or write zeroes).  For an opcode that
 * is not handled, an error is logged, req->error_loc is pointed at the
 * opcode field and an error status is returned.
 *
 * Return: NVME_SC_INVALID_OPCODE | NVME_SC_DNR for an unhandled opcode.
 * NOTE(review): the return for handled opcodes and most case labels are
 * not part of this listing; presumably 0 on success - confirm.
 */
326 u16
nvmet_bdev_parse_io_cmd(struct nvmet_req
*req
)
328 struct nvme_command
*cmd
= req
->cmd
;
330 switch (cmd
->common
.opcode
) {
333 req
->execute
= nvmet_bdev_execute_rw
;
336 req
->execute
= nvmet_bdev_execute_flush
;
339 req
->execute
= nvmet_bdev_execute_dsm
;
341 case nvme_cmd_write_zeroes
:
342 req
->execute
= nvmet_bdev_execute_write_zeroes
;
345 pr_err("unhandled cmd %d on qid %d\n", cmd
->common
.opcode
,
347 req
->error_loc
= offsetof(struct nvme_common_command
, opcode
);
348 return NVME_SC_INVALID_OPCODE
| NVME_SC_DNR
;