// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
	const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
	/* Number of logical blocks per physical block. */
	const u32 lpp = ql->physical_block_size / ql->logical_block_size;
	/* Logical blocks per physical block, 0's based. */
	const __le16 lpp0b = to0based(lpp);

	/*
	 * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
	 * NAWUPF, and NACWU are defined for this namespace and should be
	 * used by the host for this namespace instead of the AWUN, AWUPF,
	 * and ACWU fields in the Identify Controller data structure. If
	 * any of these fields are zero that means that the corresponding
	 * field from the identify controller data structure should be used.
	 */
	id->nsfeat |= 1 << 1;
	id->nawun = lpp0b;
	id->nawupf = lpp0b;
	id->nacwu = lpp0b;

	/*
	 * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
	 * NOWS are defined for this namespace and should be used by
	 * the host for I/O optimization.
	 */
	id->nsfeat |= 1 << 4;
	/* NPWG = Namespace Preferred Write Granularity. 0's based */
	id->npwg = lpp0b;
	/* NPWA = Namespace Preferred Write Alignment. 0's based */
	id->npwa = id->npwg;
	/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
	id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
	/* NPDA = Namespace Preferred Deallocate Alignment */
	id->npda = id->npdg;
	/* NOWS = Namespace Optimal Write Size */
	id->nows = to0based(ql->io_opt / ql->logical_block_size);
}
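
/*
 * Worked example for the 0's based fields above (illustrative numbers,
 * not from the driver): a device with 4096-byte physical and 512-byte
 * logical blocks gives lpp = 4096 / 512 = 8, so NAWUN/NAWUPF/NACWU/NPWG
 * report to0based(8) = 7, meaning "8 logical blocks". A queue io_opt of
 * 64 KiB with 512-byte logical blocks yields NOWS = to0based(128) = 127.
 */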

static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
	struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

	if (bi) {
		ns->metadata_size = bi->tuple_size;
		if (bi->profile == &t10_pi_type1_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE1;
		else if (bi->profile == &t10_pi_type3_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE3;
		else
			/* Unsupported metadata type */
			ns->metadata_size = 0;
	}
}
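
/*
 * Background note: with T10 PI Type 1 the device checks both the guard
 * CRC and the reference tag against the LBA, while with Type 3 the
 * reference tag is opaque and only the guard is meaningful. Any other
 * integrity profile is treated as unsupported above, and metadata is
 * disabled for the namespace.
 */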

int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
	int ret;

	ns->bdev = blkdev_get_by_path(ns->device_path,
			FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(ns->bdev)) {
		ret = PTR_ERR(ns->bdev);
		if (ret != -ENOTBLK) {
			pr_err("failed to open block device %s: (%ld)\n",
					ns->device_path, PTR_ERR(ns->bdev));
		}
		ns->bdev = NULL;
		return ret;
	}
	ns->size = i_size_read(ns->bdev->bd_inode);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	ns->pi_type = 0;
	ns->metadata_size = 0;
	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
		nvmet_bdev_ns_enable_integrity(ns);

	return 0;
}

void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
	if (ns->bdev) {
		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
		ns->bdev = NULL;
	}
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
	ns->size = i_size_read(ns->bdev->bd_inode);
}

static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
	u16 status = NVME_SC_SUCCESS;

	if (likely(blk_sts == BLK_STS_OK))
		return status;
	/*
	 * Right now there exists an M : 1 mapping from block layer errors
	 * to NVMe status codes (see nvme_error_status()). For consistency,
	 * when we reverse map we use the most appropriate NVMe status code
	 * from the group of NVMe status codes used in nvme_error_status().
	 */
	switch (blk_sts) {
	case BLK_STS_NOSPC:
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, length);
		break;
	case BLK_STS_TARGET:
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		break;
	case BLK_STS_NOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case BLK_STS_MEDIUM:
		status = NVME_SC_ACCESS_DENIED;
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		break;
	case BLK_STS_IOERR:
	default:
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_common_command, opcode);
	}

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->error_slba = le64_to_cpu(req->cmd->rw.slba);
		break;
	case nvme_cmd_write_zeroes:
		req->error_slba =
			le64_to_cpu(req->cmd->write_zeroes.slba);
		break;
	default:
		req->error_slba = 0;
	}
	return status;
}
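
/*
 * Worked example of the reverse mapping above: a host write that runs
 * the backing device out of space completes with BLK_STS_NOSPC, which
 * is mapped to NVME_SC_CAP_EXCEEDED | NVME_SC_DNR with error_loc
 * pointing at the rw command's length field, so the host sees the same
 * status a capacity-exhausted NVMe device would return natively.
 */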

static void nvmet_bio_done(struct bio *bio)
{
	struct nvmet_req *req = bio->bi_private;

	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
	if (bio != &req->b.inline_bio)
		bio_put(bio);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	struct blk_integrity *bi;
	struct bio_integrity_payload *bip;
	struct block_device *bdev = req->ns->bdev;
	int rc;
	size_t resid, len;

	bi = bdev_get_integrity(bdev);
	if (unlikely(!bi)) {
		pr_err("Unable to locate bio_integrity\n");
		return -ENODEV;
	}

	bip = bio_integrity_alloc(bio, GFP_NOIO,
		min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES));
	if (IS_ERR(bip)) {
		pr_err("Unable to allocate bio_integrity_payload\n");
		return PTR_ERR(bip);
	}

	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
	/* virtual start sector must be in integrity interval units */
	bip_set_seed(bip, bio->bi_iter.bi_sector >>
		     (bi->interval_exp - SECTOR_SHIFT));

	resid = bip->bip_iter.bi_size;
	while (resid > 0 && sg_miter_next(miter)) {
		len = min_t(size_t, miter->length, resid);
		rc = bio_integrity_add_page(bio, miter->page, len,
					    offset_in_page(miter->addr));
		if (unlikely(rc != len)) {
			pr_err("bio_integrity_add_page() failed; %d\n", rc);
			sg_miter_stop(miter);
			return -ENOMEM;
		}

		resid -= len;
		if (len < miter->length)
			miter->consumed -= miter->length - len;
	}
	sg_miter_stop(miter);

	return 0;
}
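
/*
 * Example for the seed calculation above: with a 4096-byte integrity
 * interval (interval_exp = 12) and SECTOR_SHIFT = 9, a bio starting at
 * 512-byte sector 80 gets seed 80 >> (12 - 9) = 10, i.e. the tenth
 * protection information interval on the device.
 */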
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
	int sg_cnt = req->sg_cnt;
	struct bio *bio;
	struct scatterlist *sg;
	struct blk_plug plug;
	sector_t sector;
	int op, i, rc;
	struct sg_mapping_iter prot_miter;
	unsigned int iter_flags;
	unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

	if (!nvmet_check_transfer_len(req, total_len))
		return;

	if (!req->sg_cnt) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			op |= REQ_FUA;
		iter_flags = SG_MITER_TO_SG;
	} else {
		op = REQ_OP_READ;
		iter_flags = SG_MITER_FROM_SG;
	}

	if (is_pci_p2pdma_page(sg_page(req->sg)))
		op |= REQ_NOMERGE;

	sector = le64_to_cpu(req->cmd->rw.slba);
	sector <<= (req->ns->blksize_shift - 9);

	if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
		bio = &req->b.inline_bio;
		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	} else {
		bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
	}
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = op;

	blk_start_plug(&plug);
	if (req->metadata_len)
		sg_miter_start(&prot_miter, req->metadata_sg,
			       req->metadata_sg_cnt, iter_flags);

	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
				!= sg->length) {
			struct bio *prev = bio;

			if (req->metadata_len) {
				rc = nvmet_bdev_alloc_bip(req, bio,
							  &prot_miter);
				if (unlikely(rc)) {
					bio_io_error(bio);
					return;
				}
			}

			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
			bio_set_dev(bio, req->ns->bdev);
			bio->bi_iter.bi_sector = sector;
			bio->bi_opf = op;

			bio_chain(bio, prev);
			submit_bio(prev);
		}

		sector += sg->length >> 9;
		sg_cnt--;
	}

	if (req->metadata_len) {
		rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
		if (unlikely(rc)) {
			bio_io_error(bio);
			return;
		}
	}

	submit_bio(bio);
	blk_finish_plug(&plug);
}
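
/*
 * Note on the loop above: when bio_add_page() cannot take a full
 * scatterlist element, the filled bio is chained to a fresh one with
 * bio_chain() and submitted immediately. Only the first bio carries
 * bi_end_io = nvmet_bio_done, and the chain guarantees it fires once
 * every chained bio has completed, while blk_start_plug() /
 * blk_finish_plug() lets the block layer batch the submissions.
 */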

static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
	struct bio *bio = &req->b.inline_bio;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

	submit_bio(bio);
}

u16 nvmet_bdev_flush(struct nvmet_req *req)
{
	if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL))
		return NVME_SC_INTERNAL | NVME_SC_DNR;
	return 0;
}

static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
		struct nvme_dsm_range *range, struct bio **bio)
{
	struct nvmet_ns *ns = req->ns;
	int ret;

	ret = __blkdev_issue_discard(ns->bdev,
			le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
			GFP_KERNEL, 0, bio);
	if (ret && ret != -EOPNOTSUPP) {
		req->error_slba = le64_to_cpu(range->slba);
		return errno_to_nvme_status(req, ret);
	}
	return NVME_SC_SUCCESS;
}
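
/*
 * Example of the shift above: for a namespace with 4096-byte logical
 * blocks, blksize_shift = 12, so NVMe LBAs convert to 512-byte block
 * layer sectors by shifting left by 12 - 9 = 3 (one LBA = 8 sectors).
 * A range of slba = 2, nlb = 4 thus discards 32 sectors from sector 16.
 */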

static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
	struct nvme_dsm_range range;
	struct bio *bio = NULL;
	int i;
	u16 status;

	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
				sizeof(range));
		if (status)
			break;

		status = nvmet_bdev_discard_range(req, &range, &bio);
		if (status)
			break;
	}

	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		if (status)
			bio_io_error(bio);
		else
			submit_bio(bio);
	} else {
		nvmet_req_complete(req, status);
	}
}

static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
	if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
		return;

	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
	case NVME_DSMGMT_AD:
		nvmet_bdev_execute_discard(req);
		return;
	case NVME_DSMGMT_IDR:
	case NVME_DSMGMT_IDW:
	default:
		/* Not supported yet */
		nvmet_req_complete(req, 0);
		return;
	}
}

static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
	struct bio *bio = NULL;
	sector_t sector;
	sector_t nr_sector;
	int ret;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	sector = le64_to_cpu(write_zeroes->slba) <<
		(req->ns->blksize_shift - 9);
	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
		(req->ns->blksize_shift - 9));

	ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
			GFP_KERNEL, &bio, 0);
	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		submit_bio(bio);
	} else {
		nvmet_req_complete(req, errno_to_nvme_status(req, ret));
	}
}
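
/*
 * Note on nr_sector above: the Write Zeroes length field is 0's based,
 * so length = 0 means one logical block, hence the "+ 1" before the
 * LBA-to-sector shift. E.g. with 512-byte blocks (blksize_shift = 9),
 * length = 7 zeroes 8 sectors starting at slba.
 */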

u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	switch (cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_bdev_execute_rw;
		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
			req->metadata_len = nvmet_rw_metadata_len(req);
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_bdev_execute_flush;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_bdev_execute_dsm;
		return 0;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_bdev_execute_write_zeroes;
		return 0;
	default:
		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
		       req->sq->qid);
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}
}