// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include "nvmet.h"

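/*
 * Populate the namespace-specific atomic write (NAWUN/NAWUPF/NACWU) and
 * I/O optimization (NPWG/NPWA/NPDG/NPDA/NOWS) fields of the Identify
 * Namespace data from the backing block device's queue limits.
 */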
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
        /* Logical blocks per physical block, 0's based. */
        const __le16 lpp0b = to0based(bdev_physical_block_size(bdev) /
                                      bdev_logical_block_size(bdev));

        /*
         * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
         * NAWUPF, and NACWU are defined for this namespace and should be
         * used by the host for this namespace instead of the AWUN, AWUPF,
         * and ACWU fields in the Identify Controller data structure. If
         * any of these fields are zero that means that the corresponding
         * field from the identify controller data structure should be used.
         */
        id->nsfeat |= 1 << 1;
        id->nawun = lpp0b;
        id->nawupf = lpp0b;
        id->nacwu = lpp0b;

        /*
         * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
         * NOWS are defined for this namespace and should be used by
         * the host for I/O optimization.
         */
        id->nsfeat |= 1 << 4;
        /* NPWG = Namespace Preferred Write Granularity. 0's based */
        id->npwg = lpp0b;
        /* NPWA = Namespace Preferred Write Alignment. 0's based */
        id->npwa = id->npwg;
        /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
        id->npdg = to0based(bdev_discard_granularity(bdev) /
                            bdev_logical_block_size(bdev));
        /* NPDA = Namespace Preferred Deallocate Alignment */
        id->npda = id->npdg;
        /* NOWS = Namespace Optimal Write Size */
        id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev));
}

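/*
 * Tear down the block-device backend for a namespace: drop the opened
 * bdev file reference and clear the cached pointers.
 */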
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
        if (ns->bdev_file) {
                fput(ns->bdev_file);
                ns->bdev = NULL;
                ns->bdev_file = NULL;
        }
}

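/*
 * Derive the namespace metadata size and NVMe protection information type
 * from the block layer integrity profile of the backing device.
 */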
static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
        struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

        if (!bi)
                return;

        if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC) {
                ns->metadata_size = bi->tuple_size;
                if (bi->flags & BLK_INTEGRITY_REF_TAG)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE1;
                else
                        ns->pi_type = NVME_NS_DPS_PI_TYPE3;
        } else {
                ns->metadata_size = 0;
        }
}

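/*
 * Open the backing block device for a namespace and cache its size, block
 * size, metadata/PI settings and command set (ZNS for zoned devices).
 */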
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
        int ret;

        /*
         * When the buffered_io namespace attribute is enabled, the user wants
         * this block device to be used as a file, so that the block device
         * can take advantage of the page cache.
         */
        if (ns->buffered_io)
                return -ENOTBLK;

        ns->bdev_file = bdev_file_open_by_path(ns->device_path,
                        BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL);
        if (IS_ERR(ns->bdev_file)) {
                ret = PTR_ERR(ns->bdev_file);
                if (ret != -ENOTBLK) {
                        pr_err("failed to open block device %s: (%d)\n",
                               ns->device_path, ret);
                }
                ns->bdev_file = NULL;
                return ret;
        }
        ns->bdev = file_bdev(ns->bdev_file);
        ns->size = bdev_nr_bytes(ns->bdev);
        ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

        ns->pi_type = 0;
        ns->metadata_size = 0;
        if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
                nvmet_bdev_ns_enable_integrity(ns);

        if (bdev_is_zoned(ns->bdev)) {
                if (!nvmet_bdev_zns_enable(ns)) {
                        nvmet_bdev_ns_disable(ns);
                        return -EINVAL;
                }
                ns->csi = NVME_CSI_ZNS;
        }

        return 0;
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
        ns->size = bdev_nr_bytes(ns->bdev);
}

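/*
 * Translate a block layer completion status into the NVMe status code that
 * is reported back to the host, and record the error location and LBA in
 * the request for error log reporting.
 */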
u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
        u16 status = NVME_SC_SUCCESS;

        if (likely(blk_sts == BLK_STS_OK))
                return status;
        /*
         * Right now there exists an M : 1 mapping from block layer errors to
         * NVMe status codes (see nvme_error_status()). For consistency, when
         * we reverse map we use the most appropriate NVMe status code from
         * the group of NVMe status codes used in nvme_error_status().
         */
        switch (blk_sts) {
        case BLK_STS_NOSPC:
                status = NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, length);
                break;
        case BLK_STS_TARGET:
                status = NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, slba);
                break;
        case BLK_STS_NOTSUPP:
                req->error_loc = offsetof(struct nvme_common_command, opcode);
                switch (req->cmd->common.opcode) {
                case nvme_cmd_dsm:
                case nvme_cmd_write_zeroes:
                        status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR;
                        break;
                default:
                        status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
                }
                break;
        case BLK_STS_MEDIUM:
                status = NVME_SC_ACCESS_DENIED;
                req->error_loc = offsetof(struct nvme_rw_command, nsid);
                break;
        case BLK_STS_IOERR:
        default:
                status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
                req->error_loc = offsetof(struct nvme_common_command, opcode);
        }

        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->error_slba = le64_to_cpu(req->cmd->rw.slba);
                break;
        case nvme_cmd_write_zeroes:
                req->error_slba =
                        le64_to_cpu(req->cmd->write_zeroes.slba);
                break;
        default:
                req->error_slba = 0;
        }
        return status;
}

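/* Completion handler shared by all bios submitted on behalf of a request. */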
static void nvmet_bio_done(struct bio *bio)
{
        struct nvmet_req *req = bio->bi_private;

        nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
        nvmet_req_bio_put(req, bio);
}

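/*
 * Attach a bio integrity payload carrying the request's metadata SGL to the
 * bio. Only built when CONFIG_BLK_DEV_INTEGRITY is enabled; otherwise a
 * no-op stub is used.
 */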
#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        struct blk_integrity *bi;
        struct bio_integrity_payload *bip;
        int rc;
        size_t resid, len;

        bi = bdev_get_integrity(req->ns->bdev);
        if (unlikely(!bi)) {
                pr_err("Unable to locate bio_integrity\n");
                return -ENODEV;
        }

        bip = bio_integrity_alloc(bio, GFP_NOIO,
                                  bio_max_segs(req->metadata_sg_cnt));
        if (IS_ERR(bip)) {
                pr_err("Unable to allocate bio_integrity_payload\n");
                return PTR_ERR(bip);
        }

        /* virtual start sector must be in integrity interval units */
        bip_set_seed(bip, bio->bi_iter.bi_sector >>
                     (bi->interval_exp - SECTOR_SHIFT));

        resid = bio_integrity_bytes(bi, bio_sectors(bio));
        while (resid > 0 && sg_miter_next(miter)) {
                len = min_t(size_t, miter->length, resid);
                rc = bio_integrity_add_page(bio, miter->page, len,
                                            offset_in_page(miter->addr));
                if (unlikely(rc != len)) {
                        pr_err("bio_integrity_add_page() failed; %d\n", rc);
                        sg_miter_stop(miter);
                        return -ENOMEM;
                }

                resid -= len;
                if (len < miter->length)
                        miter->consumed -= miter->length - len;
        }
        sg_miter_stop(miter);

        return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        return 0;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

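/*
 * Build and submit the bio chain for an NVMe read or write command, mapping
 * the request's data (and, when present, metadata) scatterlists onto the
 * backing block device.
 */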
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
        unsigned int sg_cnt = req->sg_cnt;
        struct bio *bio;
        struct scatterlist *sg;
        struct blk_plug plug;
        sector_t sector;
        blk_opf_t opf;
        int i, rc;
        struct sg_mapping_iter prot_miter;
        unsigned int iter_flags;
        unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

        if (!nvmet_check_transfer_len(req, total_len))
                return;

        if (!req->sg_cnt) {
                nvmet_req_complete(req, 0);
                return;
        }

        if (req->cmd->rw.opcode == nvme_cmd_write) {
                opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
                if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
                        opf |= REQ_FUA;
                iter_flags = SG_MITER_TO_SG;
        } else {
                opf = REQ_OP_READ;
                iter_flags = SG_MITER_FROM_SG;
        }

        if (is_pci_p2pdma_page(sg_page(req->sg)))
                opf |= REQ_NOMERGE;

        sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

        if (nvmet_use_inline_bvec(req)) {
                bio = &req->b.inline_bio;
                bio_init(bio, req->ns->bdev, req->inline_bvec,
                         ARRAY_SIZE(req->inline_bvec), opf);
        } else {
                bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
                                GFP_KERNEL);
        }
        bio->bi_iter.bi_sector = sector;
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        blk_start_plug(&plug);
        if (req->metadata_len)
                sg_miter_start(&prot_miter, req->metadata_sg,
                               req->metadata_sg_cnt, iter_flags);

        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
                                != sg->length) {
                        struct bio *prev = bio;

                        if (req->metadata_len) {
                                rc = nvmet_bdev_alloc_bip(req, bio,
                                                          &prot_miter);
                                if (unlikely(rc)) {
                                        bio_io_error(bio);
                                        return;
                                }
                        }

                        bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
                                        opf, GFP_KERNEL);
                        bio->bi_iter.bi_sector = sector;

                        bio_chain(bio, prev);
                        submit_bio(prev);
                }

                sector += sg->length >> 9;
                sg_cnt--;
        }

        if (req->metadata_len) {
                rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
                if (unlikely(rc)) {
                        bio_io_error(bio);
                        return;
                }
        }

        submit_bio(bio);
        blk_finish_plug(&plug);
}

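/*
 * Handle an NVMe Flush command by issuing a preflush bio; completes
 * immediately if the device has no volatile write cache.
 */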
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
        struct bio *bio = &req->b.inline_bio;

        if (!bdev_write_cache(req->ns->bdev)) {
                nvmet_req_complete(req, NVME_SC_SUCCESS);
                return;
        }

        if (!nvmet_check_transfer_len(req, 0))
                return;

        bio_init(bio, req->ns->bdev, req->inline_bvec,
                 ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        submit_bio(bio);
}

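/*
 * Synchronously flush the backing device's write cache, returning an NVMe
 * status code instead of completing the request.
 */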
u16 nvmet_bdev_flush(struct nvmet_req *req)
{
        if (!bdev_write_cache(req->ns->bdev))
                return 0;

        if (blkdev_issue_flush(req->ns->bdev))
                return NVME_SC_INTERNAL | NVME_STATUS_DNR;
        return 0;
}

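/*
 * Queue a discard for a single DSM range, accumulating the work in *bio so
 * that all ranges of one command complete through a single bio chain.
 */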
static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
                struct nvme_dsm_range *range, struct bio **bio)
{
        struct nvmet_ns *ns = req->ns;
        int ret;

        ret = __blkdev_issue_discard(ns->bdev,
                        nvmet_lba_to_sect(ns, range->slba),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
                        GFP_KERNEL, bio);
        if (ret && ret != -EOPNOTSUPP) {
                req->error_slba = le64_to_cpu(range->slba);
                return errno_to_nvme_status(req, ret);
        }
        return NVME_SC_SUCCESS;
}

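/*
 * Walk the DSM range list copied from the host and issue a discard for each
 * range, then submit (or error out) the resulting bio chain.
 */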
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
        struct nvme_dsm_range range;
        struct bio *bio = NULL;
        int i;
        u16 status;

        for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
                status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
                                sizeof(range));
                if (status)
                        break;

                status = nvmet_bdev_discard_range(req, &range, &bio);
                if (status)
                        break;
        }

        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                if (status)
                        bio_io_error(bio);
                else
                        submit_bio(bio);
        } else {
                nvmet_req_complete(req, status);
        }
}

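/*
 * Dispatch a Dataset Management command: only the deallocate attribute is
 * implemented; other attributes are accepted and completed as no-ops.
 */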
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
        if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
                return;

        switch (le32_to_cpu(req->cmd->dsm.attributes)) {
        case NVME_DSMGMT_AD:
                nvmet_bdev_execute_discard(req);
                return;
        case NVME_DSMGMT_IDR:
        case NVME_DSMGMT_IDW:
        default:
                /* Not supported yet */
                nvmet_req_complete(req, 0);
                return;
        }
}

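/*
 * Handle an NVMe Write Zeroes command by zeroing the LBA range on the
 * backing device via __blkdev_issue_zeroout().
 */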
static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
        struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
        struct bio *bio = NULL;
        sector_t sector;
        sector_t nr_sector;
        int ret;

        if (!nvmet_check_transfer_len(req, 0))
                return;

        sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
        nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
                (req->ns->blksize_shift - 9));

        ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
                        GFP_KERNEL, &bio, 0);
        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                submit_bio(bio);
        } else {
                nvmet_req_complete(req, errno_to_nvme_status(req, ret));
        }
}

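/*
 * Set up the execute handler for a block-device backed I/O command and, for
 * reads and writes with end-to-end protection enabled, the metadata length.
 */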
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->execute = nvmet_bdev_execute_rw;
                if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
                        req->metadata_len = nvmet_rw_metadata_len(req);
                return 0;
        case nvme_cmd_flush:
                req->execute = nvmet_bdev_execute_flush;
                return 0;
        case nvme_cmd_dsm:
                req->execute = nvmet_bdev_execute_dsm;
                return 0;
        case nvme_cmd_write_zeroes:
                req->execute = nvmet_bdev_execute_write_zeroes;
                return 0;
        default:
                return nvmet_report_invalid_opcode(req);
        }
}