1 // SPDX-License-Identifier: GPL-2.0
3 * nvme-lightnvm.c - LightNVM NVMe device
5 * Copyright (C) 2014-2015 IT University of Copenhagen
6 * Initial release: Matias Bjorling <mb@lightnvm.io>
11 #include <linux/nvme.h>
12 #include <linux/bitops.h>
13 #include <linux/lightnvm.h>
14 #include <linux/vmalloc.h>
15 #include <linux/sched/sysctl.h>
16 #include <uapi/linux/lightnvm.h>
/*
 * Admin command opcodes for the LightNVM commands issued by this driver:
 * geometry identification and reading/updating the bad block table.
 */
enum nvme_nvm_admin_opcode {
	nvme_nvm_admin_identity		= 0xe2,
	nvme_nvm_admin_get_bb_tbl	= 0xf2,
	nvme_nvm_admin_set_bb_tbl	= 0xf1,
};
/* Log page identifier passed to nvme_get_log() to fetch chunk metadata. */
enum nvme_nvm_log_page {
	NVME_NVM_LOG_REPORT_CHUNK	= 0xca,
};
28 struct nvme_nvm_ph_rw
{
44 struct nvme_nvm_erase_blk
{
59 struct nvme_nvm_identity
{
70 struct nvme_nvm_getbbtbl
{
82 struct nvme_nvm_setbbtbl
{
97 struct nvme_nvm_command
{
99 struct nvme_common_command common
;
100 struct nvme_nvm_ph_rw ph_rw
;
101 struct nvme_nvm_erase_blk erase
;
102 struct nvme_nvm_identity identity
;
103 struct nvme_nvm_getbbtbl get_bb
;
104 struct nvme_nvm_setbbtbl set_bb
;
108 struct nvme_nvm_id12_grp
{
134 struct nvme_nvm_id12_addrf
{
150 struct nvme_nvm_id12
{
157 struct nvme_nvm_id12_addrf ppaf
;
159 struct nvme_nvm_id12_grp grp
;
163 struct nvme_nvm_bb_tbl
{
177 struct nvme_nvm_id20_addrf
{
185 struct nvme_nvm_id20
{
190 struct nvme_nvm_id20_addrf lbaf
;
205 /* Write data requirements */
213 /* Performance related metrics */
225 /* Vendor specific */
229 struct nvme_nvm_chk_meta
{
240 * Check we didn't inadvertently grow the command struct
242 static inline void _nvme_nvm_check_size(void)
244 BUILD_BUG_ON(sizeof(struct nvme_nvm_identity
) != 64);
245 BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw
) != 64);
246 BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk
) != 64);
247 BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl
) != 64);
248 BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl
) != 64);
249 BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp
) != 960);
250 BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf
) != 16);
251 BUILD_BUG_ON(sizeof(struct nvme_nvm_id12
) != NVME_IDENTIFY_DATA_SIZE
);
252 BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl
) != 64);
253 BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf
) != 8);
254 BUILD_BUG_ON(sizeof(struct nvme_nvm_id20
) != NVME_IDENTIFY_DATA_SIZE
);
255 BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta
) != 32);
256 BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta
) !=
257 sizeof(struct nvm_chk_meta
));
260 static void nvme_nvm_set_addr_12(struct nvm_addrf_12
*dst
,
261 struct nvme_nvm_id12_addrf
*src
)
263 dst
->ch_len
= src
->ch_len
;
264 dst
->lun_len
= src
->lun_len
;
265 dst
->blk_len
= src
->blk_len
;
266 dst
->pg_len
= src
->pg_len
;
267 dst
->pln_len
= src
->pln_len
;
268 dst
->sec_len
= src
->sec_len
;
270 dst
->ch_offset
= src
->ch_offset
;
271 dst
->lun_offset
= src
->lun_offset
;
272 dst
->blk_offset
= src
->blk_offset
;
273 dst
->pg_offset
= src
->pg_offset
;
274 dst
->pln_offset
= src
->pln_offset
;
275 dst
->sec_offset
= src
->sec_offset
;
277 dst
->ch_mask
= ((1ULL << dst
->ch_len
) - 1) << dst
->ch_offset
;
278 dst
->lun_mask
= ((1ULL << dst
->lun_len
) - 1) << dst
->lun_offset
;
279 dst
->blk_mask
= ((1ULL << dst
->blk_len
) - 1) << dst
->blk_offset
;
280 dst
->pg_mask
= ((1ULL << dst
->pg_len
) - 1) << dst
->pg_offset
;
281 dst
->pln_mask
= ((1ULL << dst
->pln_len
) - 1) << dst
->pln_offset
;
282 dst
->sec_mask
= ((1ULL << dst
->sec_len
) - 1) << dst
->sec_offset
;
285 static int nvme_nvm_setup_12(struct nvme_nvm_id12
*id
,
288 struct nvme_nvm_id12_grp
*src
;
289 int sec_per_pg
, sec_per_pl
, pg_per_blk
;
296 if (src
->mtype
!= 0) {
297 pr_err("nvm: memory type not supported\n");
301 /* 1.2 spec. only reports a single version id - unfold */
302 geo
->major_ver_id
= id
->ver_id
;
303 geo
->minor_ver_id
= 2;
305 /* Set compacted version for upper layers */
306 geo
->version
= NVM_OCSSD_SPEC_12
;
308 geo
->num_ch
= src
->num_ch
;
309 geo
->num_lun
= src
->num_lun
;
310 geo
->all_luns
= geo
->num_ch
* geo
->num_lun
;
312 geo
->num_chk
= le16_to_cpu(src
->num_chk
);
314 geo
->csecs
= le16_to_cpu(src
->csecs
);
315 geo
->sos
= le16_to_cpu(src
->sos
);
317 pg_per_blk
= le16_to_cpu(src
->num_pg
);
318 sec_per_pg
= le16_to_cpu(src
->fpg_sz
) / geo
->csecs
;
319 sec_per_pl
= sec_per_pg
* src
->num_pln
;
320 geo
->clba
= sec_per_pl
* pg_per_blk
;
322 geo
->all_chunks
= geo
->all_luns
* geo
->num_chk
;
323 geo
->total_secs
= geo
->clba
* geo
->all_chunks
;
325 geo
->ws_min
= sec_per_pg
;
326 geo
->ws_opt
= sec_per_pg
;
327 geo
->mw_cunits
= geo
->ws_opt
<< 3; /* default to MLC safe values */
329 /* Do not impose values for maximum number of open blocks as it is
330 * unspecified in 1.2. Users of 1.2 must be aware of this and eventually
331 * specify these values through a quirk if restrictions apply.
333 geo
->maxoc
= geo
->all_luns
* geo
->num_chk
;
334 geo
->maxocpu
= geo
->num_chk
;
336 geo
->mccap
= le32_to_cpu(src
->mccap
);
338 geo
->trdt
= le32_to_cpu(src
->trdt
);
339 geo
->trdm
= le32_to_cpu(src
->trdm
);
340 geo
->tprt
= le32_to_cpu(src
->tprt
);
341 geo
->tprm
= le32_to_cpu(src
->tprm
);
342 geo
->tbet
= le32_to_cpu(src
->tbet
);
343 geo
->tbem
= le32_to_cpu(src
->tbem
);
345 /* 1.2 compatibility */
346 geo
->vmnt
= id
->vmnt
;
347 geo
->cap
= le32_to_cpu(id
->cap
);
348 geo
->dom
= le32_to_cpu(id
->dom
);
350 geo
->mtype
= src
->mtype
;
351 geo
->fmtype
= src
->fmtype
;
353 geo
->cpar
= le16_to_cpu(src
->cpar
);
354 geo
->mpos
= le32_to_cpu(src
->mpos
);
356 geo
->pln_mode
= NVM_PLANE_SINGLE
;
358 if (geo
->mpos
& 0x020202) {
359 geo
->pln_mode
= NVM_PLANE_DOUBLE
;
361 } else if (geo
->mpos
& 0x040404) {
362 geo
->pln_mode
= NVM_PLANE_QUAD
;
366 geo
->num_pln
= src
->num_pln
;
367 geo
->num_pg
= le16_to_cpu(src
->num_pg
);
368 geo
->fpg_sz
= le16_to_cpu(src
->fpg_sz
);
370 nvme_nvm_set_addr_12((struct nvm_addrf_12
*)&geo
->addrf
, &id
->ppaf
);
375 static void nvme_nvm_set_addr_20(struct nvm_addrf
*dst
,
376 struct nvme_nvm_id20_addrf
*src
)
378 dst
->ch_len
= src
->grp_len
;
379 dst
->lun_len
= src
->pu_len
;
380 dst
->chk_len
= src
->chk_len
;
381 dst
->sec_len
= src
->lba_len
;
384 dst
->chk_offset
= dst
->sec_len
;
385 dst
->lun_offset
= dst
->chk_offset
+ dst
->chk_len
;
386 dst
->ch_offset
= dst
->lun_offset
+ dst
->lun_len
;
388 dst
->ch_mask
= ((1ULL << dst
->ch_len
) - 1) << dst
->ch_offset
;
389 dst
->lun_mask
= ((1ULL << dst
->lun_len
) - 1) << dst
->lun_offset
;
390 dst
->chk_mask
= ((1ULL << dst
->chk_len
) - 1) << dst
->chk_offset
;
391 dst
->sec_mask
= ((1ULL << dst
->sec_len
) - 1) << dst
->sec_offset
;
394 static int nvme_nvm_setup_20(struct nvme_nvm_id20
*id
,
397 geo
->major_ver_id
= id
->mjr
;
398 geo
->minor_ver_id
= id
->mnr
;
400 /* Set compacted version for upper layers */
401 geo
->version
= NVM_OCSSD_SPEC_20
;
403 geo
->num_ch
= le16_to_cpu(id
->num_grp
);
404 geo
->num_lun
= le16_to_cpu(id
->num_pu
);
405 geo
->all_luns
= geo
->num_ch
* geo
->num_lun
;
407 geo
->num_chk
= le32_to_cpu(id
->num_chk
);
408 geo
->clba
= le32_to_cpu(id
->clba
);
410 geo
->all_chunks
= geo
->all_luns
* geo
->num_chk
;
411 geo
->total_secs
= geo
->clba
* geo
->all_chunks
;
413 geo
->ws_min
= le32_to_cpu(id
->ws_min
);
414 geo
->ws_opt
= le32_to_cpu(id
->ws_opt
);
415 geo
->mw_cunits
= le32_to_cpu(id
->mw_cunits
);
416 geo
->maxoc
= le32_to_cpu(id
->maxoc
);
417 geo
->maxocpu
= le32_to_cpu(id
->maxocpu
);
419 geo
->trdt
= le32_to_cpu(id
->trdt
);
420 geo
->trdm
= le32_to_cpu(id
->trdm
);
421 geo
->tprt
= le32_to_cpu(id
->twrt
);
422 geo
->tprm
= le32_to_cpu(id
->twrm
);
423 geo
->tbet
= le32_to_cpu(id
->tcrst
);
424 geo
->tbem
= le32_to_cpu(id
->tcrsm
);
426 nvme_nvm_set_addr_20(&geo
->addrf
, &id
->lbaf
);
431 static int nvme_nvm_identity(struct nvm_dev
*nvmdev
)
433 struct nvme_ns
*ns
= nvmdev
->q
->queuedata
;
434 struct nvme_nvm_id12
*id
;
435 struct nvme_nvm_command c
= {};
438 c
.identity
.opcode
= nvme_nvm_admin_identity
;
439 c
.identity
.nsid
= cpu_to_le32(ns
->head
->ns_id
);
441 id
= kmalloc(sizeof(struct nvme_nvm_id12
), GFP_KERNEL
);
445 ret
= nvme_submit_sync_cmd(ns
->ctrl
->admin_q
, (struct nvme_command
*)&c
,
446 id
, sizeof(struct nvme_nvm_id12
));
453 * The 1.2 and 2.0 specifications share the first byte in their geometry
454 * command to make it possible to know what version a device implements.
456 switch (id
->ver_id
) {
458 ret
= nvme_nvm_setup_12(id
, &nvmdev
->geo
);
461 ret
= nvme_nvm_setup_20((struct nvme_nvm_id20
*)id
,
465 dev_err(ns
->ctrl
->device
, "OCSSD revision not supported (%d)\n",
475 static int nvme_nvm_get_bb_tbl(struct nvm_dev
*nvmdev
, struct ppa_addr ppa
,
478 struct request_queue
*q
= nvmdev
->q
;
479 struct nvm_geo
*geo
= &nvmdev
->geo
;
480 struct nvme_ns
*ns
= q
->queuedata
;
481 struct nvme_ctrl
*ctrl
= ns
->ctrl
;
482 struct nvme_nvm_command c
= {};
483 struct nvme_nvm_bb_tbl
*bb_tbl
;
484 int nr_blks
= geo
->num_chk
* geo
->num_pln
;
485 int tblsz
= sizeof(struct nvme_nvm_bb_tbl
) + nr_blks
;
488 c
.get_bb
.opcode
= nvme_nvm_admin_get_bb_tbl
;
489 c
.get_bb
.nsid
= cpu_to_le32(ns
->head
->ns_id
);
490 c
.get_bb
.spba
= cpu_to_le64(ppa
.ppa
);
492 bb_tbl
= kzalloc(tblsz
, GFP_KERNEL
);
496 ret
= nvme_submit_sync_cmd(ctrl
->admin_q
, (struct nvme_command
*)&c
,
499 dev_err(ctrl
->device
, "get bad block table failed (%d)\n", ret
);
504 if (bb_tbl
->tblid
[0] != 'B' || bb_tbl
->tblid
[1] != 'B' ||
505 bb_tbl
->tblid
[2] != 'L' || bb_tbl
->tblid
[3] != 'T') {
506 dev_err(ctrl
->device
, "bbt format mismatch\n");
511 if (le16_to_cpu(bb_tbl
->verid
) != 1) {
513 dev_err(ctrl
->device
, "bbt version not supported\n");
517 if (le32_to_cpu(bb_tbl
->tblks
) != nr_blks
) {
519 dev_err(ctrl
->device
,
520 "bbt unsuspected blocks returned (%u!=%u)",
521 le32_to_cpu(bb_tbl
->tblks
), nr_blks
);
525 memcpy(blks
, bb_tbl
->blk
, geo
->num_chk
* geo
->num_pln
);
531 static int nvme_nvm_set_bb_tbl(struct nvm_dev
*nvmdev
, struct ppa_addr
*ppas
,
532 int nr_ppas
, int type
)
534 struct nvme_ns
*ns
= nvmdev
->q
->queuedata
;
535 struct nvme_nvm_command c
= {};
538 c
.set_bb
.opcode
= nvme_nvm_admin_set_bb_tbl
;
539 c
.set_bb
.nsid
= cpu_to_le32(ns
->head
->ns_id
);
540 c
.set_bb
.spba
= cpu_to_le64(ppas
->ppa
);
541 c
.set_bb
.nlb
= cpu_to_le16(nr_ppas
- 1);
542 c
.set_bb
.value
= type
;
544 ret
= nvme_submit_sync_cmd(ns
->ctrl
->admin_q
, (struct nvme_command
*)&c
,
547 dev_err(ns
->ctrl
->device
, "set bad block table failed (%d)\n",
553 * Expect the lba in device format
555 static int nvme_nvm_get_chk_meta(struct nvm_dev
*ndev
,
556 sector_t slba
, int nchks
,
557 struct nvm_chk_meta
*meta
)
559 struct nvm_geo
*geo
= &ndev
->geo
;
560 struct nvme_ns
*ns
= ndev
->q
->queuedata
;
561 struct nvme_ctrl
*ctrl
= ns
->ctrl
;
562 struct nvme_nvm_chk_meta
*dev_meta
, *dev_meta_off
;
564 size_t left
= nchks
* sizeof(struct nvme_nvm_chk_meta
);
565 size_t log_pos
, offset
, len
;
570 * limit requests to maximum 256K to avoid issuing arbitrarily large
571 * requests when the device does not specify a maximum transfer size.
573 max_len
= min_t(unsigned int, ctrl
->max_hw_sectors
<< 9, 256 * 1024);
575 dev_meta
= kmalloc(max_len
, GFP_KERNEL
);
579 /* Normalize lba address space to obtain log offset */
581 ppa
= dev_to_generic_addr(ndev
, ppa
);
584 log_pos
+= ppa
.m
.pu
* geo
->num_chk
;
585 log_pos
+= ppa
.m
.grp
* geo
->num_lun
* geo
->num_chk
;
587 offset
= log_pos
* sizeof(struct nvme_nvm_chk_meta
);
590 len
= min_t(unsigned int, left
, max_len
);
592 memset(dev_meta
, 0, max_len
);
593 dev_meta_off
= dev_meta
;
595 ret
= nvme_get_log(ctrl
, ns
->head
->ns_id
,
596 NVME_NVM_LOG_REPORT_CHUNK
, 0, dev_meta
, len
,
599 dev_err(ctrl
->device
, "Get REPORT CHUNK log error\n");
603 for (i
= 0; i
< len
; i
+= sizeof(struct nvme_nvm_chk_meta
)) {
604 meta
->state
= dev_meta_off
->state
;
605 meta
->type
= dev_meta_off
->type
;
606 meta
->wi
= dev_meta_off
->wi
;
607 meta
->slba
= le64_to_cpu(dev_meta_off
->slba
);
608 meta
->cnlb
= le64_to_cpu(dev_meta_off
->cnlb
);
609 meta
->wp
= le64_to_cpu(dev_meta_off
->wp
);
624 static inline void nvme_nvm_rqtocmd(struct nvm_rq
*rqd
, struct nvme_ns
*ns
,
625 struct nvme_nvm_command
*c
)
627 c
->ph_rw
.opcode
= rqd
->opcode
;
628 c
->ph_rw
.nsid
= cpu_to_le32(ns
->head
->ns_id
);
629 c
->ph_rw
.spba
= cpu_to_le64(rqd
->ppa_addr
.ppa
);
630 c
->ph_rw
.metadata
= cpu_to_le64(rqd
->dma_meta_list
);
631 c
->ph_rw
.control
= cpu_to_le16(rqd
->flags
);
632 c
->ph_rw
.length
= cpu_to_le16(rqd
->nr_ppas
- 1);
635 static void nvme_nvm_end_io(struct request
*rq
, blk_status_t status
)
637 struct nvm_rq
*rqd
= rq
->end_io_data
;
639 rqd
->ppa_status
= le64_to_cpu(nvme_req(rq
)->result
.u64
);
640 rqd
->error
= nvme_req(rq
)->status
;
643 kfree(nvme_req(rq
)->cmd
);
644 blk_mq_free_request(rq
);
647 static struct request
*nvme_nvm_alloc_request(struct request_queue
*q
,
649 struct nvme_nvm_command
*cmd
)
651 struct nvme_ns
*ns
= q
->queuedata
;
654 nvme_nvm_rqtocmd(rqd
, ns
, cmd
);
656 rq
= nvme_alloc_request(q
, (struct nvme_command
*)cmd
, 0, NVME_QID_ANY
);
660 rq
->cmd_flags
&= ~REQ_FAILFAST_DRIVER
;
663 blk_rq_append_bio(rq
, &rqd
->bio
);
665 rq
->ioprio
= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE
, IOPRIO_NORM
);
670 static int nvme_nvm_submit_io(struct nvm_dev
*dev
, struct nvm_rq
*rqd
,
673 struct nvm_geo
*geo
= &dev
->geo
;
674 struct request_queue
*q
= dev
->q
;
675 struct nvme_nvm_command
*cmd
;
679 cmd
= kzalloc(sizeof(struct nvme_nvm_command
), GFP_KERNEL
);
683 rq
= nvme_nvm_alloc_request(q
, rqd
, cmd
);
690 ret
= blk_rq_map_kern(q
, rq
, buf
, geo
->csecs
* rqd
->nr_ppas
,
696 rq
->end_io_data
= rqd
;
698 blk_execute_rq_nowait(q
, NULL
, rq
, 0, nvme_nvm_end_io
);
707 static void *nvme_nvm_create_dma_pool(struct nvm_dev
*nvmdev
, char *name
,
710 struct nvme_ns
*ns
= nvmdev
->q
->queuedata
;
712 return dma_pool_create(name
, ns
->ctrl
->dev
, size
, PAGE_SIZE
, 0);
/*
 * Destroy a DMA pool handed out through the opaque @pool pointer
 * (the .destroy_dma_pool callback of nvme_nvm_dev_ops).
 */
static void nvme_nvm_destroy_dma_pool(void *pool)
{
	struct dma_pool *dma_pool = pool;

	dma_pool_destroy(dma_pool);
}
722 static void *nvme_nvm_dev_dma_alloc(struct nvm_dev
*dev
, void *pool
,
723 gfp_t mem_flags
, dma_addr_t
*dma_handler
)
725 return dma_pool_alloc(pool
, mem_flags
, dma_handler
);
728 static void nvme_nvm_dev_dma_free(void *pool
, void *addr
,
729 dma_addr_t dma_handler
)
731 dma_pool_free(pool
, addr
, dma_handler
);
734 static struct nvm_dev_ops nvme_nvm_dev_ops
= {
735 .identity
= nvme_nvm_identity
,
737 .get_bb_tbl
= nvme_nvm_get_bb_tbl
,
738 .set_bb_tbl
= nvme_nvm_set_bb_tbl
,
740 .get_chk_meta
= nvme_nvm_get_chk_meta
,
742 .submit_io
= nvme_nvm_submit_io
,
744 .create_dma_pool
= nvme_nvm_create_dma_pool
,
745 .destroy_dma_pool
= nvme_nvm_destroy_dma_pool
,
746 .dev_dma_alloc
= nvme_nvm_dev_dma_alloc
,
747 .dev_dma_free
= nvme_nvm_dev_dma_free
,
750 static int nvme_nvm_submit_user_cmd(struct request_queue
*q
,
752 struct nvme_nvm_command
*vcmd
,
753 void __user
*ubuf
, unsigned int bufflen
,
754 void __user
*meta_buf
, unsigned int meta_len
,
755 void __user
*ppa_buf
, unsigned int ppa_len
,
756 u32
*result
, u64
*status
, unsigned int timeout
)
758 bool write
= nvme_is_write((struct nvme_command
*)vcmd
);
759 struct nvm_dev
*dev
= ns
->ndev
;
760 struct gendisk
*disk
= ns
->disk
;
762 struct bio
*bio
= NULL
;
763 __le64
*ppa_list
= NULL
;
765 __le64
*metadata
= NULL
;
766 dma_addr_t metadata_dma
;
767 DECLARE_COMPLETION_ONSTACK(wait
);
770 rq
= nvme_alloc_request(q
, (struct nvme_command
*)vcmd
, 0,
777 rq
->timeout
= timeout
? timeout
: ADMIN_TIMEOUT
;
779 if (ppa_buf
&& ppa_len
) {
780 ppa_list
= dma_pool_alloc(dev
->dma_pool
, GFP_KERNEL
, &ppa_dma
);
785 if (copy_from_user(ppa_list
, (void __user
*)ppa_buf
,
786 sizeof(u64
) * (ppa_len
+ 1))) {
790 vcmd
->ph_rw
.spba
= cpu_to_le64(ppa_dma
);
792 vcmd
->ph_rw
.spba
= cpu_to_le64((uintptr_t)ppa_buf
);
795 if (ubuf
&& bufflen
) {
796 ret
= blk_rq_map_user(q
, rq
, NULL
, ubuf
, bufflen
, GFP_KERNEL
);
801 if (meta_buf
&& meta_len
) {
802 metadata
= dma_pool_alloc(dev
->dma_pool
, GFP_KERNEL
,
810 if (copy_from_user(metadata
,
811 (void __user
*)meta_buf
,
817 vcmd
->ph_rw
.metadata
= cpu_to_le64(metadata_dma
);
823 blk_execute_rq(q
, NULL
, rq
, 0);
825 if (nvme_req(rq
)->flags
& NVME_REQ_CANCELLED
)
827 else if (nvme_req(rq
)->status
& 0x7ff)
830 *result
= nvme_req(rq
)->status
& 0x7ff;
832 *status
= le64_to_cpu(nvme_req(rq
)->result
.u64
);
834 if (metadata
&& !ret
&& !write
) {
835 if (copy_to_user(meta_buf
, (void *)metadata
, meta_len
))
839 if (meta_buf
&& meta_len
)
840 dma_pool_free(dev
->dma_pool
, metadata
, metadata_dma
);
843 blk_rq_unmap_user(bio
);
845 if (ppa_buf
&& ppa_len
)
846 dma_pool_free(dev
->dma_pool
, ppa_list
, ppa_dma
);
848 blk_mq_free_request(rq
);
853 static int nvme_nvm_submit_vio(struct nvme_ns
*ns
,
854 struct nvm_user_vio __user
*uvio
)
856 struct nvm_user_vio vio
;
857 struct nvme_nvm_command c
;
861 if (copy_from_user(&vio
, uvio
, sizeof(vio
)))
866 memset(&c
, 0, sizeof(c
));
867 c
.ph_rw
.opcode
= vio
.opcode
;
868 c
.ph_rw
.nsid
= cpu_to_le32(ns
->head
->ns_id
);
869 c
.ph_rw
.control
= cpu_to_le16(vio
.control
);
870 c
.ph_rw
.length
= cpu_to_le16(vio
.nppas
);
872 length
= (vio
.nppas
+ 1) << ns
->lba_shift
;
874 ret
= nvme_nvm_submit_user_cmd(ns
->queue
, ns
, &c
,
875 (void __user
*)(uintptr_t)vio
.addr
, length
,
876 (void __user
*)(uintptr_t)vio
.metadata
,
878 (void __user
*)(uintptr_t)vio
.ppa_list
, vio
.nppas
,
879 &vio
.result
, &vio
.status
, 0);
881 if (ret
&& copy_to_user(uvio
, &vio
, sizeof(vio
)))
887 static int nvme_nvm_user_vcmd(struct nvme_ns
*ns
, int admin
,
888 struct nvm_passthru_vio __user
*uvcmd
)
890 struct nvm_passthru_vio vcmd
;
891 struct nvme_nvm_command c
;
892 struct request_queue
*q
;
893 unsigned int timeout
= 0;
896 if (copy_from_user(&vcmd
, uvcmd
, sizeof(vcmd
)))
898 if ((vcmd
.opcode
!= 0xF2) && (!capable(CAP_SYS_ADMIN
)))
903 memset(&c
, 0, sizeof(c
));
904 c
.common
.opcode
= vcmd
.opcode
;
905 c
.common
.nsid
= cpu_to_le32(ns
->head
->ns_id
);
906 c
.common
.cdw2
[0] = cpu_to_le32(vcmd
.cdw2
);
907 c
.common
.cdw2
[1] = cpu_to_le32(vcmd
.cdw3
);
909 c
.ph_rw
.length
= cpu_to_le16(vcmd
.nppas
);
910 c
.ph_rw
.control
= cpu_to_le16(vcmd
.control
);
911 c
.common
.cdw13
= cpu_to_le32(vcmd
.cdw13
);
912 c
.common
.cdw14
= cpu_to_le32(vcmd
.cdw14
);
913 c
.common
.cdw15
= cpu_to_le32(vcmd
.cdw15
);
916 timeout
= msecs_to_jiffies(vcmd
.timeout_ms
);
918 q
= admin
? ns
->ctrl
->admin_q
: ns
->queue
;
920 ret
= nvme_nvm_submit_user_cmd(q
, ns
,
921 (struct nvme_nvm_command
*)&c
,
922 (void __user
*)(uintptr_t)vcmd
.addr
, vcmd
.data_len
,
923 (void __user
*)(uintptr_t)vcmd
.metadata
,
925 (void __user
*)(uintptr_t)vcmd
.ppa_list
, vcmd
.nppas
,
926 &vcmd
.result
, &vcmd
.status
, timeout
);
928 if (ret
&& copy_to_user(uvcmd
, &vcmd
, sizeof(vcmd
)))
934 int nvme_nvm_ioctl(struct nvme_ns
*ns
, unsigned int cmd
, unsigned long arg
)
937 case NVME_NVM_IOCTL_ADMIN_VIO
:
938 return nvme_nvm_user_vcmd(ns
, 1, (void __user
*)arg
);
939 case NVME_NVM_IOCTL_IO_VIO
:
940 return nvme_nvm_user_vcmd(ns
, 0, (void __user
*)arg
);
941 case NVME_NVM_IOCTL_SUBMIT_VIO
:
942 return nvme_nvm_submit_vio(ns
, (void __user
*)arg
);
948 int nvme_nvm_register(struct nvme_ns
*ns
, char *disk_name
, int node
)
950 struct request_queue
*q
= ns
->queue
;
954 _nvme_nvm_check_size();
956 dev
= nvm_alloc_dev(node
);
960 /* Note that csecs and sos will be overridden if it is a 1.2 drive. */
962 geo
->csecs
= 1 << ns
->lba_shift
;
965 geo
->mdts
= ns
->ctrl
->max_hw_sectors
;
968 memcpy(dev
->name
, disk_name
, DISK_NAME_LEN
);
969 dev
->ops
= &nvme_nvm_dev_ops
;
970 dev
->private_data
= ns
;
973 return nvm_register(dev
);
976 void nvme_nvm_unregister(struct nvme_ns
*ns
)
978 nvm_unregister(ns
->ndev
);
981 static ssize_t
nvm_dev_attr_show(struct device
*dev
,
982 struct device_attribute
*dattr
, char *page
)
984 struct nvme_ns
*ns
= nvme_get_ns_from_dev(dev
);
985 struct nvm_dev
*ndev
= ns
->ndev
;
986 struct nvm_geo
*geo
= &ndev
->geo
;
987 struct attribute
*attr
;
994 if (strcmp(attr
->name
, "version") == 0) {
995 if (geo
->major_ver_id
== 1)
996 return scnprintf(page
, PAGE_SIZE
, "%u\n",
999 return scnprintf(page
, PAGE_SIZE
, "%u.%u\n",
1002 } else if (strcmp(attr
->name
, "capabilities") == 0) {
1003 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->cap
);
1004 } else if (strcmp(attr
->name
, "read_typ") == 0) {
1005 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->trdt
);
1006 } else if (strcmp(attr
->name
, "read_max") == 0) {
1007 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->trdm
);
1009 return scnprintf(page
,
1011 "Unhandled attr(%s) in `%s`\n",
1012 attr
->name
, __func__
);
1016 static ssize_t
nvm_dev_attr_show_ppaf(struct nvm_addrf_12
*ppaf
, char *page
)
1018 return scnprintf(page
, PAGE_SIZE
,
1019 "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
1020 ppaf
->ch_offset
, ppaf
->ch_len
,
1021 ppaf
->lun_offset
, ppaf
->lun_len
,
1022 ppaf
->pln_offset
, ppaf
->pln_len
,
1023 ppaf
->blk_offset
, ppaf
->blk_len
,
1024 ppaf
->pg_offset
, ppaf
->pg_len
,
1025 ppaf
->sec_offset
, ppaf
->sec_len
);
1028 static ssize_t
nvm_dev_attr_show_12(struct device
*dev
,
1029 struct device_attribute
*dattr
, char *page
)
1031 struct nvme_ns
*ns
= nvme_get_ns_from_dev(dev
);
1032 struct nvm_dev
*ndev
= ns
->ndev
;
1033 struct nvm_geo
*geo
= &ndev
->geo
;
1034 struct attribute
*attr
;
1039 attr
= &dattr
->attr
;
1041 if (strcmp(attr
->name
, "vendor_opcode") == 0) {
1042 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->vmnt
);
1043 } else if (strcmp(attr
->name
, "device_mode") == 0) {
1044 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->dom
);
1045 /* kept for compatibility */
1046 } else if (strcmp(attr
->name
, "media_manager") == 0) {
1047 return scnprintf(page
, PAGE_SIZE
, "%s\n", "gennvm");
1048 } else if (strcmp(attr
->name
, "ppa_format") == 0) {
1049 return nvm_dev_attr_show_ppaf((void *)&geo
->addrf
, page
);
1050 } else if (strcmp(attr
->name
, "media_type") == 0) { /* u8 */
1051 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->mtype
);
1052 } else if (strcmp(attr
->name
, "flash_media_type") == 0) {
1053 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->fmtype
);
1054 } else if (strcmp(attr
->name
, "num_channels") == 0) {
1055 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->num_ch
);
1056 } else if (strcmp(attr
->name
, "num_luns") == 0) {
1057 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->num_lun
);
1058 } else if (strcmp(attr
->name
, "num_planes") == 0) {
1059 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->num_pln
);
1060 } else if (strcmp(attr
->name
, "num_blocks") == 0) { /* u16 */
1061 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->num_chk
);
1062 } else if (strcmp(attr
->name
, "num_pages") == 0) {
1063 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->num_pg
);
1064 } else if (strcmp(attr
->name
, "page_size") == 0) {
1065 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->fpg_sz
);
1066 } else if (strcmp(attr
->name
, "hw_sector_size") == 0) {
1067 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->csecs
);
1068 } else if (strcmp(attr
->name
, "oob_sector_size") == 0) {/* u32 */
1069 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->sos
);
1070 } else if (strcmp(attr
->name
, "prog_typ") == 0) {
1071 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->tprt
);
1072 } else if (strcmp(attr
->name
, "prog_max") == 0) {
1073 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->tprm
);
1074 } else if (strcmp(attr
->name
, "erase_typ") == 0) {
1075 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->tbet
);
1076 } else if (strcmp(attr
->name
, "erase_max") == 0) {
1077 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->tbem
);
1078 } else if (strcmp(attr
->name
, "multiplane_modes") == 0) {
1079 return scnprintf(page
, PAGE_SIZE
, "0x%08x\n", geo
->mpos
);
1080 } else if (strcmp(attr
->name
, "media_capabilities") == 0) {
1081 return scnprintf(page
, PAGE_SIZE
, "0x%08x\n", geo
->mccap
);
1082 } else if (strcmp(attr
->name
, "max_phys_secs") == 0) {
1083 return scnprintf(page
, PAGE_SIZE
, "%u\n", NVM_MAX_VLBA
);
1085 return scnprintf(page
, PAGE_SIZE
,
1086 "Unhandled attr(%s) in `%s`\n",
1087 attr
->name
, __func__
);
1091 static ssize_t
nvm_dev_attr_show_20(struct device
*dev
,
1092 struct device_attribute
*dattr
, char *page
)
1094 struct nvme_ns
*ns
= nvme_get_ns_from_dev(dev
);
1095 struct nvm_dev
*ndev
= ns
->ndev
;
1096 struct nvm_geo
*geo
= &ndev
->geo
;
1097 struct attribute
*attr
;
1102 attr
= &dattr
->attr
;
1104 if (strcmp(attr
->name
, "groups") == 0) {
1105 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->num_ch
);
1106 } else if (strcmp(attr
->name
, "punits") == 0) {
1107 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->num_lun
);
1108 } else if (strcmp(attr
->name
, "chunks") == 0) {
1109 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->num_chk
);
1110 } else if (strcmp(attr
->name
, "clba") == 0) {
1111 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->clba
);
1112 } else if (strcmp(attr
->name
, "ws_min") == 0) {
1113 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->ws_min
);
1114 } else if (strcmp(attr
->name
, "ws_opt") == 0) {
1115 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->ws_opt
);
1116 } else if (strcmp(attr
->name
, "maxoc") == 0) {
1117 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->maxoc
);
1118 } else if (strcmp(attr
->name
, "maxocpu") == 0) {
1119 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->maxocpu
);
1120 } else if (strcmp(attr
->name
, "mw_cunits") == 0) {
1121 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->mw_cunits
);
1122 } else if (strcmp(attr
->name
, "write_typ") == 0) {
1123 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->tprt
);
1124 } else if (strcmp(attr
->name
, "write_max") == 0) {
1125 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->tprm
);
1126 } else if (strcmp(attr
->name
, "reset_typ") == 0) {
1127 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->tbet
);
1128 } else if (strcmp(attr
->name
, "reset_max") == 0) {
1129 return scnprintf(page
, PAGE_SIZE
, "%u\n", geo
->tbem
);
1131 return scnprintf(page
, PAGE_SIZE
,
1132 "Unhandled attr(%s) in `%s`\n",
1133 attr
->name
, __func__
);
/*
 * Helpers declaring a read-only (0444) device attribute named _name with no
 * store handler. The three variants differ only in the show handler:
 * the common one, the 1.2-specific one and the 2.0-specific one.
 */
#define NVM_DEV_ATTR_RO(_name)					\
	DEVICE_ATTR(_name, 0444, nvm_dev_attr_show, NULL)
#define NVM_DEV_ATTR_12_RO(_name)					\
	DEVICE_ATTR(_name, 0444, nvm_dev_attr_show_12, NULL)
#define NVM_DEV_ATTR_20_RO(_name)					\
	DEVICE_ATTR(_name, 0444, nvm_dev_attr_show_20, NULL)
/* general attributes */
static NVM_DEV_ATTR_RO(version);
static NVM_DEV_ATTR_RO(capabilities);

static NVM_DEV_ATTR_RO(read_typ);
static NVM_DEV_ATTR_RO(read_max);

/* attributes served by nvm_dev_attr_show_12() */
static NVM_DEV_ATTR_12_RO(vendor_opcode);
static NVM_DEV_ATTR_12_RO(device_mode);
static NVM_DEV_ATTR_12_RO(ppa_format);
static NVM_DEV_ATTR_12_RO(media_manager);
static NVM_DEV_ATTR_12_RO(media_type);
static NVM_DEV_ATTR_12_RO(flash_media_type);
static NVM_DEV_ATTR_12_RO(num_channels);
static NVM_DEV_ATTR_12_RO(num_luns);
static NVM_DEV_ATTR_12_RO(num_planes);
static NVM_DEV_ATTR_12_RO(num_blocks);
static NVM_DEV_ATTR_12_RO(num_pages);
static NVM_DEV_ATTR_12_RO(page_size);
static NVM_DEV_ATTR_12_RO(hw_sector_size);
static NVM_DEV_ATTR_12_RO(oob_sector_size);
static NVM_DEV_ATTR_12_RO(prog_typ);
static NVM_DEV_ATTR_12_RO(prog_max);
static NVM_DEV_ATTR_12_RO(erase_typ);
static NVM_DEV_ATTR_12_RO(erase_max);
static NVM_DEV_ATTR_12_RO(multiplane_modes);
static NVM_DEV_ATTR_12_RO(media_capabilities);
static NVM_DEV_ATTR_12_RO(max_phys_secs);

/* attributes served by nvm_dev_attr_show_20() */
static NVM_DEV_ATTR_20_RO(groups);
static NVM_DEV_ATTR_20_RO(punits);
static NVM_DEV_ATTR_20_RO(chunks);
static NVM_DEV_ATTR_20_RO(clba);
static NVM_DEV_ATTR_20_RO(ws_min);
static NVM_DEV_ATTR_20_RO(ws_opt);
static NVM_DEV_ATTR_20_RO(maxoc);
static NVM_DEV_ATTR_20_RO(maxocpu);
static NVM_DEV_ATTR_20_RO(mw_cunits);
static NVM_DEV_ATTR_20_RO(write_typ);
static NVM_DEV_ATTR_20_RO(write_max);
static NVM_DEV_ATTR_20_RO(reset_typ);
static NVM_DEV_ATTR_20_RO(reset_max);
1189 static struct attribute
*nvm_dev_attrs
[] = {
1190 /* version agnostic attrs */
1191 &dev_attr_version
.attr
,
1192 &dev_attr_capabilities
.attr
,
1193 &dev_attr_read_typ
.attr
,
1194 &dev_attr_read_max
.attr
,
1197 &dev_attr_vendor_opcode
.attr
,
1198 &dev_attr_device_mode
.attr
,
1199 &dev_attr_media_manager
.attr
,
1200 &dev_attr_ppa_format
.attr
,
1201 &dev_attr_media_type
.attr
,
1202 &dev_attr_flash_media_type
.attr
,
1203 &dev_attr_num_channels
.attr
,
1204 &dev_attr_num_luns
.attr
,
1205 &dev_attr_num_planes
.attr
,
1206 &dev_attr_num_blocks
.attr
,
1207 &dev_attr_num_pages
.attr
,
1208 &dev_attr_page_size
.attr
,
1209 &dev_attr_hw_sector_size
.attr
,
1210 &dev_attr_oob_sector_size
.attr
,
1211 &dev_attr_prog_typ
.attr
,
1212 &dev_attr_prog_max
.attr
,
1213 &dev_attr_erase_typ
.attr
,
1214 &dev_attr_erase_max
.attr
,
1215 &dev_attr_multiplane_modes
.attr
,
1216 &dev_attr_media_capabilities
.attr
,
1217 &dev_attr_max_phys_secs
.attr
,
1220 &dev_attr_groups
.attr
,
1221 &dev_attr_punits
.attr
,
1222 &dev_attr_chunks
.attr
,
1223 &dev_attr_clba
.attr
,
1224 &dev_attr_ws_min
.attr
,
1225 &dev_attr_ws_opt
.attr
,
1226 &dev_attr_maxoc
.attr
,
1227 &dev_attr_maxocpu
.attr
,
1228 &dev_attr_mw_cunits
.attr
,
1230 &dev_attr_write_typ
.attr
,
1231 &dev_attr_write_max
.attr
,
1232 &dev_attr_reset_typ
.attr
,
1233 &dev_attr_reset_max
.attr
,
1238 static umode_t
nvm_dev_attrs_visible(struct kobject
*kobj
,
1239 struct attribute
*attr
, int index
)
1241 struct device
*dev
= container_of(kobj
, struct device
, kobj
);
1242 struct gendisk
*disk
= dev_to_disk(dev
);
1243 struct nvme_ns
*ns
= disk
->private_data
;
1244 struct nvm_dev
*ndev
= ns
->ndev
;
1245 struct device_attribute
*dev_attr
=
1246 container_of(attr
, typeof(*dev_attr
), attr
);
1251 if (dev_attr
->show
== nvm_dev_attr_show
)
1254 switch (ndev
->geo
.major_ver_id
) {
1256 if (dev_attr
->show
== nvm_dev_attr_show_12
)
1260 if (dev_attr
->show
== nvm_dev_attr_show_20
)
1268 const struct attribute_group nvme_nvm_attr_group
= {
1270 .attrs
= nvm_dev_attrs
,
1271 .is_visible
= nvm_dev_attrs_visible
,