/*
 * nvme-lightnvm.c - LightNVM NVMe device
 *
 * Copyright (C) 2014-2015 IT University of Copenhagen
 * Initial release: Matias Bjorling <mb@lightnvm.io>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 */
#include "nvme.h"

#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
#include <linux/vmalloc.h>
#include <linux/sched/sysctl.h>
#include <uapi/linux/lightnvm.h>
enum nvme_nvm_admin_opcode {
	nvme_nvm_admin_identity		= 0xe2,
	nvme_nvm_admin_get_bb_tbl	= 0xf2,
	nvme_nvm_admin_set_bb_tbl	= 0xf1,
};
enum nvme_nvm_log_page {
	NVME_NVM_LOG_REPORT_CHUNK	= 0xca,
};
struct nvme_nvm_ph_rw {
	/* ... */
};

struct nvme_nvm_erase_blk {
	/* ... */
};

struct nvme_nvm_identity {
	/* ... */
};

struct nvme_nvm_getbbtbl {
	/* ... */
};

struct nvme_nvm_setbbtbl {
	/* ... */
};

struct nvme_nvm_command {
	union {
		struct nvme_common_command common;
		struct nvme_nvm_ph_rw ph_rw;
		struct nvme_nvm_erase_blk erase;
		struct nvme_nvm_identity identity;
		struct nvme_nvm_getbbtbl get_bb;
		struct nvme_nvm_setbbtbl set_bb;
	};
};
struct nvme_nvm_id12_grp {
	/* ... */
};

struct nvme_nvm_id12_addrf {
	/* ... */
};

struct nvme_nvm_id12 {
	/* ... */
	struct nvme_nvm_id12_addrf ppaf;
	/* ... */
	struct nvme_nvm_id12_grp grp;
	/* ... */
};

struct nvme_nvm_bb_tbl {
	/* ... */
};

struct nvme_nvm_id20_addrf {
	/* ... */
};

struct nvme_nvm_id20 {
	/* ... */
	struct nvme_nvm_id20_addrf lbaf;
	/* ... */

	/* Write data requirements */
	/* ... */

	/* Performance related metrics */
	/* ... */

	/* Vendor specific */
	/* ... */
};

struct nvme_nvm_chk_meta {
	/* ... */
};
/*
 * Check we didn't inadvertently grow the command struct
 */
static inline void _nvme_nvm_check_size(void)
{
	BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) !=
						sizeof(struct nvm_chk_meta));
}
static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst,
				 struct nvme_nvm_id12_addrf *src)
{
	dst->ch_len = src->ch_len;
	dst->lun_len = src->lun_len;
	dst->blk_len = src->blk_len;
	dst->pg_len = src->pg_len;
	dst->pln_len = src->pln_len;
	dst->sec_len = src->sec_len;

	dst->ch_offset = src->ch_offset;
	dst->lun_offset = src->lun_offset;
	dst->blk_offset = src->blk_offset;
	dst->pg_offset = src->pg_offset;
	dst->pln_offset = src->pln_offset;
	dst->sec_offset = src->sec_offset;

	dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
	dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
	dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
	dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
	dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
	dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
}
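
/*
 * Worked example for the mask derivation above (hypothetical field sizes,
 * not taken from any particular device): with sec_len = 2 and sec_offset = 0
 * the sector mask becomes ((1ULL << 2) - 1) << 0 = 0x3, and with pln_len = 2
 * at pln_offset = 2 the plane mask becomes 0x3 << 2 = 0xc. A field is then
 * extracted from a PPA as (ppa & mask) >> offset.
 */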
static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id,
			     struct nvm_geo *geo)
{
	struct nvme_nvm_id12_grp *src;
	int sec_per_pg, sec_per_pl, pg_per_blk;

	src = &id->grp;

	if (src->mtype != 0) {
		pr_err("nvm: memory type not supported\n");
		return -EINVAL;
	}
	/* 1.2 spec. only reports a single version id - unfold */
	geo->major_ver_id = id->ver_id;
	geo->minor_ver_id = 2;

	/* Set compacted version for upper layers */
	geo->version = NVM_OCSSD_SPEC_12;

	geo->num_ch = src->num_ch;
	geo->num_lun = src->num_lun;
	geo->all_luns = geo->num_ch * geo->num_lun;

	geo->num_chk = le16_to_cpu(src->num_chk);

	geo->csecs = le16_to_cpu(src->csecs);
	geo->sos = le16_to_cpu(src->sos);

	pg_per_blk = le16_to_cpu(src->num_pg);
	sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs;
	sec_per_pl = sec_per_pg * src->num_pln;
	geo->clba = sec_per_pl * pg_per_blk;

	geo->all_chunks = geo->all_luns * geo->num_chk;
	geo->total_secs = geo->clba * geo->all_chunks;

	geo->ws_min = sec_per_pg;
	geo->ws_opt = sec_per_pg;
	geo->mw_cunits = geo->ws_opt << 3;	/* default to MLC safe values */

	/* Do not impose values for maximum number of open blocks as it is
	 * unspecified in 1.2. Users of 1.2 must be aware of this and eventually
	 * specify these values through a quirk if restrictions apply.
	 */
	geo->maxoc = geo->all_luns * geo->num_chk;
	geo->maxocpu = geo->num_chk;

	geo->mccap = le32_to_cpu(src->mccap);

	geo->trdt = le32_to_cpu(src->trdt);
	geo->trdm = le32_to_cpu(src->trdm);
	geo->tprt = le32_to_cpu(src->tprt);
	geo->tprm = le32_to_cpu(src->tprm);
	geo->tbet = le32_to_cpu(src->tbet);
	geo->tbem = le32_to_cpu(src->tbem);

	/* 1.2 compatibility */
	geo->vmnt = id->vmnt;
	geo->cap = le32_to_cpu(id->cap);
	geo->dom = le32_to_cpu(id->dom);

	geo->mtype = src->mtype;
	geo->fmtype = src->fmtype;

	geo->cpar = le16_to_cpu(src->cpar);
	geo->mpos = le32_to_cpu(src->mpos);

	geo->pln_mode = NVM_PLANE_SINGLE;

	if (geo->mpos & 0x020202) {
		geo->pln_mode = NVM_PLANE_DOUBLE;
		geo->ws_opt <<= 1;
	} else if (geo->mpos & 0x040404) {
		geo->pln_mode = NVM_PLANE_QUAD;
		geo->ws_opt <<= 2;
	}

	geo->num_pln = src->num_pln;
	geo->num_pg = le16_to_cpu(src->num_pg);
	geo->fpg_sz = le16_to_cpu(src->fpg_sz);

	nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf);

	return 0;
}
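
/*
 * Example of the 1.2 geometry unfolding performed above, using a purely
 * hypothetical device (no specific hardware implied): fpg_sz = 16384,
 * csecs = 4096, num_pln = 4 and num_pg = 512 give sec_per_pg = 4,
 * sec_per_pl = 16 and clba = 8192 sectors per chunk; total_secs then
 * follows as clba * num_ch * num_lun * num_chk.
 */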
static void nvme_nvm_set_addr_20(struct nvm_addrf *dst,
				 struct nvme_nvm_id20_addrf *src)
{
	dst->ch_len = src->grp_len;
	dst->lun_len = src->pu_len;
	dst->chk_len = src->chk_len;
	dst->sec_len = src->lba_len;

	dst->sec_offset = 0;
	dst->chk_offset = dst->sec_len;
	dst->lun_offset = dst->chk_offset + dst->chk_len;
	dst->ch_offset = dst->lun_offset + dst->lun_len;

	dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
	dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
	dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset;
	dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
}
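
/*
 * Illustration of the 2.0 layout built above (hypothetical field widths):
 * with lba_len = 12, chk_len = 10, pu_len = 7 and grp_len = 4 the address is
 * packed as sector bits [11:0], chunk bits [21:12], parallel unit bits
 * [28:22] and group bits [32:29], so e.g.
 * chk_mask = ((1ULL << 10) - 1) << 12 = 0x3ff000.
 */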
static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
			     struct nvm_geo *geo)
{
	geo->major_ver_id = id->mjr;
	geo->minor_ver_id = id->mnr;

	/* Set compacted version for upper layers */
	geo->version = NVM_OCSSD_SPEC_20;

	geo->num_ch = le16_to_cpu(id->num_grp);
	geo->num_lun = le16_to_cpu(id->num_pu);
	geo->all_luns = geo->num_ch * geo->num_lun;

	geo->num_chk = le32_to_cpu(id->num_chk);
	geo->clba = le32_to_cpu(id->clba);

	geo->all_chunks = geo->all_luns * geo->num_chk;
	geo->total_secs = geo->clba * geo->all_chunks;

	geo->ws_min = le32_to_cpu(id->ws_min);
	geo->ws_opt = le32_to_cpu(id->ws_opt);
	geo->mw_cunits = le32_to_cpu(id->mw_cunits);
	geo->maxoc = le32_to_cpu(id->maxoc);
	geo->maxocpu = le32_to_cpu(id->maxocpu);

	geo->trdt = le32_to_cpu(id->trdt);
	geo->trdm = le32_to_cpu(id->trdm);
	geo->tprt = le32_to_cpu(id->twrt);
	geo->tprm = le32_to_cpu(id->twrm);
	geo->tbet = le32_to_cpu(id->tcrst);
	geo->tbem = le32_to_cpu(id->tcrsm);

	nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf);

	return 0;
}
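
/*
 * Note on naming: the generic geometry keeps the 1.2 field names, so under
 * 2.0 the write timings (twrt/twrm) land in tprt/tprm and the chunk reset
 * timings (tcrst/tcrsm) land in tbet/tbem, as assigned above.
 */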
static int nvme_nvm_identity(struct nvm_dev *nvmdev)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_id12 *id;
	struct nvme_nvm_command c = {};
	int ret;

	c.identity.opcode = nvme_nvm_admin_identity;
	c.identity.nsid = cpu_to_le32(ns->head->ns_id);

	id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
				id, sizeof(struct nvme_nvm_id12));
	if (ret) {
		ret = -EIO;
		goto out;
	}

	/*
	 * The 1.2 and 2.0 specifications share the first byte in their geometry
	 * command to make it possible to know what version a device implements.
	 */
	switch (id->ver_id) {
	case 1:
		ret = nvme_nvm_setup_12(id, &nvmdev->geo);
		break;
	case 2:
		ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id,
							&nvmdev->geo);
		break;
	default:
		dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n",
							id->ver_id);
		ret = -EINVAL;
	}

out:
	kfree(id);
	return ret;
}
static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
								u8 *blks)
{
	struct request_queue *q = nvmdev->q;
	struct nvm_geo *geo = &nvmdev->geo;
	struct nvme_ns *ns = q->queuedata;
	struct nvme_ctrl *ctrl = ns->ctrl;
	struct nvme_nvm_command c = {};
	struct nvme_nvm_bb_tbl *bb_tbl;
	int nr_blks = geo->num_chk * geo->num_pln;
	int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
	int ret = 0;

	c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
	c.get_bb.nsid = cpu_to_le32(ns->head->ns_id);
	c.get_bb.spba = cpu_to_le64(ppa.ppa);

	bb_tbl = kzalloc(tblsz, GFP_KERNEL);
	if (!bb_tbl)
		return -ENOMEM;

	ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
								bb_tbl, tblsz);
	if (ret) {
		dev_err(ctrl->device, "get bad block table failed (%d)\n", ret);
		ret = -EIO;
		goto out;
	}

	if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
		bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
		dev_err(ctrl->device, "bbt format mismatch\n");
		ret = -EINVAL;
		goto out;
	}

	if (le16_to_cpu(bb_tbl->verid) != 1) {
		ret = -EINVAL;
		dev_err(ctrl->device, "bbt version not supported\n");
		goto out;
	}

	if (le32_to_cpu(bb_tbl->tblks) != nr_blks) {
		ret = -EINVAL;
		dev_err(ctrl->device,
			"bbt unexpected blocks returned (%u!=%u)",
			le32_to_cpu(bb_tbl->tblks), nr_blks);
		goto out;
	}

	memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln);
out:
	kfree(bb_tbl);
	return ret;
}
static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
							int nr_ppas, int type)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_command c = {};
	int ret = 0;

	c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
	c.set_bb.nsid = cpu_to_le32(ns->head->ns_id);
	c.set_bb.spba = cpu_to_le64(ppas->ppa);
	c.set_bb.nlb = cpu_to_le16(nr_ppas - 1);
	c.set_bb.value = type;

	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
								NULL, 0);
	if (ret)
		dev_err(ns->ctrl->device, "set bad block table failed (%d)\n",
									ret);
	return ret;
}
/*
 * Expect the lba in device format
 */
static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
				 sector_t slba, int nchks,
				 struct nvm_chk_meta *meta)
{
	struct nvm_geo *geo = &ndev->geo;
	struct nvme_ns *ns = ndev->q->queuedata;
	struct nvme_ctrl *ctrl = ns->ctrl;
	struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off;
	struct ppa_addr ppa;
	size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
	size_t log_pos, offset, len;
	int ret = 0, i;
	unsigned int max_len;

	/*
	 * limit requests to maximum 256K to avoid issuing arbitrary large
	 * requests when the device does not specify a maximum transfer size.
	 */
	max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024);

	dev_meta = kmalloc(max_len, GFP_KERNEL);
	if (!dev_meta)
		return -ENOMEM;

	/* Normalize lba address space to obtain log offset */
	ppa.ppa = slba;
	ppa = dev_to_generic_addr(ndev, ppa);

	log_pos = ppa.m.chk;
	log_pos += ppa.m.pu * geo->num_chk;
	log_pos += ppa.m.grp * geo->num_lun * geo->num_chk;

	offset = log_pos * sizeof(struct nvme_nvm_chk_meta);

	while (left) {
		len = min_t(unsigned int, left, max_len);

		memset(dev_meta, 0, max_len);
		dev_meta_off = dev_meta;

		ret = nvme_get_log(ctrl, ns->head->ns_id,
				NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len,
				offset);
		if (ret) {
			dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
			break;
		}

		for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
			meta->state = dev_meta_off->state;
			meta->type = dev_meta_off->type;
			meta->wi = dev_meta_off->wi;
			meta->slba = le64_to_cpu(dev_meta_off->slba);
			meta->cnlb = le64_to_cpu(dev_meta_off->cnlb);
			meta->wp = le64_to_cpu(dev_meta_off->wp);

			meta++;
			dev_meta_off++;
		}

		offset += len;
		left -= len;
	}

	kfree(dev_meta);

	return ret;
}
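
/*
 * Example of the log offset computed above (hypothetical geometry): with
 * num_chk = 1000 and num_lun = 4, the chunk at grp = 1, pu = 2, chk = 5
 * maps to log_pos = 5 + 2 * 1000 + 1 * 4 * 1000 = 6005 entries into the
 * report-chunk log, i.e. a byte offset of 6005 * 32.
 */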
static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
				    struct nvme_nvm_command *c)
{
	c->ph_rw.opcode = rqd->opcode;
	c->ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
	c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
	c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list);
	c->ph_rw.control = cpu_to_le16(rqd->flags);
	c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1);
}
static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
{
	struct nvm_rq *rqd = rq->end_io_data;

	rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
	rqd->error = nvme_req(rq)->status;
	nvm_end_io(rqd);

	kfree(nvme_req(rq)->cmd);
	blk_mq_free_request(rq);
}
static struct request *nvme_nvm_alloc_request(struct request_queue *q,
					      struct nvm_rq *rqd,
					      struct nvme_nvm_command *cmd)
{
	struct nvme_ns *ns = q->queuedata;
	struct request *rq;

	nvme_nvm_rqtocmd(rqd, ns, cmd);

	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
	if (IS_ERR(rq))
		return rq;

	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;

	if (rqd->bio)
		blk_init_request_from_bio(rq, rqd->bio);
	else
		rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);

	return rq;
}
static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct nvme_nvm_command *cmd;
	struct request *rq;
	int ret = 0;

	cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
	if (!cmd)
		return -ENOMEM;

	rq = nvme_nvm_alloc_request(q, rqd, cmd);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_free_cmd;
	}

	rq->end_io_data = rqd;

	blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);

	return 0;

err_free_cmd:
	kfree(cmd);
	return ret;
}
static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct request *rq;
	struct nvme_nvm_command cmd;
	int ret = 0;

	memset(&cmd, 0, sizeof(struct nvme_nvm_command));

	rq = nvme_nvm_alloc_request(q, rqd, &cmd);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* I/Os can fail and the error is signaled through rqd. Callers must
	 * handle the error accordingly.
	 */
	blk_execute_rq(q, NULL, rq, 0);
	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
		ret = -EINTR;

	rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
	rqd->error = nvme_req(rq)->status;

	blk_mq_free_request(rq);

	return ret;
}
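
/*
 * Ownership note for the two submission paths above: the asynchronous path
 * heap-allocates the nvme_nvm_command and relies on nvme_nvm_end_io() to
 * free it through nvme_req(rq)->cmd, while the synchronous path keeps the
 * command on the stack and only frees the request.
 */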
static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;

	return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
}

static void nvme_nvm_destroy_dma_pool(void *pool)
{
	struct dma_pool *dma_pool = pool;

	dma_pool_destroy(dma_pool);
}

static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
				    gfp_t mem_flags, dma_addr_t *dma_handler)
{
	return dma_pool_alloc(pool, mem_flags, dma_handler);
}

static void nvme_nvm_dev_dma_free(void *pool, void *addr,
				  dma_addr_t dma_handler)
{
	dma_pool_free(pool, addr, dma_handler);
}
static struct nvm_dev_ops nvme_nvm_dev_ops = {
	.identity		= nvme_nvm_identity,

	.get_bb_tbl		= nvme_nvm_get_bb_tbl,
	.set_bb_tbl		= nvme_nvm_set_bb_tbl,

	.get_chk_meta		= nvme_nvm_get_chk_meta,

	.submit_io		= nvme_nvm_submit_io,
	.submit_io_sync		= nvme_nvm_submit_io_sync,

	.create_dma_pool	= nvme_nvm_create_dma_pool,
	.destroy_dma_pool	= nvme_nvm_destroy_dma_pool,
	.dev_dma_alloc		= nvme_nvm_dev_dma_alloc,
	.dev_dma_free		= nvme_nvm_dev_dma_free,
};
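
/*
 * These ops are the interface the lightnvm core uses once nvm_register() has
 * run: identity and the bad-block/chunk-metadata callbacks service geometry
 * and media reporting, submit_io/submit_io_sync carry vector I/O, and the
 * dma_pool hooks back the PPA and metadata lists those commands reference.
 */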
static int nvme_nvm_submit_user_cmd(struct request_queue *q,
				struct nvme_ns *ns,
				struct nvme_nvm_command *vcmd,
				void __user *ubuf, unsigned int bufflen,
				void __user *meta_buf, unsigned int meta_len,
				void __user *ppa_buf, unsigned int ppa_len,
				u32 *result, u64 *status, unsigned int timeout)
{
	bool write = nvme_is_write((struct nvme_command *)vcmd);
	struct nvm_dev *dev = ns->ndev;
	struct gendisk *disk = ns->disk;
	struct request *rq;
	struct bio *bio = NULL;
	__le64 *ppa_list = NULL;
	dma_addr_t ppa_dma;
	__le64 *metadata = NULL;
	dma_addr_t metadata_dma;
	DECLARE_COMPLETION_ONSTACK(wait);
	int ret = 0;

	rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
			NVME_QID_ANY);
	if (IS_ERR(rq)) {
		ret = -ENOMEM;
		goto err_cmd;
	}

	rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;

	if (ppa_buf && ppa_len) {
		ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
		if (!ppa_list) {
			ret = -ENOMEM;
			goto err_rq;
		}
		if (copy_from_user(ppa_list, (void __user *)ppa_buf,
						sizeof(u64) * (ppa_len + 1))) {
			ret = -EFAULT;
			goto err_ppa;
		}
		vcmd->ph_rw.spba = cpu_to_le64(ppa_dma);
	} else {
		vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf);
	}

	if (ubuf && bufflen) {
		ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL);
		if (ret)
			goto err_ppa;
		bio = rq->bio;

		if (meta_buf && meta_len) {
			metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL,
								&metadata_dma);
			if (!metadata) {
				ret = -ENOMEM;
				goto err_map;
			}

			if (write) {
				if (copy_from_user(metadata,
						(void __user *)meta_buf,
						meta_len)) {
					ret = -EFAULT;
					goto err_meta;
				}
			}
			vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
		}

		bio->bi_disk = disk;
	}

	blk_execute_rq(q, NULL, rq, 0);

	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
		ret = -EINTR;
	else if (nvme_req(rq)->status & 0x7ff)
		ret = -EIO;
	if (result)
		*result = nvme_req(rq)->status & 0x7ff;
	if (status)
		*status = le64_to_cpu(nvme_req(rq)->result.u64);

	if (metadata && !ret && !write) {
		if (copy_to_user(meta_buf, (void *)metadata, meta_len))
			ret = -EFAULT;
	}

err_meta:
	if (meta_buf && meta_len)
		dma_pool_free(dev->dma_pool, metadata, metadata_dma);
err_map:
	if (bio)
		blk_rq_unmap_user(bio);
err_ppa:
	if (ppa_buf && ppa_len)
		dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
err_rq:
	blk_mq_free_request(rq);
err_cmd:
	return ret;
}
static int nvme_nvm_submit_vio(struct nvme_ns *ns,
					struct nvm_user_vio __user *uvio)
{
	struct nvm_user_vio vio;
	struct nvme_nvm_command c;
	unsigned int length;
	int ret;

	if (copy_from_user(&vio, uvio, sizeof(vio)))
		return -EFAULT;
	if (vio.flags)
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.ph_rw.opcode = vio.opcode;
	c.ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
	c.ph_rw.control = cpu_to_le16(vio.control);
	c.ph_rw.length = cpu_to_le16(vio.nppas);

	length = (vio.nppas + 1) << ns->lba_shift;

	ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c,
			(void __user *)(uintptr_t)vio.addr, length,
			(void __user *)(uintptr_t)vio.metadata,
							vio.metadata_len,
			(void __user *)(uintptr_t)vio.ppa_list, vio.nppas,
			&vio.result, &vio.status, 0);

	if (ret && copy_to_user(uvio, &vio, sizeof(vio)))
		return -EFAULT;

	return ret;
}
static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
					struct nvm_passthru_vio __user *uvcmd)
{
	struct nvm_passthru_vio vcmd;
	struct nvme_nvm_command c;
	struct request_queue *q;
	unsigned int timeout = 0;
	int ret;

	if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd)))
		return -EFAULT;
	if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN)))
		return -EACCES;
	if (vcmd.flags)
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = vcmd.opcode;
	c.common.nsid = cpu_to_le32(ns->head->ns_id);
	c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
	/* cdw11-12 */
	c.ph_rw.length = cpu_to_le16(vcmd.nppas);
	c.ph_rw.control = cpu_to_le16(vcmd.control);
	c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
	c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
	c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);

	if (vcmd.timeout_ms)
		timeout = msecs_to_jiffies(vcmd.timeout_ms);

	q = admin ? ns->ctrl->admin_q : ns->queue;

	ret = nvme_nvm_submit_user_cmd(q, ns,
			(struct nvme_nvm_command *)&c,
			(void __user *)(uintptr_t)vcmd.addr, vcmd.data_len,
			(void __user *)(uintptr_t)vcmd.metadata,
							vcmd.metadata_len,
			(void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas,
			&vcmd.result, &vcmd.status, timeout);

	if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd)))
		return -EFAULT;

	return ret;
}
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case NVME_NVM_IOCTL_ADMIN_VIO:
		return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg);
	case NVME_NVM_IOCTL_IO_VIO:
		return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg);
	case NVME_NVM_IOCTL_SUBMIT_VIO:
		return nvme_nvm_submit_vio(ns, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}
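
/*
 * Userspace usage sketch for NVME_NVM_IOCTL_SUBMIT_VIO (illustrative only;
 * the field values are hypothetical and error handling is omitted):
 *
 *	struct nvm_user_vio vio = { 0 };
 *
 *	vio.opcode = 0x92;			// 1.2 vector read opcode
 *	vio.nppas = 3;				// zero-based: four PPAs
 *	vio.ppa_list = (__u64)(uintptr_t)ppas;	// device-format PPA array
 *	vio.addr = (__u64)(uintptr_t)buf;	// data buffer, four sectors
 *	ioctl(fd, NVME_NVM_IOCTL_SUBMIT_VIO, &vio);
 *
 * On error the updated vio (result/status) is copied back to userspace, as
 * done by nvme_nvm_submit_vio() above.
 */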
void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
{
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;

	if (geo->version == NVM_OCSSD_SPEC_12)
		return;

	geo->csecs = 1 << ns->lba_shift;
	geo->sos = ns->ms;
}
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
{
	struct request_queue *q = ns->queue;
	struct nvm_dev *dev;

	_nvme_nvm_check_size();

	dev = nvm_alloc_dev(node);
	if (!dev)
		return -ENOMEM;

	dev->q = q;
	memcpy(dev->name, disk_name, DISK_NAME_LEN);
	dev->ops = &nvme_nvm_dev_ops;
	dev->private_data = ns;
	ns->ndev = dev;

	return nvm_register(dev);
}
void nvme_nvm_unregister(struct nvme_ns *ns)
{
	nvm_unregister(ns->ndev);
}
static ssize_t nvm_dev_attr_show(struct device *dev,
				 struct device_attribute *dattr, char *page)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;
	struct attribute *attr;

	if (!ndev)
		return 0;

	attr = &dattr->attr;

	if (strcmp(attr->name, "version") == 0) {
		if (geo->major_ver_id == 1)
			return scnprintf(page, PAGE_SIZE, "%u\n",
						geo->major_ver_id);
		else
			return scnprintf(page, PAGE_SIZE, "%u.%u\n",
						geo->major_ver_id,
						geo->minor_ver_id);
	} else if (strcmp(attr->name, "capabilities") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap);
	} else if (strcmp(attr->name, "read_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt);
	} else if (strcmp(attr->name, "read_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm);
	} else {
		return scnprintf(page,
				 PAGE_SIZE,
				 "Unhandled attr(%s) in `%s`\n",
				 attr->name, __func__);
	}
}
nvm_dev_attr_show_ppaf(struct nvm_addrf_12
*ppaf
, char *page
)
1049 return scnprintf(page
, PAGE_SIZE
,
1050 "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
1051 ppaf
->ch_offset
, ppaf
->ch_len
,
1052 ppaf
->lun_offset
, ppaf
->lun_len
,
1053 ppaf
->pln_offset
, ppaf
->pln_len
,
1054 ppaf
->blk_offset
, ppaf
->blk_len
,
1055 ppaf
->pg_offset
, ppaf
->pg_len
,
1056 ppaf
->sec_offset
, ppaf
->sec_len
);
static ssize_t nvm_dev_attr_show_12(struct device *dev,
		struct device_attribute *dattr, char *page)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;
	struct attribute *attr;

	if (!ndev)
		return 0;

	attr = &dattr->attr;

	if (strcmp(attr->name, "vendor_opcode") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt);
	} else if (strcmp(attr->name, "device_mode") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom);
	/* kept for compatibility */
	} else if (strcmp(attr->name, "media_manager") == 0) {
		return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
	} else if (strcmp(attr->name, "ppa_format") == 0) {
		return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page);
	} else if (strcmp(attr->name, "media_type") == 0) {	/* u8 */
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype);
	} else if (strcmp(attr->name, "flash_media_type") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype);
	} else if (strcmp(attr->name, "num_channels") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
	} else if (strcmp(attr->name, "num_luns") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
	} else if (strcmp(attr->name, "num_planes") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln);
	} else if (strcmp(attr->name, "num_blocks") == 0) {	/* u16 */
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
	} else if (strcmp(attr->name, "num_pages") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg);
	} else if (strcmp(attr->name, "page_size") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz);
	} else if (strcmp(attr->name, "hw_sector_size") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs);
	} else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos);
	} else if (strcmp(attr->name, "prog_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
	} else if (strcmp(attr->name, "prog_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
	} else if (strcmp(attr->name, "erase_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
	} else if (strcmp(attr->name, "erase_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
	} else if (strcmp(attr->name, "multiplane_modes") == 0) {
		return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos);
	} else if (strcmp(attr->name, "media_capabilities") == 0) {
		return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap);
	} else if (strcmp(attr->name, "max_phys_secs") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA);
	} else {
		return scnprintf(page, PAGE_SIZE,
			"Unhandled attr(%s) in `%s`\n",
			attr->name, __func__);
	}
}
static ssize_t nvm_dev_attr_show_20(struct device *dev,
		struct device_attribute *dattr, char *page)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;
	struct attribute *attr;

	if (!ndev)
		return 0;

	attr = &dattr->attr;

	if (strcmp(attr->name, "groups") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
	} else if (strcmp(attr->name, "punits") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
	} else if (strcmp(attr->name, "chunks") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
	} else if (strcmp(attr->name, "clba") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba);
	} else if (strcmp(attr->name, "ws_min") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min);
	} else if (strcmp(attr->name, "ws_opt") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt);
	} else if (strcmp(attr->name, "maxoc") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc);
	} else if (strcmp(attr->name, "maxocpu") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu);
	} else if (strcmp(attr->name, "mw_cunits") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits);
	} else if (strcmp(attr->name, "write_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
	} else if (strcmp(attr->name, "write_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
	} else if (strcmp(attr->name, "reset_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
	} else if (strcmp(attr->name, "reset_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
	} else {
		return scnprintf(page, PAGE_SIZE,
			"Unhandled attr(%s) in `%s`\n",
			attr->name, __func__);
	}
}
#define NVM_DEV_ATTR_RO(_name)					\
	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
#define NVM_DEV_ATTR_12_RO(_name)				\
	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL)
#define NVM_DEV_ATTR_20_RO(_name)				\
	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL)
/* general attributes */
static NVM_DEV_ATTR_RO(version);
static NVM_DEV_ATTR_RO(capabilities);

static NVM_DEV_ATTR_RO(read_typ);
static NVM_DEV_ATTR_RO(read_max);

static NVM_DEV_ATTR_12_RO(vendor_opcode);
static NVM_DEV_ATTR_12_RO(device_mode);
static NVM_DEV_ATTR_12_RO(ppa_format);
static NVM_DEV_ATTR_12_RO(media_manager);
static NVM_DEV_ATTR_12_RO(media_type);
static NVM_DEV_ATTR_12_RO(flash_media_type);
static NVM_DEV_ATTR_12_RO(num_channels);
static NVM_DEV_ATTR_12_RO(num_luns);
static NVM_DEV_ATTR_12_RO(num_planes);
static NVM_DEV_ATTR_12_RO(num_blocks);
static NVM_DEV_ATTR_12_RO(num_pages);
static NVM_DEV_ATTR_12_RO(page_size);
static NVM_DEV_ATTR_12_RO(hw_sector_size);
static NVM_DEV_ATTR_12_RO(oob_sector_size);
static NVM_DEV_ATTR_12_RO(prog_typ);
static NVM_DEV_ATTR_12_RO(prog_max);
static NVM_DEV_ATTR_12_RO(erase_typ);
static NVM_DEV_ATTR_12_RO(erase_max);
static NVM_DEV_ATTR_12_RO(multiplane_modes);
static NVM_DEV_ATTR_12_RO(media_capabilities);
static NVM_DEV_ATTR_12_RO(max_phys_secs);

static NVM_DEV_ATTR_20_RO(groups);
static NVM_DEV_ATTR_20_RO(punits);
static NVM_DEV_ATTR_20_RO(chunks);
static NVM_DEV_ATTR_20_RO(clba);
static NVM_DEV_ATTR_20_RO(ws_min);
static NVM_DEV_ATTR_20_RO(ws_opt);
static NVM_DEV_ATTR_20_RO(maxoc);
static NVM_DEV_ATTR_20_RO(maxocpu);
static NVM_DEV_ATTR_20_RO(mw_cunits);
static NVM_DEV_ATTR_20_RO(write_typ);
static NVM_DEV_ATTR_20_RO(write_max);
static NVM_DEV_ATTR_20_RO(reset_typ);
static NVM_DEV_ATTR_20_RO(reset_max);
static struct attribute *nvm_dev_attrs[] = {
	/* version agnostic attrs */
	&dev_attr_version.attr,
	&dev_attr_capabilities.attr,
	&dev_attr_read_typ.attr,
	&dev_attr_read_max.attr,

	&dev_attr_vendor_opcode.attr,
	&dev_attr_device_mode.attr,
	&dev_attr_media_manager.attr,
	&dev_attr_ppa_format.attr,
	&dev_attr_media_type.attr,
	&dev_attr_flash_media_type.attr,
	&dev_attr_num_channels.attr,
	&dev_attr_num_luns.attr,
	&dev_attr_num_planes.attr,
	&dev_attr_num_blocks.attr,
	&dev_attr_num_pages.attr,
	&dev_attr_page_size.attr,
	&dev_attr_hw_sector_size.attr,
	&dev_attr_oob_sector_size.attr,
	&dev_attr_prog_typ.attr,
	&dev_attr_prog_max.attr,
	&dev_attr_erase_typ.attr,
	&dev_attr_erase_max.attr,
	&dev_attr_multiplane_modes.attr,
	&dev_attr_media_capabilities.attr,
	&dev_attr_max_phys_secs.attr,

	&dev_attr_groups.attr,
	&dev_attr_punits.attr,
	&dev_attr_chunks.attr,
	&dev_attr_clba.attr,
	&dev_attr_ws_min.attr,
	&dev_attr_ws_opt.attr,
	&dev_attr_maxoc.attr,
	&dev_attr_maxocpu.attr,
	&dev_attr_mw_cunits.attr,

	&dev_attr_write_typ.attr,
	&dev_attr_write_max.attr,
	&dev_attr_reset_typ.attr,
	&dev_attr_reset_max.attr,

	NULL,
};
static umode_t nvm_dev_attrs_visible(struct kobject *kobj,
				     struct attribute *attr, int index)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct gendisk *disk = dev_to_disk(dev);
	struct nvme_ns *ns = disk->private_data;
	struct nvm_dev *ndev = ns->ndev;
	struct device_attribute *dev_attr =
		container_of(attr, typeof(*dev_attr), attr);

	if (!ndev)
		return 0;

	if (dev_attr->show == nvm_dev_attr_show)
		return attr->mode;

	switch (ndev->geo.major_ver_id) {
	case 1:
		if (dev_attr->show == nvm_dev_attr_show_12)
			return attr->mode;
		break;
	case 2:
		if (dev_attr->show == nvm_dev_attr_show_20)
			return attr->mode;
		break;
	}

	return 0;
}
const struct attribute_group nvme_nvm_attr_group = {
	.name		= "lightnvm",
	.attrs		= nvm_dev_attrs,
	.is_visible	= nvm_dev_attrs_visible,
};
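
/*
 * The group is exposed under the namespace's block device in sysfs, e.g.
 * /sys/block/nvme0n1/lightnvm/ws_min (a hypothetical device name); whether
 * the 1.2 or the 2.0 attributes appear is decided per attribute by
 * nvm_dev_attrs_visible() based on the reported major version.
 */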