/*
 * nvme-lightnvm.c - LightNVM NVMe device
 *
 * Copyright (C) 2014-2015 IT University of Copenhagen
 * Initial release: Matias Bjorling <mb@lightnvm.io>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 */

#include "nvme.h"

#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
#include <linux/vmalloc.h>
#include <linux/sched/sysctl.h>
#include <uapi/linux/lightnvm.h>
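
/*
 * Vendor-specific (OCSSD) admin opcodes and the log page identifier used by
 * the geometry, bad block table and chunk report commands below.
 */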
enum nvme_nvm_admin_opcode {
	nvme_nvm_admin_identity		= 0xe2,
	nvme_nvm_admin_get_bb_tbl	= 0xf2,
	nvme_nvm_admin_set_bb_tbl	= 0xf1,
};

enum nvme_nvm_log_page {
	NVME_NVM_LOG_REPORT_CHUNK	= 0xca,
};

struct nvme_nvm_ph_rw {

struct nvme_nvm_erase_blk {

struct nvme_nvm_identity {

struct nvme_nvm_getbbtbl {

struct nvme_nvm_setbbtbl {

struct nvme_nvm_command {
	union {
		struct nvme_common_command common;
		struct nvme_nvm_ph_rw ph_rw;
		struct nvme_nvm_erase_blk erase;
		struct nvme_nvm_identity identity;
		struct nvme_nvm_getbbtbl get_bb;
		struct nvme_nvm_setbbtbl set_bb;
	};
};

struct nvme_nvm_id12_grp {

struct nvme_nvm_id12_addrf {

struct nvme_nvm_id12 {
	struct nvme_nvm_id12_addrf ppaf;
	struct nvme_nvm_id12_grp grp;

struct nvme_nvm_bb_tbl {

struct nvme_nvm_id20_addrf {

struct nvme_nvm_id20 {
	struct nvme_nvm_id20_addrf lbaf;

	/* Write data requirements */

	/* Performance related metrics */

	/* Vendor specific */

struct nvme_nvm_chk_meta {
/*
 * Check we didn't inadvertently grow the command struct sizes:
 */
static inline void _nvme_nvm_check_size(void)
{
	BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) !=
						sizeof(struct nvm_chk_meta));
}
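
/*
 * Translate the 1.2 identify PPA format descriptor into the generic address
 * format: copy the per-field bit lengths and offsets, then derive the
 * extraction masks (e.g. a 4-bit channel field at offset 56 yields
 * ch_mask = 0xf << 56).
 */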
static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst,
				 struct nvme_nvm_id12_addrf *src)
{
	dst->ch_len = src->ch_len;
	dst->lun_len = src->lun_len;
	dst->blk_len = src->blk_len;
	dst->pg_len = src->pg_len;
	dst->pln_len = src->pln_len;
	dst->sec_len = src->sec_len;

	dst->ch_offset = src->ch_offset;
	dst->lun_offset = src->lun_offset;
	dst->blk_offset = src->blk_offset;
	dst->pg_offset = src->pg_offset;
	dst->pln_offset = src->pln_offset;
	dst->sec_offset = src->sec_offset;

	dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
	dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
	dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
	dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
	dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
	dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
}
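
/*
 * Unpack a 1.2 identify geometry response into the generic nvm_geo
 * description used by the lightnvm core. The chunk size (clba) is derived
 * from pages per block, planes and sectors per page rather than being
 * reported directly by the device.
 */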
static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id,
		struct nvm_geo *geo)
{
	struct nvme_nvm_id12_grp *src;
	int sec_per_pg, sec_per_pl, pg_per_blk;

	src = &id->grp;

	if (src->mtype != 0) {
		pr_err("nvm: memory type not supported\n");
		return -EINVAL;
	}

	/* 1.2 spec. only reports a single version id - unfold */
	geo->major_ver_id = id->ver_id;
	geo->minor_ver_id = 2;

	/* Set compacted version for upper layers */
	geo->version = NVM_OCSSD_SPEC_12;

	geo->num_ch = src->num_ch;
	geo->num_lun = src->num_lun;
	geo->all_luns = geo->num_ch * geo->num_lun;

	geo->num_chk = le16_to_cpu(src->num_chk);

	geo->csecs = le16_to_cpu(src->csecs);
	geo->sos = le16_to_cpu(src->sos);

	pg_per_blk = le16_to_cpu(src->num_pg);
	sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs;
	sec_per_pl = sec_per_pg * src->num_pln;
	geo->clba = sec_per_pl * pg_per_blk;

	geo->all_chunks = geo->all_luns * geo->num_chk;
	geo->total_secs = geo->clba * geo->all_chunks;

	geo->ws_min = sec_per_pg;
	geo->ws_opt = sec_per_pg;
	geo->mw_cunits = geo->ws_opt << 3;	/* default to MLC safe values */

	/* Do not impose values for maximum number of open blocks as it is
	 * unspecified in 1.2. Users of 1.2 must be aware of this and eventually
	 * specify these values through a quirk if restrictions apply.
	 */
	geo->maxoc = geo->all_luns * geo->num_chk;
	geo->maxocpu = geo->num_chk;

	geo->mccap = le32_to_cpu(src->mccap);

	geo->trdt = le32_to_cpu(src->trdt);
	geo->trdm = le32_to_cpu(src->trdm);
	geo->tprt = le32_to_cpu(src->tprt);
	geo->tprm = le32_to_cpu(src->tprm);
	geo->tbet = le32_to_cpu(src->tbet);
	geo->tbem = le32_to_cpu(src->tbem);

	/* 1.2 compatibility */
	geo->vmnt = id->vmnt;
	geo->cap = le32_to_cpu(id->cap);
	geo->dom = le32_to_cpu(id->dom);

	geo->mtype = src->mtype;
	geo->fmtype = src->fmtype;

	geo->cpar = le16_to_cpu(src->cpar);
	geo->mpos = le32_to_cpu(src->mpos);

	geo->pln_mode = NVM_PLANE_SINGLE;

	if (geo->mpos & 0x020202) {
		geo->pln_mode = NVM_PLANE_DOUBLE;
	} else if (geo->mpos & 0x040404) {
		geo->pln_mode = NVM_PLANE_QUAD;
	}

	geo->num_pln = src->num_pln;
	geo->num_pg = le16_to_cpu(src->num_pg);
	geo->fpg_sz = le16_to_cpu(src->fpg_sz);

	nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf);

	return 0;
}
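
/*
 * The 2.0 address format packs group, parallel unit, chunk and logical block
 * contiguously from bit 0, so the device only reports field widths; offsets
 * and masks are derived here.
 */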
static void nvme_nvm_set_addr_20(struct nvm_addrf *dst,
				 struct nvme_nvm_id20_addrf *src)
{
	dst->ch_len = src->grp_len;
	dst->lun_len = src->pu_len;
	dst->chk_len = src->chk_len;
	dst->sec_len = src->lba_len;

	dst->sec_offset = 0;
	dst->chk_offset = dst->sec_len;
	dst->lun_offset = dst->chk_offset + dst->chk_len;
	dst->ch_offset = dst->lun_offset + dst->lun_len;

	dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
	dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
	dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset;
	dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
}
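
/*
 * Unpack a 2.0 identify geometry response. Unlike 1.2, the chunk size and
 * the write/reset timing values are reported directly by the device.
 */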
static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
			     struct nvm_geo *geo)
{
	geo->major_ver_id = id->mjr;
	geo->minor_ver_id = id->mnr;

	/* Set compacted version for upper layers */
	geo->version = NVM_OCSSD_SPEC_20;

	if (!(geo->major_ver_id == 2 && geo->minor_ver_id == 0)) {
		pr_err("nvm: OCSSD version not supported (v%d.%d)\n",
				geo->major_ver_id, geo->minor_ver_id);
		return -EINVAL;
	}

	geo->num_ch = le16_to_cpu(id->num_grp);
	geo->num_lun = le16_to_cpu(id->num_pu);
	geo->all_luns = geo->num_ch * geo->num_lun;

	geo->num_chk = le32_to_cpu(id->num_chk);
	geo->clba = le32_to_cpu(id->clba);

	geo->all_chunks = geo->all_luns * geo->num_chk;
	geo->total_secs = geo->clba * geo->all_chunks;

	geo->ws_min = le32_to_cpu(id->ws_min);
	geo->ws_opt = le32_to_cpu(id->ws_opt);
	geo->mw_cunits = le32_to_cpu(id->mw_cunits);
	geo->maxoc = le32_to_cpu(id->maxoc);
	geo->maxocpu = le32_to_cpu(id->maxocpu);

	geo->trdt = le32_to_cpu(id->trdt);
	geo->trdm = le32_to_cpu(id->trdm);
	geo->tprt = le32_to_cpu(id->twrt);
	geo->tprm = le32_to_cpu(id->twrm);
	geo->tbet = le32_to_cpu(id->tcrst);
	geo->tbem = le32_to_cpu(id->tcrsm);

	nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf);

	return 0;
}
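
/*
 * Issue the vendor-specific identify geometry command and dispatch on the
 * version byte, which the 1.2 and 2.0 layouts share.
 */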
static int nvme_nvm_identity(struct nvm_dev *nvmdev)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_id12 *id;
	struct nvme_nvm_command c = {};
	int ret;

	c.identity.opcode = nvme_nvm_admin_identity;
	c.identity.nsid = cpu_to_le32(ns->head->ns_id);

	id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
				id, sizeof(struct nvme_nvm_id12));
	if (ret) {
		ret = -EIO;
		goto out;
	}

	/*
	 * The 1.2 and 2.0 specifications share the first byte in their geometry
	 * command to make it possible to know what version a device implements.
	 */
	switch (id->ver_id) {
	case 1:
		ret = nvme_nvm_setup_12(id, &nvmdev->geo);
		break;
	case 2:
		ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id,
							&nvmdev->geo);
		break;
	default:
		dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n",
							id->ver_id);
		ret = -EINVAL;
	}

out:
	kfree(id);
	return ret;
}
static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
								u8 *blks)
{
	struct request_queue *q = nvmdev->q;
	struct nvm_geo *geo = &nvmdev->geo;
	struct nvme_ns *ns = q->queuedata;
	struct nvme_ctrl *ctrl = ns->ctrl;
	struct nvme_nvm_command c = {};
	struct nvme_nvm_bb_tbl *bb_tbl;
	int nr_blks = geo->num_chk * geo->num_pln;
	int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
	int ret = 0;

	c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
	c.get_bb.nsid = cpu_to_le32(ns->head->ns_id);
	c.get_bb.spba = cpu_to_le64(ppa.ppa);

	bb_tbl = kzalloc(tblsz, GFP_KERNEL);
	if (!bb_tbl)
		return -ENOMEM;

	ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
								bb_tbl, tblsz);
	if (ret) {
		dev_err(ctrl->device, "get bad block table failed (%d)\n", ret);
		ret = -EIO;
		goto out;
	}

	if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
		bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
		dev_err(ctrl->device, "bbt format mismatch\n");
		ret = -EINVAL;
		goto out;
	}

	if (le16_to_cpu(bb_tbl->verid) != 1) {
		dev_err(ctrl->device, "bbt version not supported\n");
		ret = -EINVAL;
		goto out;
	}

	if (le32_to_cpu(bb_tbl->tblks) != nr_blks) {
		dev_err(ctrl->device,
				"bbt unexpected blocks returned (%u!=%u)",
				le32_to_cpu(bb_tbl->tblks), nr_blks);
		ret = -EINVAL;
		goto out;
	}

	memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln);
out:
	kfree(bb_tbl);
	return ret;
}
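
/*
 * Mark a range of physical blocks with the given bad block state through the
 * vendor-specific set bad block table admin command.
 */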
static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
							int nr_ppas, int type)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_command c = {};
	int ret = 0;

	c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
	c.set_bb.nsid = cpu_to_le32(ns->head->ns_id);
	c.set_bb.spba = cpu_to_le64(ppas->ppa);
	c.set_bb.nlb = cpu_to_le16(nr_ppas - 1);
	c.set_bb.value = type;

	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
								NULL, 0);
	if (ret)
		dev_err(ns->ctrl->device, "set bad block table failed (%d)\n",
									ret);
	return ret;
}
/*
 * Expect the lba in device format
 */
static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
				 struct nvm_chk_meta *meta,
				 sector_t slba, int nchks)
{
	struct nvm_geo *geo = &ndev->geo;
	struct nvme_ns *ns = ndev->q->queuedata;
	struct nvme_ctrl *ctrl = ns->ctrl;
	struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta;
	struct ppa_addr ppa;
	size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
	size_t log_pos, offset, len;
	int ret, i;

	/* Normalize lba address space to obtain log offset */
	ppa.ppa = slba;
	ppa = dev_to_generic_addr(ndev, ppa);

	log_pos = ppa.m.chk;
	log_pos += ppa.m.pu * geo->num_chk;
	log_pos += ppa.m.grp * geo->num_lun * geo->num_chk;

	offset = log_pos * sizeof(struct nvme_nvm_chk_meta);

	while (left) {
		len = min_t(unsigned int, left, ctrl->max_hw_sectors << 9);

		ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
				dev_meta, len, offset);
		if (ret) {
			dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
			break;
		}

		for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
			meta->state = dev_meta->state;
			meta->type = dev_meta->type;
			meta->wi = dev_meta->wi;
			meta->slba = le64_to_cpu(dev_meta->slba);
			meta->cnlb = le64_to_cpu(dev_meta->cnlb);
			meta->wp = le64_to_cpu(dev_meta->wp);

			meta++;
			dev_meta++;
		}

		offset += len;
		left -= len;
	}

	return ret;
}
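
/*
 * Translate a lightnvm request descriptor into a vector read/write NVMe
 * command: the PPA list (or single PPA) goes into spba, the out-of-band
 * metadata DMA address into metadata, and nr_ppas - 1 into length.
 */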
static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
				    struct nvme_nvm_command *c)
{
	c->ph_rw.opcode = rqd->opcode;
	c->ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
	c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
	c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list);
	c->ph_rw.control = cpu_to_le16(rqd->flags);
	c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1);
}
static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
{
	struct nvm_rq *rqd = rq->end_io_data;

	rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
	rqd->error = nvme_req(rq)->status;
	nvm_end_io(rqd);

	kfree(nvme_req(rq)->cmd);
	blk_mq_free_request(rq);
}
static struct request *nvme_nvm_alloc_request(struct request_queue *q,
					      struct nvm_rq *rqd,
					      struct nvme_nvm_command *cmd)
{
	struct nvme_ns *ns = q->queuedata;
	struct request *rq;

	nvme_nvm_rqtocmd(rqd, ns, cmd);

	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
	if (IS_ERR(rq))
		return rq;

	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;

	if (rqd->bio)
		blk_init_request_from_bio(rq, rqd->bio);
	else
		rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);

	return rq;
}
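
/*
 * Asynchronous submission path: the command buffer is freed and the lightnvm
 * request is completed from nvme_nvm_end_io() once the device finishes.
 */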
static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct nvme_nvm_command *cmd;
	struct request *rq;

	cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
	if (!cmd)
		return -ENOMEM;

	rq = nvme_nvm_alloc_request(q, rqd, cmd);
	if (IS_ERR(rq)) {
		kfree(cmd);
		return PTR_ERR(rq);
	}

	rq->end_io_data = rqd;

	blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);

	return 0;
}
static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct request *rq;
	struct nvme_nvm_command cmd;
	int ret = 0;

	memset(&cmd, 0, sizeof(struct nvme_nvm_command));

	rq = nvme_nvm_alloc_request(q, rqd, &cmd);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* I/Os can fail and the error is signaled through rqd. Callers must
	 * handle the error accordingly.
	 */
	blk_execute_rq(q, NULL, rq, 0);
	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
		ret = -EINTR;

	rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
	rqd->error = nvme_req(rq)->status;

	blk_mq_free_request(rq);

	return ret;
}
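
/*
 * Per-device DMA pool hooks. The pool is created with PAGE_SIZE-sized,
 * PAGE_SIZE-aligned entries and backs the PPA lists and out-of-band metadata
 * buffers used by the user command path below.
 */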
static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;

	return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
}

static void nvme_nvm_destroy_dma_pool(void *pool)
{
	struct dma_pool *dma_pool = pool;

	dma_pool_destroy(dma_pool);
}

static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
				    gfp_t mem_flags, dma_addr_t *dma_handler)
{
	return dma_pool_alloc(pool, mem_flags, dma_handler);
}

static void nvme_nvm_dev_dma_free(void *pool, void *addr,
				  dma_addr_t dma_handler)
{
	dma_pool_free(pool, addr, dma_handler);
}
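
/*
 * Operations table handed to the lightnvm core through nvm_register().
 */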
static struct nvm_dev_ops nvme_nvm_dev_ops = {
	.identity		= nvme_nvm_identity,

	.get_bb_tbl		= nvme_nvm_get_bb_tbl,
	.set_bb_tbl		= nvme_nvm_set_bb_tbl,

	.get_chk_meta		= nvme_nvm_get_chk_meta,

	.submit_io		= nvme_nvm_submit_io,
	.submit_io_sync		= nvme_nvm_submit_io_sync,

	.create_dma_pool	= nvme_nvm_create_dma_pool,
	.destroy_dma_pool	= nvme_nvm_destroy_dma_pool,
	.dev_dma_alloc		= nvme_nvm_dev_dma_alloc,
	.dev_dma_free		= nvme_nvm_dev_dma_free,
};
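
/*
 * Common helper for the vector ioctls: maps the user data buffer onto the
 * request, copies the PPA list and (for writes) the metadata buffer into
 * DMA-able memory, executes the command synchronously, and copies status,
 * result and read metadata back to user space.
 */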
static int nvme_nvm_submit_user_cmd(struct request_queue *q,
				struct nvme_ns *ns,
				struct nvme_nvm_command *vcmd,
				void __user *ubuf, unsigned int bufflen,
				void __user *meta_buf, unsigned int meta_len,
				void __user *ppa_buf, unsigned int ppa_len,
				u32 *result, u64 *status, unsigned int timeout)
{
	bool write = nvme_is_write((struct nvme_command *)vcmd);
	struct nvm_dev *dev = ns->ndev;
	struct gendisk *disk = ns->disk;
	struct request *rq;
	struct bio *bio = NULL;
	__le64 *ppa_list = NULL;
	dma_addr_t ppa_dma;
	__le64 *metadata = NULL;
	dma_addr_t metadata_dma;
	DECLARE_COMPLETION_ONSTACK(wait);
	int ret = 0;

	rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
			NVME_QID_ANY);
	if (IS_ERR(rq)) {
		ret = -ENOMEM;
		goto err_cmd;
	}

	rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;

	if (ppa_buf && ppa_len) {
		ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
		if (!ppa_list) {
			ret = -ENOMEM;
			goto err_rq;
		}
		if (copy_from_user(ppa_list, (void __user *)ppa_buf,
						sizeof(u64) * (ppa_len + 1))) {
			ret = -EFAULT;
			goto err_ppa;
		}
		vcmd->ph_rw.spba = cpu_to_le64(ppa_dma);
	} else {
		vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf);
	}

	if (ubuf && bufflen) {
		ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL);
		if (ret)
			goto err_ppa;
		bio = rq->bio;

		if (meta_buf && meta_len) {
			metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL,
								&metadata_dma);
			if (!metadata) {
				ret = -ENOMEM;
				goto err_map;
			}

			if (write) {
				if (copy_from_user(metadata,
						(void __user *)meta_buf,
						meta_len)) {
					ret = -EFAULT;
					goto err_meta;
				}
			}
			vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
		}

		bio->bi_disk = disk;
	}

	blk_execute_rq(q, NULL, rq, 0);

	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
		ret = -EINTR;
	else if (nvme_req(rq)->status & 0x7ff)
		ret = -EIO;
	if (result)
		*result = nvme_req(rq)->status & 0x7ff;
	if (status)
		*status = le64_to_cpu(nvme_req(rq)->result.u64);

	if (metadata && !ret && !write) {
		if (copy_to_user(meta_buf, (void *)metadata, meta_len))
			ret = -EFAULT;
	}

err_meta:
	if (meta_buf && meta_len)
		dma_pool_free(dev->dma_pool, metadata, metadata_dma);
err_map:
	if (bio)
		blk_rq_unmap_user(bio);
err_ppa:
	if (ppa_buf && ppa_len)
		dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
err_rq:
	blk_mq_free_request(rq);
err_cmd:
	return ret;
}
static int nvme_nvm_submit_vio(struct nvme_ns *ns,
					struct nvm_user_vio __user *uvio)
{
	struct nvm_user_vio vio;
	struct nvme_nvm_command c;
	unsigned int length;
	int ret;

	if (copy_from_user(&vio, uvio, sizeof(vio)))
		return -EFAULT;

	memset(&c, 0, sizeof(c));
	c.ph_rw.opcode = vio.opcode;
	c.ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
	c.ph_rw.control = cpu_to_le16(vio.control);
	c.ph_rw.length = cpu_to_le16(vio.nppas);

	length = (vio.nppas + 1) << ns->lba_shift;

	ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c,
			(void __user *)(uintptr_t)vio.addr, length,
			(void __user *)(uintptr_t)vio.metadata,
							vio.metadata_len,
			(void __user *)(uintptr_t)vio.ppa_list, vio.nppas,
			&vio.result, &vio.status, 0);

	if (ret && copy_to_user(uvio, &vio, sizeof(vio)))
		return -EFAULT;

	return ret;
}
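
/*
 * Generic passthrough for admin and I/O vector commands. Any opcode other
 * than get bad block table (0xf2) requires CAP_SYS_ADMIN.
 */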
static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
					struct nvm_passthru_vio __user *uvcmd)
{
	struct nvm_passthru_vio vcmd;
	struct nvme_nvm_command c;
	struct request_queue *q;
	unsigned int timeout = 0;
	int ret;

	if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd)))
		return -EFAULT;
	if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN)))
		return -EACCES;

	memset(&c, 0, sizeof(c));
	c.common.opcode = vcmd.opcode;
	c.common.nsid = cpu_to_le32(ns->head->ns_id);
	c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
	c.ph_rw.length = cpu_to_le16(vcmd.nppas);
	c.ph_rw.control = cpu_to_le16(vcmd.control);
	c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
	c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
	c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);

	if (vcmd.timeout_ms)
		timeout = msecs_to_jiffies(vcmd.timeout_ms);

	q = admin ? ns->ctrl->admin_q : ns->queue;

	ret = nvme_nvm_submit_user_cmd(q, ns,
			(struct nvme_nvm_command *)&c,
			(void __user *)(uintptr_t)vcmd.addr, vcmd.data_len,
			(void __user *)(uintptr_t)vcmd.metadata,
							vcmd.metadata_len,
			(void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas,
			&vcmd.result, &vcmd.status, timeout);

	if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd)))
		return -EFAULT;

	return ret;
}
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case NVME_NVM_IOCTL_ADMIN_VIO:
		return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg);
	case NVME_NVM_IOCTL_IO_VIO:
		return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg);
	case NVME_NVM_IOCTL_SUBMIT_VIO:
		return nvme_nvm_submit_vio(ns, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}
void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
{
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;

	geo->csecs = 1 << ns->lba_shift;
}
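
/*
 * Allocate and register a lightnvm device on top of this namespace; the
 * geometry is obtained via the .identity callback when the core probes it.
 */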
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
{
	struct request_queue *q = ns->queue;
	struct nvm_dev *dev;

	_nvme_nvm_check_size();

	dev = nvm_alloc_dev(node);
	if (!dev)
		return -ENOMEM;

	dev->q = q;
	memcpy(dev->name, disk_name, DISK_NAME_LEN);
	dev->ops = &nvme_nvm_dev_ops;
	dev->private_data = ns;
	ns->ndev = dev;

	return nvm_register(dev);
}
void nvme_nvm_unregister(struct nvme_ns *ns)
{
	nvm_unregister(ns->ndev);
}
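
/*
 * sysfs attributes exposed under the namespace's disk device. version,
 * capabilities and the read timings are shared between the 1.2 and 2.0
 * groups; the remaining attributes are spec-specific, and the group to
 * register is chosen from the reported major version.
 */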
static ssize_t nvm_dev_attr_show(struct device *dev,
		struct device_attribute *dattr, char *page)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;
	struct attribute *attr;

	attr = &dattr->attr;

	if (strcmp(attr->name, "version") == 0) {
		if (geo->major_ver_id == 1)
			return scnprintf(page, PAGE_SIZE, "%u\n",
						geo->major_ver_id);
		else
			return scnprintf(page, PAGE_SIZE, "%u.%u\n",
						geo->major_ver_id,
						geo->minor_ver_id);
	} else if (strcmp(attr->name, "capabilities") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap);
	} else if (strcmp(attr->name, "read_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt);
	} else if (strcmp(attr->name, "read_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm);
	} else {
		return scnprintf(page,
				 PAGE_SIZE,
				 "Unhandled attr(%s) in `%s`\n",
				 attr->name, __func__);
	}
}
static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page)
{
	return scnprintf(page, PAGE_SIZE,
		"0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
		ppaf->ch_offset, ppaf->ch_len,
		ppaf->lun_offset, ppaf->lun_len,
		ppaf->pln_offset, ppaf->pln_len,
		ppaf->blk_offset, ppaf->blk_len,
		ppaf->pg_offset, ppaf->pg_len,
		ppaf->sec_offset, ppaf->sec_len);
}
static ssize_t nvm_dev_attr_show_12(struct device *dev,
		struct device_attribute *dattr, char *page)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;
	struct attribute *attr;

	attr = &dattr->attr;

	if (strcmp(attr->name, "vendor_opcode") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt);
	} else if (strcmp(attr->name, "device_mode") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom);
	/* kept for compatibility */
	} else if (strcmp(attr->name, "media_manager") == 0) {
		return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
	} else if (strcmp(attr->name, "ppa_format") == 0) {
		return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page);
	} else if (strcmp(attr->name, "media_type") == 0) {	/* u8 */
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype);
	} else if (strcmp(attr->name, "flash_media_type") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype);
	} else if (strcmp(attr->name, "num_channels") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
	} else if (strcmp(attr->name, "num_luns") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
	} else if (strcmp(attr->name, "num_planes") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln);
	} else if (strcmp(attr->name, "num_blocks") == 0) {	/* u16 */
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
	} else if (strcmp(attr->name, "num_pages") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg);
	} else if (strcmp(attr->name, "page_size") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz);
	} else if (strcmp(attr->name, "hw_sector_size") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs);
	} else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos);
	} else if (strcmp(attr->name, "prog_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
	} else if (strcmp(attr->name, "prog_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
	} else if (strcmp(attr->name, "erase_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
	} else if (strcmp(attr->name, "erase_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
	} else if (strcmp(attr->name, "multiplane_modes") == 0) {
		return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos);
	} else if (strcmp(attr->name, "media_capabilities") == 0) {
		return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap);
	} else if (strcmp(attr->name, "max_phys_secs") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA);
	} else {
		return scnprintf(page, PAGE_SIZE,
			"Unhandled attr(%s) in `%s`\n",
			attr->name, __func__);
	}
}
static ssize_t nvm_dev_attr_show_20(struct device *dev,
		struct device_attribute *dattr, char *page)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;
	struct attribute *attr;

	attr = &dattr->attr;

	if (strcmp(attr->name, "groups") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
	} else if (strcmp(attr->name, "punits") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
	} else if (strcmp(attr->name, "chunks") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
	} else if (strcmp(attr->name, "clba") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba);
	} else if (strcmp(attr->name, "ws_min") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min);
	} else if (strcmp(attr->name, "ws_opt") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt);
	} else if (strcmp(attr->name, "maxoc") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc);
	} else if (strcmp(attr->name, "maxocpu") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu);
	} else if (strcmp(attr->name, "mw_cunits") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits);
	} else if (strcmp(attr->name, "write_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
	} else if (strcmp(attr->name, "write_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
	} else if (strcmp(attr->name, "reset_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
	} else if (strcmp(attr->name, "reset_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
	} else {
		return scnprintf(page, PAGE_SIZE,
			"Unhandled attr(%s) in `%s`\n",
			attr->name, __func__);
	}
}
#define NVM_DEV_ATTR_RO(_name)						\
	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
#define NVM_DEV_ATTR_12_RO(_name)					\
	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL)
#define NVM_DEV_ATTR_20_RO(_name)					\
	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL)

/* general attributes */
static NVM_DEV_ATTR_RO(version);
static NVM_DEV_ATTR_RO(capabilities);

static NVM_DEV_ATTR_RO(read_typ);
static NVM_DEV_ATTR_RO(read_max);

static NVM_DEV_ATTR_12_RO(vendor_opcode);
static NVM_DEV_ATTR_12_RO(device_mode);
static NVM_DEV_ATTR_12_RO(ppa_format);
static NVM_DEV_ATTR_12_RO(media_manager);
static NVM_DEV_ATTR_12_RO(media_type);
static NVM_DEV_ATTR_12_RO(flash_media_type);
static NVM_DEV_ATTR_12_RO(num_channels);
static NVM_DEV_ATTR_12_RO(num_luns);
static NVM_DEV_ATTR_12_RO(num_planes);
static NVM_DEV_ATTR_12_RO(num_blocks);
static NVM_DEV_ATTR_12_RO(num_pages);
static NVM_DEV_ATTR_12_RO(page_size);
static NVM_DEV_ATTR_12_RO(hw_sector_size);
static NVM_DEV_ATTR_12_RO(oob_sector_size);
static NVM_DEV_ATTR_12_RO(prog_typ);
static NVM_DEV_ATTR_12_RO(prog_max);
static NVM_DEV_ATTR_12_RO(erase_typ);
static NVM_DEV_ATTR_12_RO(erase_max);
static NVM_DEV_ATTR_12_RO(multiplane_modes);
static NVM_DEV_ATTR_12_RO(media_capabilities);
static NVM_DEV_ATTR_12_RO(max_phys_secs);
static struct attribute *nvm_dev_attrs_12[] = {
	&dev_attr_version.attr,
	&dev_attr_capabilities.attr,

	&dev_attr_vendor_opcode.attr,
	&dev_attr_device_mode.attr,
	&dev_attr_media_manager.attr,
	&dev_attr_ppa_format.attr,
	&dev_attr_media_type.attr,
	&dev_attr_flash_media_type.attr,
	&dev_attr_num_channels.attr,
	&dev_attr_num_luns.attr,
	&dev_attr_num_planes.attr,
	&dev_attr_num_blocks.attr,
	&dev_attr_num_pages.attr,
	&dev_attr_page_size.attr,
	&dev_attr_hw_sector_size.attr,
	&dev_attr_oob_sector_size.attr,
	&dev_attr_read_typ.attr,
	&dev_attr_read_max.attr,
	&dev_attr_prog_typ.attr,
	&dev_attr_prog_max.attr,
	&dev_attr_erase_typ.attr,
	&dev_attr_erase_max.attr,
	&dev_attr_multiplane_modes.attr,
	&dev_attr_media_capabilities.attr,
	&dev_attr_max_phys_secs.attr,

	NULL,
};

static const struct attribute_group nvm_dev_attr_group_12 = {
	.attrs		= nvm_dev_attrs_12,
};
static NVM_DEV_ATTR_20_RO(groups);
static NVM_DEV_ATTR_20_RO(punits);
static NVM_DEV_ATTR_20_RO(chunks);
static NVM_DEV_ATTR_20_RO(clba);
static NVM_DEV_ATTR_20_RO(ws_min);
static NVM_DEV_ATTR_20_RO(ws_opt);
static NVM_DEV_ATTR_20_RO(maxoc);
static NVM_DEV_ATTR_20_RO(maxocpu);
static NVM_DEV_ATTR_20_RO(mw_cunits);
static NVM_DEV_ATTR_20_RO(write_typ);
static NVM_DEV_ATTR_20_RO(write_max);
static NVM_DEV_ATTR_20_RO(reset_typ);
static NVM_DEV_ATTR_20_RO(reset_max);

static struct attribute *nvm_dev_attrs_20[] = {
	&dev_attr_version.attr,
	&dev_attr_capabilities.attr,

	&dev_attr_groups.attr,
	&dev_attr_punits.attr,
	&dev_attr_chunks.attr,
	&dev_attr_clba.attr,
	&dev_attr_ws_min.attr,
	&dev_attr_ws_opt.attr,
	&dev_attr_maxoc.attr,
	&dev_attr_maxocpu.attr,
	&dev_attr_mw_cunits.attr,

	&dev_attr_read_typ.attr,
	&dev_attr_read_max.attr,
	&dev_attr_write_typ.attr,
	&dev_attr_write_max.attr,
	&dev_attr_reset_typ.attr,
	&dev_attr_reset_max.attr,

	NULL,
};

static const struct attribute_group nvm_dev_attr_group_20 = {
	.attrs		= nvm_dev_attrs_20,
};
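
/*
 * Register the spec-specific attribute group for this namespace, selected by
 * the major version reported by the device.
 */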
int nvme_nvm_register_sysfs(struct nvme_ns *ns)
{
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;

	switch (geo->major_ver_id) {
	case 1:
		return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
					&nvm_dev_attr_group_12);
	case 2:
		return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
					&nvm_dev_attr_group_20);
	}

	return -EINVAL;
}
void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
{
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_geo *geo = &ndev->geo;

	switch (geo->major_ver_id) {
	case 1:
		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
					&nvm_dev_attr_group_12);
		break;
	case 2:
		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
					&nvm_dev_attr_group_20);
		break;
	}
}