/*
 * nvme-lightnvm.c - LightNVM NVMe device
 *
 * Copyright (C) 2014-2015 IT University of Copenhagen
 * Initial release: Matias Bjorling <mb@lightnvm.io>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 */

#include "nvme.h"

#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
#include <linux/vmalloc.h>
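
/* LightNVM-specific (vendor unique) NVMe admin opcodes */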
enum nvme_nvm_admin_opcode {
	nvme_nvm_admin_identity		= 0xe2,
	nvme_nvm_admin_get_l2p_tbl	= 0xea,
	nvme_nvm_admin_get_bb_tbl	= 0xf2,
	nvme_nvm_admin_set_bb_tbl	= 0xf1,
};

struct nvme_nvm_hb_rw {
	/* ... */
};

struct nvme_nvm_ph_rw {
	/* ... */
};

struct nvme_nvm_identity {
	/* ... */
};

struct nvme_nvm_l2ptbl {
	/* ... */
};

struct nvme_nvm_getbbtbl {
	/* ... */
};

struct nvme_nvm_setbbtbl {
	/* ... */
};

struct nvme_nvm_erase_blk {
	/* ... */
};

struct nvme_nvm_command {
	union {
		struct nvme_common_command common;
		struct nvme_nvm_identity identity;
		struct nvme_nvm_hb_rw hb_rw;
		struct nvme_nvm_ph_rw ph_rw;
		struct nvme_nvm_l2ptbl l2p;
		struct nvme_nvm_getbbtbl get_bb;
		struct nvme_nvm_setbbtbl set_bb;
		struct nvme_nvm_erase_blk erase;
	};
};

struct nvme_nvm_completion {
	__le64	result;		/* Used by LightNVM to return ppa completions */
	__le16	sq_head;	/* how much of this queue may be reclaimed */
	__le16	sq_id;		/* submission queue that generated this entry */
	__u16	command_id;	/* of the command which completed */
	__le16	status;		/* did the command fail, and if so, why? */
};

#define NVME_NVM_LP_MLC_PAIRS 886
struct nvme_nvm_lp_mlc {
	__le16			num_pairs;
	__u8			pairs[NVME_NVM_LP_MLC_PAIRS];
};

struct nvme_nvm_lp_tbl {
	__u8			id[8];
	struct nvme_nvm_lp_mlc	mlc;
};

struct nvme_nvm_id_group {
	/* ... */
	struct nvme_nvm_lp_tbl	lptbl;
};

struct nvme_nvm_addr_format {
	/* ... */
};

struct nvme_nvm_id {
	/* ... */
	struct nvme_nvm_addr_format	ppaf;
	/* ... */
	struct nvme_nvm_id_group	groups[4];
};

struct nvme_nvm_bb_tbl {
	/* ... */
};

/*
 * Check we didn't inadvertently grow the command struct
 */
static inline void _nvme_nvm_check_size(void)
{
	BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512);
}
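
/*
 * Copy the per-group geometry reported by the device into the generic
 * nvm_id structure used by the LightNVM core, byte-swapping the
 * little-endian fields on the way.
 */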
static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
{
	struct nvme_nvm_id_group *src;
	struct nvm_id_group *dst;
	int i, end;

	end = min_t(u32, 4, nvm_id->cgrps);

	for (i = 0; i < end; i++) {
		src = &nvme_nvm_id->groups[i];
		dst = &nvm_id->groups[i];

		dst->mtype = src->mtype;
		dst->fmtype = src->fmtype;
		dst->num_ch = src->num_ch;
		dst->num_lun = src->num_lun;
		dst->num_pln = src->num_pln;

		dst->num_pg = le16_to_cpu(src->num_pg);
		dst->num_blk = le16_to_cpu(src->num_blk);
		dst->fpg_sz = le16_to_cpu(src->fpg_sz);
		dst->csecs = le16_to_cpu(src->csecs);
		dst->sos = le16_to_cpu(src->sos);

		dst->trdt = le32_to_cpu(src->trdt);
		dst->trdm = le32_to_cpu(src->trdm);
		dst->tprt = le32_to_cpu(src->tprt);
		dst->tprm = le32_to_cpu(src->tprm);
		dst->tbet = le32_to_cpu(src->tbet);
		dst->tbem = le32_to_cpu(src->tbem);
		dst->mpos = le32_to_cpu(src->mpos);
		dst->mccap = le32_to_cpu(src->mccap);

		dst->cpar = le16_to_cpu(src->cpar);

		if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
			memcpy(dst->lptbl.id, src->lptbl.id, 8);
			dst->lptbl.mlc.num_pairs =
					le16_to_cpu(src->lptbl.mlc.num_pairs);

			if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
				pr_err("nvm: number of MLC pairs not supported\n");
				return -EINVAL;
			}

			memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
					dst->lptbl.mlc.num_pairs);
		}
	}

	return 0;
}
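
/*
 * Issue the LightNVM identity admin command and translate the returned
 * identify structure into the generic nvm_id description.
 */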
static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_id *nvme_nvm_id;
	struct nvme_nvm_command c = {};
	int ret;

	c.identity.opcode = nvme_nvm_admin_identity;
	c.identity.nsid = cpu_to_le32(ns->ns_id);
	c.identity.chnl_off = 0;

	nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL);
	if (!nvme_nvm_id)
		return -ENOMEM;

	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
				nvme_nvm_id, sizeof(struct nvme_nvm_id));
	if (ret) {
		ret = -EIO;
		goto out;
	}

	nvm_id->ver_id = nvme_nvm_id->ver_id;
	nvm_id->vmnt = nvme_nvm_id->vmnt;
	nvm_id->cgrps = nvme_nvm_id->cgrps;
	nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
	nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
	memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
					sizeof(struct nvme_nvm_addr_format));

	ret = init_grps(nvm_id, nvme_nvm_id);
out:
	kfree(nvme_nvm_id);
	return ret;
}
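
/*
 * Read the device's logical-to-physical table in admin-queue-sized chunks
 * and hand each chunk to the caller's update_l2p callback.
 */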
static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
				nvm_l2p_update_fn *update_l2p, void *priv)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_command c = {};
	u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9;
	u32 nlb_pr_rq = len / sizeof(u64);
	u64 cmd_slba = slba;
	void *entries;
	int ret = 0;

	c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl;
	c.l2p.nsid = cpu_to_le32(ns->ns_id);
	entries = kmalloc(len, GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	while (nlb) {
		u32 cmd_nlb = min(nlb_pr_rq, nlb);

		c.l2p.slba = cpu_to_le64(cmd_slba);
		c.l2p.nlb = cpu_to_le32(cmd_nlb);

		ret = nvme_submit_sync_cmd(ns->ctrl->admin_q,
				(struct nvme_command *)&c, entries, len);
		if (ret) {
			dev_err(ns->ctrl->dev, "L2P table transfer failed (%d)\n",
									ret);
			ret = -EIO;
			goto out;
		}

		if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
			ret = -EINTR;
			goto out;
		}

		cmd_slba += cmd_nlb;
		nlb -= cmd_nlb;
	}

out:
	kfree(entries);
	return ret;
}
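
/*
 * Fold the per-plane bad block entries returned by the device into a
 * single block type per physical block.
 */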
static void nvme_nvm_bb_tbl_fold(struct nvm_dev *nvmdev,
				int nr_dst_blks, u8 *dst_blks,
				int nr_src_blks, u8 *src_blks)
{
	int blk, offset, pl, blktype;

	for (blk = 0; blk < nr_dst_blks; blk++) {
		offset = blk * nvmdev->plane_mode;
		blktype = src_blks[offset];

		/* Bad blocks on any planes take precedence over other types */
		for (pl = 0; pl < nvmdev->plane_mode; pl++) {
			if (src_blks[offset + pl] &
					(NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
				blktype = src_blks[offset + pl];
				break;
			}
		}

		dst_blks[blk] = blktype;
	}
}
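
/*
 * Fetch and validate the bad block table for the LUN addressed by ppa,
 * fold it per plane and pass the result to the caller's update_bbtbl
 * callback.
 */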
static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
				int nr_dst_blks, nvm_bb_update_fn *update_bbtbl,
				void *priv)
{
	struct request_queue *q = nvmdev->q;
	struct nvme_ns *ns = q->queuedata;
	struct nvme_ctrl *ctrl = ns->ctrl;
	struct nvme_nvm_command c = {};
	struct nvme_nvm_bb_tbl *bb_tbl;
	u8 *dst_blks = NULL;
	int nr_src_blks = nr_dst_blks * nvmdev->plane_mode;
	int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_src_blks;
	int ret = 0;

	c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
	c.get_bb.nsid = cpu_to_le32(ns->ns_id);
	c.get_bb.spba = cpu_to_le64(ppa.ppa);

	bb_tbl = kzalloc(tblsz, GFP_KERNEL);
	if (!bb_tbl)
		return -ENOMEM;

	dst_blks = kzalloc(nr_dst_blks, GFP_KERNEL);
	if (!dst_blks) {
		ret = -ENOMEM;
		goto out;
	}

	ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
								bb_tbl, tblsz);
	if (ret) {
		dev_err(ctrl->dev, "get bad block table failed (%d)\n", ret);
		ret = -EIO;
		goto out;
	}

	if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
		bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
		dev_err(ctrl->dev, "bbt format mismatch\n");
		ret = -EINVAL;
		goto out;
	}

	if (le16_to_cpu(bb_tbl->verid) != 1) {
		dev_err(ctrl->dev, "bbt version not supported\n");
		ret = -EINVAL;
		goto out;
	}

	if (le32_to_cpu(bb_tbl->tblks) != nr_src_blks) {
		dev_err(ctrl->dev, "bbt unexpected blocks returned (%u!=%u)",
					le32_to_cpu(bb_tbl->tblks), nr_src_blks);
		ret = -EINVAL;
		goto out;
	}

	nvme_nvm_bb_tbl_fold(nvmdev, nr_dst_blks, dst_blks,
						nr_src_blks, bb_tbl->blk);

	ppa = dev_to_generic_addr(nvmdev, ppa);
	ret = update_bbtbl(ppa, nr_dst_blks, dst_blks, priv);

out:
	kfree(bb_tbl);
	kfree(dst_blks);
	return ret;
}
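
/*
 * Mark the blocks addressed by the request as the given block type in the
 * device's bad block table.
 */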
static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd,
								int type)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_command c = {};
	int ret = 0;

	c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
	c.set_bb.nsid = cpu_to_le32(ns->ns_id);
	c.set_bb.spba = cpu_to_le64(rqd->ppa_addr.ppa);
	c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
	c.set_bb.value = type;

	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
								NULL, 0);
	if (ret)
		dev_err(ns->ctrl->dev, "set bad block table failed (%d)\n", ret);
	return ret;
}
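
/*
 * Translate a LightNVM request into the vendor-specific NVMe command
 * layout; hybrid read/write additionally carries the logical start block.
 */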
static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
				struct nvme_ns *ns, struct nvme_nvm_command *c)
{
	c->ph_rw.opcode = rqd->opcode;
	c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
	c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
	c->ph_rw.control = cpu_to_le16(rqd->flags);
	c->ph_rw.length = cpu_to_le16(rqd->nr_pages - 1);

	if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD)
		c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns,
					rqd->bio->bi_iter.bi_sector));
}
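
/*
 * Completion handler: propagate the per-ppa completion status back to the
 * LightNVM request, then free the command and the block layer request.
 */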
static void nvme_nvm_end_io(struct request *rq, int error)
{
	struct nvm_rq *rqd = rq->end_io_data;
	struct nvme_nvm_completion *cqe = rq->special;

	if (cqe)
		rqd->ppa_status = le64_to_cpu(cqe->result);

	nvm_end_io(rqd, error);

	kfree(rq->cmd);
	blk_mq_free_request(rq);
}
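
/*
 * Wrap the LightNVM bio in a driver-private block layer request and submit
 * it asynchronously; nvme_nvm_end_io() runs on completion.
 */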
static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct nvme_ns *ns = q->queuedata;
	struct request *rq;
	struct bio *bio = rqd->bio;
	struct nvme_nvm_command *cmd;

	rq = blk_mq_alloc_request(q, bio_rw(bio), 0);
	if (IS_ERR(rq))
		return -ENOMEM;

	cmd = kzalloc(sizeof(struct nvme_nvm_command) +
				sizeof(struct nvme_nvm_completion), GFP_KERNEL);
	if (!cmd) {
		blk_mq_free_request(rq);
		return -ENOMEM;
	}

	rq->cmd_type = REQ_TYPE_DRV_PRIV;
	rq->ioprio = bio_prio(bio);

	if (bio_has_data(bio))
		rq->nr_phys_segments = bio_phys_segments(q, bio);

	rq->__data_len = bio->bi_iter.bi_size;
	rq->bio = rq->biotail = bio;

	nvme_nvm_rqtocmd(rq, rqd, ns, cmd);

	rq->cmd = (unsigned char *)cmd;
	rq->cmd_len = sizeof(struct nvme_nvm_command);
	rq->special = cmd + 1;

	rq->end_io_data = rqd;

	blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);

	return 0;
}
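
/* Issue a synchronous erase for the physical blocks addressed by the request. */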
static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct nvme_ns *ns = q->queuedata;
	struct nvme_nvm_command c = {};

	c.erase.opcode = NVM_OP_ERASE;
	c.erase.nsid = cpu_to_le32(ns->ns_id);
	c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa);
	c.erase.length = cpu_to_le16(rqd->nr_pages - 1);

	return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
}
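
/* DMA pool helpers used by the LightNVM core for per-request PPA lists */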
static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;

	return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
}

static void nvme_nvm_destroy_dma_pool(void *pool)
{
	struct dma_pool *dma_pool = pool;

	dma_pool_destroy(dma_pool);
}

static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
				gfp_t mem_flags, dma_addr_t *dma_handler)
{
	return dma_pool_alloc(pool, mem_flags, dma_handler);
}

static void nvme_nvm_dev_dma_free(void *pool, void *ppa_list,
				dma_addr_t dma_handler)
{
	dma_pool_free(pool, ppa_list, dma_handler);
}

static struct nvm_dev_ops nvme_nvm_dev_ops = {
	.identity		= nvme_nvm_identity,

	.get_l2p_tbl		= nvme_nvm_get_l2p_tbl,

	.get_bb_tbl		= nvme_nvm_get_bb_tbl,
	.set_bb_tbl		= nvme_nvm_set_bb_tbl,

	.submit_io		= nvme_nvm_submit_io,
	.erase_block		= nvme_nvm_erase_block,

	.create_dma_pool	= nvme_nvm_create_dma_pool,
	.destroy_dma_pool	= nvme_nvm_destroy_dma_pool,
	.dev_dma_alloc		= nvme_nvm_dev_dma_alloc,
	.dev_dma_free		= nvme_nvm_dev_dma_free,
};

int nvme_nvm_register(struct request_queue *q, char *disk_name)
{
	return nvm_register(q, disk_name, &nvme_nvm_dev_ops);
}

void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
{
	nvm_unregister(disk_name);
}

/* move to shared place when used in multiple places. */
#define PCI_VENDOR_ID_CNEX 0x1d1d
#define PCI_DEVICE_ID_CNEX_WL 0x2807
#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f
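
/*
 * Detect namespaces that implement the LightNVM command set by matching
 * the known CNEX Labs PCI IDs together with a vendor-specific identify
 * field.
 */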
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	/* XXX: this is poking into PCI structures from generic code! */
	struct pci_dev *pdev = to_pci_dev(ctrl->dev);

	/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
	if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
				pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
							id->vs[0] == 0x1)
		return 1;

	/* CNEX Labs - PCI ID + Vendor specific bit */
	if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
				pdev->device == PCI_DEVICE_ID_CNEX_WL &&
							id->vs[0] == 0x1)
		return 1;

	return 0;
}