2 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
3 * Copyright (C) 2016 CNEX Labs
4 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
5 * Matias Bjorling <matias@cnexlabs.com>
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * Implementation of a physical block-device target for Open-channel SSDs.
18 * pblk-init.c - pblk's initialization.
23 static struct kmem_cache
*pblk_ws_cache
, *pblk_rec_cache
, *pblk_g_rq_cache
,
25 static DECLARE_RWSEM(pblk_lock
);
26 struct bio_set
*pblk_bio_set
;
28 static int pblk_rw_io(struct request_queue
*q
, struct pblk
*pblk
,
33 /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
34 * constraint. Writes can be of arbitrary size.
36 if (bio_data_dir(bio
) == READ
) {
37 blk_queue_split(q
, &bio
);
38 ret
= pblk_submit_read(pblk
, bio
);
39 if (ret
== NVM_IO_DONE
&& bio_flagged(bio
, BIO_CLONED
))
45 /* Prevent deadlock in the case of a modest LUN configuration and large
46 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
47 * available for user I/O.
49 if (pblk_get_secs(bio
) > pblk_rl_max_io(&pblk
->rl
))
50 blk_queue_split(q
, &bio
);
52 return pblk_write_to_cache(pblk
, bio
, PBLK_IOTYPE_USER
);
55 static blk_qc_t
pblk_make_rq(struct request_queue
*q
, struct bio
*bio
)
57 struct pblk
*pblk
= q
->queuedata
;
59 if (bio_op(bio
) == REQ_OP_DISCARD
) {
60 pblk_discard(pblk
, bio
);
61 if (!(bio
->bi_opf
& REQ_PREFLUSH
)) {
67 switch (pblk_rw_io(q
, pblk
, bio
)) {
79 static size_t pblk_trans_map_size(struct pblk
*pblk
)
83 if (pblk
->ppaf_bitsize
< 32)
86 return entry_size
* pblk
->rl
.nr_secs
;
89 #ifdef CONFIG_NVM_DEBUG
90 static u32
pblk_l2p_crc(struct pblk
*pblk
)
95 map_size
= pblk_trans_map_size(pblk
);
96 crc
= crc32_le(crc
, pblk
->trans_map
, map_size
);
101 static void pblk_l2p_free(struct pblk
*pblk
)
103 vfree(pblk
->trans_map
);
106 static int pblk_l2p_init(struct pblk
*pblk
)
112 map_size
= pblk_trans_map_size(pblk
);
113 pblk
->trans_map
= vmalloc(map_size
);
114 if (!pblk
->trans_map
)
117 pblk_ppa_set_empty(&ppa
);
119 for (i
= 0; i
< pblk
->rl
.nr_secs
; i
++)
120 pblk_trans_map_set(pblk
, i
, ppa
);
125 static void pblk_rwb_free(struct pblk
*pblk
)
127 if (pblk_rb_tear_down_check(&pblk
->rwb
))
128 pr_err("pblk: write buffer error on tear down\n");
130 pblk_rb_data_free(&pblk
->rwb
);
131 vfree(pblk_rb_entries_ref(&pblk
->rwb
));
134 static int pblk_rwb_init(struct pblk
*pblk
)
136 struct nvm_tgt_dev
*dev
= pblk
->dev
;
137 struct nvm_geo
*geo
= &dev
->geo
;
138 struct pblk_rb_entry
*entries
;
139 unsigned long nr_entries
;
140 unsigned int power_size
, power_seg_sz
;
142 nr_entries
= pblk_rb_calculate_size(pblk
->pgs_in_buffer
);
144 entries
= vzalloc(nr_entries
* sizeof(struct pblk_rb_entry
));
148 power_size
= get_count_order(nr_entries
);
149 power_seg_sz
= get_count_order(geo
->sec_size
);
151 return pblk_rb_init(&pblk
->rwb
, entries
, power_size
, power_seg_sz
);
154 /* Minimum pages needed within a lun */
155 #define ADDR_POOL_SIZE 64
157 static int pblk_set_ppaf(struct pblk
*pblk
)
159 struct nvm_tgt_dev
*dev
= pblk
->dev
;
160 struct nvm_geo
*geo
= &dev
->geo
;
161 struct nvm_addr_format ppaf
= geo
->ppaf
;
164 /* Re-calculate channel and lun format to adapt to configuration */
165 power_len
= get_count_order(geo
->nr_chnls
);
166 if (1 << power_len
!= geo
->nr_chnls
) {
167 pr_err("pblk: supports only power-of-two channel config.\n");
170 ppaf
.ch_len
= power_len
;
172 power_len
= get_count_order(geo
->nr_luns
);
173 if (1 << power_len
!= geo
->nr_luns
) {
174 pr_err("pblk: supports only power-of-two LUN config.\n");
177 ppaf
.lun_len
= power_len
;
179 pblk
->ppaf
.sec_offset
= 0;
180 pblk
->ppaf
.pln_offset
= ppaf
.sect_len
;
181 pblk
->ppaf
.ch_offset
= pblk
->ppaf
.pln_offset
+ ppaf
.pln_len
;
182 pblk
->ppaf
.lun_offset
= pblk
->ppaf
.ch_offset
+ ppaf
.ch_len
;
183 pblk
->ppaf
.pg_offset
= pblk
->ppaf
.lun_offset
+ ppaf
.lun_len
;
184 pblk
->ppaf
.blk_offset
= pblk
->ppaf
.pg_offset
+ ppaf
.pg_len
;
185 pblk
->ppaf
.sec_mask
= (1ULL << ppaf
.sect_len
) - 1;
186 pblk
->ppaf
.pln_mask
= ((1ULL << ppaf
.pln_len
) - 1) <<
187 pblk
->ppaf
.pln_offset
;
188 pblk
->ppaf
.ch_mask
= ((1ULL << ppaf
.ch_len
) - 1) <<
189 pblk
->ppaf
.ch_offset
;
190 pblk
->ppaf
.lun_mask
= ((1ULL << ppaf
.lun_len
) - 1) <<
191 pblk
->ppaf
.lun_offset
;
192 pblk
->ppaf
.pg_mask
= ((1ULL << ppaf
.pg_len
) - 1) <<
193 pblk
->ppaf
.pg_offset
;
194 pblk
->ppaf
.blk_mask
= ((1ULL << ppaf
.blk_len
) - 1) <<
195 pblk
->ppaf
.blk_offset
;
197 pblk
->ppaf_bitsize
= pblk
->ppaf
.blk_offset
+ ppaf
.blk_len
;
202 static int pblk_init_global_caches(struct pblk
*pblk
)
204 down_write(&pblk_lock
);
205 pblk_ws_cache
= kmem_cache_create("pblk_blk_ws",
206 sizeof(struct pblk_line_ws
), 0, 0, NULL
);
207 if (!pblk_ws_cache
) {
208 up_write(&pblk_lock
);
212 pblk_rec_cache
= kmem_cache_create("pblk_rec",
213 sizeof(struct pblk_rec_ctx
), 0, 0, NULL
);
214 if (!pblk_rec_cache
) {
215 kmem_cache_destroy(pblk_ws_cache
);
216 up_write(&pblk_lock
);
220 pblk_g_rq_cache
= kmem_cache_create("pblk_g_rq", pblk_g_rq_size
,
222 if (!pblk_g_rq_cache
) {
223 kmem_cache_destroy(pblk_ws_cache
);
224 kmem_cache_destroy(pblk_rec_cache
);
225 up_write(&pblk_lock
);
229 pblk_w_rq_cache
= kmem_cache_create("pblk_w_rq", pblk_w_rq_size
,
231 if (!pblk_w_rq_cache
) {
232 kmem_cache_destroy(pblk_ws_cache
);
233 kmem_cache_destroy(pblk_rec_cache
);
234 kmem_cache_destroy(pblk_g_rq_cache
);
235 up_write(&pblk_lock
);
238 up_write(&pblk_lock
);
243 static void pblk_free_global_caches(struct pblk
*pblk
)
245 kmem_cache_destroy(pblk_ws_cache
);
246 kmem_cache_destroy(pblk_rec_cache
);
247 kmem_cache_destroy(pblk_g_rq_cache
);
248 kmem_cache_destroy(pblk_w_rq_cache
);
251 static int pblk_core_init(struct pblk
*pblk
)
253 struct nvm_tgt_dev
*dev
= pblk
->dev
;
254 struct nvm_geo
*geo
= &dev
->geo
;
256 pblk
->pgs_in_buffer
= NVM_MEM_PAGE_WRITE
* geo
->sec_per_pg
*
257 geo
->nr_planes
* geo
->all_luns
;
259 if (pblk_init_global_caches(pblk
))
262 /* Internal bios can be at most the sectors signaled by the device. */
263 pblk
->page_bio_pool
= mempool_create_page_pool(nvm_max_phys_sects(dev
),
265 if (!pblk
->page_bio_pool
)
266 goto free_global_caches
;
268 pblk
->gen_ws_pool
= mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE
,
270 if (!pblk
->gen_ws_pool
)
271 goto free_page_bio_pool
;
273 pblk
->rec_pool
= mempool_create_slab_pool(geo
->all_luns
,
276 goto free_gen_ws_pool
;
278 pblk
->r_rq_pool
= mempool_create_slab_pool(geo
->all_luns
,
280 if (!pblk
->r_rq_pool
)
283 pblk
->e_rq_pool
= mempool_create_slab_pool(geo
->all_luns
,
285 if (!pblk
->e_rq_pool
)
288 pblk
->w_rq_pool
= mempool_create_slab_pool(geo
->all_luns
,
290 if (!pblk
->w_rq_pool
)
293 pblk
->close_wq
= alloc_workqueue("pblk-close-wq",
294 WQ_MEM_RECLAIM
| WQ_UNBOUND
, PBLK_NR_CLOSE_JOBS
);
298 pblk
->bb_wq
= alloc_workqueue("pblk-bb-wq",
299 WQ_MEM_RECLAIM
| WQ_UNBOUND
, 0);
303 pblk
->r_end_wq
= alloc_workqueue("pblk-read-end-wq",
304 WQ_MEM_RECLAIM
| WQ_UNBOUND
, 0);
308 if (pblk_set_ppaf(pblk
))
311 if (pblk_rwb_init(pblk
))
314 INIT_LIST_HEAD(&pblk
->compl_list
);
318 destroy_workqueue(pblk
->r_end_wq
);
320 destroy_workqueue(pblk
->bb_wq
);
322 destroy_workqueue(pblk
->close_wq
);
324 mempool_destroy(pblk
->w_rq_pool
);
326 mempool_destroy(pblk
->e_rq_pool
);
328 mempool_destroy(pblk
->r_rq_pool
);
330 mempool_destroy(pblk
->rec_pool
);
332 mempool_destroy(pblk
->gen_ws_pool
);
334 mempool_destroy(pblk
->page_bio_pool
);
336 pblk_free_global_caches(pblk
);
340 static void pblk_core_free(struct pblk
*pblk
)
343 destroy_workqueue(pblk
->close_wq
);
346 destroy_workqueue(pblk
->r_end_wq
);
349 destroy_workqueue(pblk
->bb_wq
);
351 mempool_destroy(pblk
->page_bio_pool
);
352 mempool_destroy(pblk
->gen_ws_pool
);
353 mempool_destroy(pblk
->rec_pool
);
354 mempool_destroy(pblk
->r_rq_pool
);
355 mempool_destroy(pblk
->e_rq_pool
);
356 mempool_destroy(pblk
->w_rq_pool
);
360 pblk_free_global_caches(pblk
);
363 static void pblk_luns_free(struct pblk
*pblk
)
368 static void pblk_free_line_bitmaps(struct pblk_line
*line
)
370 kfree(line
->blk_bitmap
);
371 kfree(line
->erase_bitmap
);
374 static void pblk_lines_free(struct pblk
*pblk
)
376 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
377 struct pblk_line
*line
;
380 spin_lock(&l_mg
->free_lock
);
381 for (i
= 0; i
< l_mg
->nr_lines
; i
++) {
382 line
= &pblk
->lines
[i
];
384 pblk_line_free(pblk
, line
);
385 pblk_free_line_bitmaps(line
);
387 spin_unlock(&l_mg
->free_lock
);
390 static void pblk_line_meta_free(struct pblk
*pblk
)
392 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
395 kfree(l_mg
->bb_template
);
397 kfree(l_mg
->vsc_list
);
399 for (i
= 0; i
< PBLK_DATA_LINES
; i
++) {
400 kfree(l_mg
->sline_meta
[i
]);
401 pblk_mfree(l_mg
->eline_meta
[i
]->buf
, l_mg
->emeta_alloc_type
);
402 kfree(l_mg
->eline_meta
[i
]);
408 static int pblk_bb_discovery(struct nvm_tgt_dev
*dev
, struct pblk_lun
*rlun
)
410 struct nvm_geo
*geo
= &dev
->geo
;
415 nr_blks
= geo
->nr_chks
* geo
->plane_mode
;
416 blks
= kmalloc(nr_blks
, GFP_KERNEL
);
421 ppa
.g
.ch
= rlun
->bppa
.g
.ch
;
422 ppa
.g
.lun
= rlun
->bppa
.g
.lun
;
424 ret
= nvm_get_tgt_bb_tbl(dev
, ppa
, blks
);
428 nr_blks
= nvm_bb_tbl_fold(dev
->parent
, blks
, nr_blks
);
434 rlun
->bb_list
= blks
;
442 static int pblk_bb_line(struct pblk
*pblk
, struct pblk_line
*line
,
445 struct nvm_tgt_dev
*dev
= pblk
->dev
;
446 struct nvm_geo
*geo
= &dev
->geo
;
447 struct pblk_lun
*rlun
;
451 for (i
= 0; i
< blk_per_line
; i
++) {
452 rlun
= &pblk
->luns
[i
];
453 if (rlun
->bb_list
[line
->id
] == NVM_BLK_T_FREE
)
456 set_bit(pblk_ppa_to_pos(geo
, rlun
->bppa
), line
->blk_bitmap
);
463 static int pblk_alloc_line_bitmaps(struct pblk
*pblk
, struct pblk_line
*line
)
465 struct pblk_line_meta
*lm
= &pblk
->lm
;
467 line
->blk_bitmap
= kzalloc(lm
->blk_bitmap_len
, GFP_KERNEL
);
468 if (!line
->blk_bitmap
)
471 line
->erase_bitmap
= kzalloc(lm
->blk_bitmap_len
, GFP_KERNEL
);
472 if (!line
->erase_bitmap
) {
473 kfree(line
->blk_bitmap
);
480 static int pblk_luns_init(struct pblk
*pblk
, struct ppa_addr
*luns
)
482 struct nvm_tgt_dev
*dev
= pblk
->dev
;
483 struct nvm_geo
*geo
= &dev
->geo
;
484 struct pblk_lun
*rlun
;
487 /* TODO: Implement unbalanced LUN support */
488 if (geo
->nr_luns
< 0) {
489 pr_err("pblk: unbalanced LUN config.\n");
493 pblk
->luns
= kcalloc(geo
->all_luns
, sizeof(struct pblk_lun
),
498 for (i
= 0; i
< geo
->all_luns
; i
++) {
499 /* Stripe across channels */
500 int ch
= i
% geo
->nr_chnls
;
501 int lun_raw
= i
/ geo
->nr_chnls
;
502 int lunid
= lun_raw
+ ch
* geo
->nr_luns
;
504 rlun
= &pblk
->luns
[i
];
505 rlun
->bppa
= luns
[lunid
];
507 sema_init(&rlun
->wr_sem
, 1);
509 ret
= pblk_bb_discovery(dev
, rlun
);
512 kfree(pblk
->luns
[i
].bb_list
);
520 static int pblk_lines_configure(struct pblk
*pblk
, int flags
)
522 struct pblk_line
*line
= NULL
;
525 if (!(flags
& NVM_TARGET_FACTORY
)) {
526 line
= pblk_recov_l2p(pblk
);
528 pr_err("pblk: could not recover l2p table\n");
533 #ifdef CONFIG_NVM_DEBUG
534 pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk
));
537 /* Free full lines directly as GC has not been started yet */
538 pblk_gc_free_full_lines(pblk
);
541 /* Configure next line for user data */
542 line
= pblk_line_get_first_data(pblk
);
544 pr_err("pblk: line list corrupted\n");
552 /* See comment over struct line_emeta definition */
553 static unsigned int calc_emeta_len(struct pblk
*pblk
)
555 struct pblk_line_meta
*lm
= &pblk
->lm
;
556 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
557 struct nvm_tgt_dev
*dev
= pblk
->dev
;
558 struct nvm_geo
*geo
= &dev
->geo
;
560 /* Round to sector size so that lba_list starts on its own sector */
561 lm
->emeta_sec
[1] = DIV_ROUND_UP(
562 sizeof(struct line_emeta
) + lm
->blk_bitmap_len
,
564 lm
->emeta_len
[1] = lm
->emeta_sec
[1] * geo
->sec_size
;
566 /* Round to sector size so that vsc_list starts on its own sector */
567 lm
->dsec_per_line
= lm
->sec_per_line
- lm
->emeta_sec
[0];
568 lm
->emeta_sec
[2] = DIV_ROUND_UP(lm
->dsec_per_line
* sizeof(u64
),
570 lm
->emeta_len
[2] = lm
->emeta_sec
[2] * geo
->sec_size
;
572 lm
->emeta_sec
[3] = DIV_ROUND_UP(l_mg
->nr_lines
* sizeof(u32
),
574 lm
->emeta_len
[3] = lm
->emeta_sec
[3] * geo
->sec_size
;
576 lm
->vsc_list_len
= l_mg
->nr_lines
* sizeof(u32
);
578 return (lm
->emeta_len
[1] + lm
->emeta_len
[2] + lm
->emeta_len
[3]);
581 static void pblk_set_provision(struct pblk
*pblk
, long nr_free_blks
)
583 struct nvm_tgt_dev
*dev
= pblk
->dev
;
584 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
585 struct pblk_line_meta
*lm
= &pblk
->lm
;
586 struct nvm_geo
*geo
= &dev
->geo
;
587 sector_t provisioned
;
588 int sec_meta
, blk_meta
;
590 if (geo
->op
== NVM_TARGET_DEFAULT_OP
)
591 pblk
->op
= PBLK_DEFAULT_OP
;
595 provisioned
= nr_free_blks
;
596 provisioned
*= (100 - pblk
->op
);
597 sector_div(provisioned
, 100);
599 pblk
->op_blks
= nr_free_blks
- provisioned
;
601 /* Internally pblk manages all free blocks, but all calculations based
602 * on user capacity consider only provisioned blocks
604 pblk
->rl
.total_blocks
= nr_free_blks
;
605 pblk
->rl
.nr_secs
= nr_free_blks
* geo
->sec_per_chk
;
607 /* Consider sectors used for metadata */
608 sec_meta
= (lm
->smeta_sec
+ lm
->emeta_sec
[0]) * l_mg
->nr_free_lines
;
609 blk_meta
= DIV_ROUND_UP(sec_meta
, geo
->sec_per_chk
);
611 pblk
->capacity
= (provisioned
- blk_meta
) * geo
->sec_per_chk
;
613 atomic_set(&pblk
->rl
.free_blocks
, nr_free_blks
);
614 atomic_set(&pblk
->rl
.free_user_blocks
, nr_free_blks
);
617 static int pblk_lines_alloc_metadata(struct pblk
*pblk
)
619 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
620 struct pblk_line_meta
*lm
= &pblk
->lm
;
623 /* smeta is always small enough to fit on a kmalloc memory allocation,
624 * emeta depends on the number of LUNs allocated to the pblk instance
626 for (i
= 0; i
< PBLK_DATA_LINES
; i
++) {
627 l_mg
->sline_meta
[i
] = kmalloc(lm
->smeta_len
, GFP_KERNEL
);
628 if (!l_mg
->sline_meta
[i
])
629 goto fail_free_smeta
;
632 /* emeta allocates three different buffers for managing metadata with
633 * in-memory and in-media layouts
635 for (i
= 0; i
< PBLK_DATA_LINES
; i
++) {
636 struct pblk_emeta
*emeta
;
638 emeta
= kmalloc(sizeof(struct pblk_emeta
), GFP_KERNEL
);
640 goto fail_free_emeta
;
642 if (lm
->emeta_len
[0] > KMALLOC_MAX_CACHE_SIZE
) {
643 l_mg
->emeta_alloc_type
= PBLK_VMALLOC_META
;
645 emeta
->buf
= vmalloc(lm
->emeta_len
[0]);
648 goto fail_free_emeta
;
651 emeta
->nr_entries
= lm
->emeta_sec
[0];
652 l_mg
->eline_meta
[i
] = emeta
;
654 l_mg
->emeta_alloc_type
= PBLK_KMALLOC_META
;
656 emeta
->buf
= kmalloc(lm
->emeta_len
[0], GFP_KERNEL
);
659 goto fail_free_emeta
;
662 emeta
->nr_entries
= lm
->emeta_sec
[0];
663 l_mg
->eline_meta
[i
] = emeta
;
667 l_mg
->vsc_list
= kcalloc(l_mg
->nr_lines
, sizeof(__le32
), GFP_KERNEL
);
669 goto fail_free_emeta
;
671 for (i
= 0; i
< l_mg
->nr_lines
; i
++)
672 l_mg
->vsc_list
[i
] = cpu_to_le32(EMPTY_ENTRY
);
678 if (l_mg
->emeta_alloc_type
== PBLK_VMALLOC_META
)
679 vfree(l_mg
->eline_meta
[i
]->buf
);
681 kfree(l_mg
->eline_meta
[i
]->buf
);
682 kfree(l_mg
->eline_meta
[i
]);
686 for (i
= 0; i
< PBLK_DATA_LINES
; i
++)
687 kfree(l_mg
->sline_meta
[i
]);
692 static int pblk_lines_init(struct pblk
*pblk
)
694 struct nvm_tgt_dev
*dev
= pblk
->dev
;
695 struct nvm_geo
*geo
= &dev
->geo
;
696 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
697 struct pblk_line_meta
*lm
= &pblk
->lm
;
698 struct pblk_line
*line
;
699 unsigned int smeta_len
, emeta_len
;
700 long nr_bad_blks
, nr_free_blks
;
701 int bb_distance
, max_write_ppas
, mod
;
704 pblk
->min_write_pgs
= geo
->sec_per_pl
* (geo
->sec_size
/ PAGE_SIZE
);
705 max_write_ppas
= pblk
->min_write_pgs
* geo
->all_luns
;
706 pblk
->max_write_pgs
= (max_write_ppas
< nvm_max_phys_sects(dev
)) ?
707 max_write_ppas
: nvm_max_phys_sects(dev
);
708 pblk_set_sec_per_write(pblk
, pblk
->min_write_pgs
);
710 if (pblk
->max_write_pgs
> PBLK_MAX_REQ_ADDRS
) {
711 pr_err("pblk: cannot support device max_phys_sect\n");
715 div_u64_rem(geo
->sec_per_chk
, pblk
->min_write_pgs
, &mod
);
717 pr_err("pblk: bad configuration of sectors/pages\n");
721 l_mg
->nr_lines
= geo
->nr_chks
;
722 l_mg
->log_line
= l_mg
->data_line
= NULL
;
723 l_mg
->l_seq_nr
= l_mg
->d_seq_nr
= 0;
724 l_mg
->nr_free_lines
= 0;
725 bitmap_zero(&l_mg
->meta_bitmap
, PBLK_DATA_LINES
);
727 lm
->sec_per_line
= geo
->sec_per_chk
* geo
->all_luns
;
728 lm
->blk_per_line
= geo
->all_luns
;
729 lm
->blk_bitmap_len
= BITS_TO_LONGS(geo
->all_luns
) * sizeof(long);
730 lm
->sec_bitmap_len
= BITS_TO_LONGS(lm
->sec_per_line
) * sizeof(long);
731 lm
->lun_bitmap_len
= BITS_TO_LONGS(geo
->all_luns
) * sizeof(long);
732 lm
->mid_thrs
= lm
->sec_per_line
/ 2;
733 lm
->high_thrs
= lm
->sec_per_line
/ 4;
734 lm
->meta_distance
= (geo
->all_luns
/ 2) * pblk
->min_write_pgs
;
736 /* Calculate necessary pages for smeta. See comment over struct
737 * line_smeta definition
741 lm
->smeta_sec
= i
* geo
->sec_per_pl
;
742 lm
->smeta_len
= lm
->smeta_sec
* geo
->sec_size
;
744 smeta_len
= sizeof(struct line_smeta
) + lm
->lun_bitmap_len
;
745 if (smeta_len
> lm
->smeta_len
) {
750 /* Calculate necessary pages for emeta. See comment over struct
751 * line_emeta definition
755 lm
->emeta_sec
[0] = i
* geo
->sec_per_pl
;
756 lm
->emeta_len
[0] = lm
->emeta_sec
[0] * geo
->sec_size
;
758 emeta_len
= calc_emeta_len(pblk
);
759 if (emeta_len
> lm
->emeta_len
[0]) {
764 lm
->emeta_bb
= geo
->all_luns
> i
? geo
->all_luns
- i
: 0;
766 lm
->min_blk_line
= 1;
767 if (geo
->all_luns
> 1)
768 lm
->min_blk_line
+= DIV_ROUND_UP(lm
->smeta_sec
+
769 lm
->emeta_sec
[0], geo
->sec_per_chk
);
771 if (lm
->min_blk_line
> lm
->blk_per_line
) {
772 pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
778 ret
= pblk_lines_alloc_metadata(pblk
);
782 l_mg
->bb_template
= kzalloc(lm
->sec_bitmap_len
, GFP_KERNEL
);
783 if (!l_mg
->bb_template
) {
788 l_mg
->bb_aux
= kzalloc(lm
->sec_bitmap_len
, GFP_KERNEL
);
791 goto fail_free_bb_template
;
794 bb_distance
= (geo
->all_luns
) * geo
->sec_per_pl
;
795 for (i
= 0; i
< lm
->sec_per_line
; i
+= bb_distance
)
796 bitmap_set(l_mg
->bb_template
, i
, geo
->sec_per_pl
);
798 INIT_LIST_HEAD(&l_mg
->free_list
);
799 INIT_LIST_HEAD(&l_mg
->corrupt_list
);
800 INIT_LIST_HEAD(&l_mg
->bad_list
);
801 INIT_LIST_HEAD(&l_mg
->gc_full_list
);
802 INIT_LIST_HEAD(&l_mg
->gc_high_list
);
803 INIT_LIST_HEAD(&l_mg
->gc_mid_list
);
804 INIT_LIST_HEAD(&l_mg
->gc_low_list
);
805 INIT_LIST_HEAD(&l_mg
->gc_empty_list
);
807 INIT_LIST_HEAD(&l_mg
->emeta_list
);
809 l_mg
->gc_lists
[0] = &l_mg
->gc_high_list
;
810 l_mg
->gc_lists
[1] = &l_mg
->gc_mid_list
;
811 l_mg
->gc_lists
[2] = &l_mg
->gc_low_list
;
813 spin_lock_init(&l_mg
->free_lock
);
814 spin_lock_init(&l_mg
->close_lock
);
815 spin_lock_init(&l_mg
->gc_lock
);
817 pblk
->lines
= kcalloc(l_mg
->nr_lines
, sizeof(struct pblk_line
),
821 goto fail_free_bb_aux
;
825 for (i
= 0; i
< l_mg
->nr_lines
; i
++) {
828 line
= &pblk
->lines
[i
];
832 line
->type
= PBLK_LINETYPE_FREE
;
833 line
->state
= PBLK_LINESTATE_FREE
;
834 line
->gc_group
= PBLK_LINEGC_NONE
;
835 line
->vsc
= &l_mg
->vsc_list
[i
];
836 spin_lock_init(&line
->lock
);
838 ret
= pblk_alloc_line_bitmaps(pblk
, line
);
840 goto fail_free_lines
;
842 nr_bad_blks
= pblk_bb_line(pblk
, line
, lm
->blk_per_line
);
843 if (nr_bad_blks
< 0 || nr_bad_blks
> lm
->blk_per_line
) {
844 pblk_free_line_bitmaps(line
);
846 goto fail_free_lines
;
849 blk_in_line
= lm
->blk_per_line
- nr_bad_blks
;
850 if (blk_in_line
< lm
->min_blk_line
) {
851 line
->state
= PBLK_LINESTATE_BAD
;
852 list_add_tail(&line
->list
, &l_mg
->bad_list
);
856 nr_free_blks
+= blk_in_line
;
857 atomic_set(&line
->blk_in_line
, blk_in_line
);
859 l_mg
->nr_free_lines
++;
860 list_add_tail(&line
->list
, &l_mg
->free_list
);
863 pblk_set_provision(pblk
, nr_free_blks
);
865 /* Cleanup per-LUN bad block lists - managed within lines on run-time */
866 for (i
= 0; i
< geo
->all_luns
; i
++)
867 kfree(pblk
->luns
[i
].bb_list
);
872 pblk_free_line_bitmaps(&pblk
->lines
[i
]);
875 fail_free_bb_template
:
876 kfree(l_mg
->bb_template
);
878 pblk_line_meta_free(pblk
);
880 for (i
= 0; i
< geo
->all_luns
; i
++)
881 kfree(pblk
->luns
[i
].bb_list
);
886 static int pblk_writer_init(struct pblk
*pblk
)
888 pblk
->writer_ts
= kthread_create(pblk_write_ts
, pblk
, "pblk-writer-t");
889 if (IS_ERR(pblk
->writer_ts
)) {
890 int err
= PTR_ERR(pblk
->writer_ts
);
893 pr_err("pblk: could not allocate writer kthread (%d)\n",
898 timer_setup(&pblk
->wtimer
, pblk_write_timer_fn
, 0);
899 mod_timer(&pblk
->wtimer
, jiffies
+ msecs_to_jiffies(100));
904 static void pblk_writer_stop(struct pblk
*pblk
)
906 /* The pipeline must be stopped and the write buffer emptied before the
907 * write thread is stopped
909 WARN(pblk_rb_read_count(&pblk
->rwb
),
910 "Stopping not fully persisted write buffer\n");
912 WARN(pblk_rb_sync_count(&pblk
->rwb
),
913 "Stopping not fully synced write buffer\n");
916 kthread_stop(pblk
->writer_ts
);
917 del_timer(&pblk
->wtimer
);
920 static void pblk_free(struct pblk
*pblk
)
922 pblk_luns_free(pblk
);
923 pblk_lines_free(pblk
);
924 pblk_line_meta_free(pblk
);
925 pblk_core_free(pblk
);
931 static void pblk_tear_down(struct pblk
*pblk
)
933 pblk_pipeline_stop(pblk
);
934 pblk_writer_stop(pblk
);
935 pblk_rb_sync_l2p(&pblk
->rwb
);
936 pblk_rl_free(&pblk
->rl
);
938 pr_debug("pblk: consistent tear down\n");
941 static void pblk_exit(void *private)
943 struct pblk
*pblk
= private;
945 down_write(&pblk_lock
);
947 pblk_tear_down(pblk
);
949 #ifdef CONFIG_NVM_DEBUG
950 pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk
));
954 up_write(&pblk_lock
);
957 static sector_t
pblk_capacity(void *private)
959 struct pblk
*pblk
= private;
961 return pblk
->capacity
* NR_PHY_IN_LOG
;
964 static void *pblk_init(struct nvm_tgt_dev
*dev
, struct gendisk
*tdisk
,
967 struct nvm_geo
*geo
= &dev
->geo
;
968 struct request_queue
*bqueue
= dev
->q
;
969 struct request_queue
*tqueue
= tdisk
->queue
;
973 if (dev
->identity
.dom
& NVM_RSP_L2P
) {
974 pr_err("pblk: host-side L2P table not supported. (%x)\n",
976 return ERR_PTR(-EINVAL
);
979 pblk
= kzalloc(sizeof(struct pblk
), GFP_KERNEL
);
981 return ERR_PTR(-ENOMEM
);
985 pblk
->state
= PBLK_STATE_RUNNING
;
986 pblk
->gc
.gc_enabled
= 0;
988 spin_lock_init(&pblk
->trans_lock
);
989 spin_lock_init(&pblk
->lock
);
991 if (flags
& NVM_TARGET_FACTORY
)
992 pblk_setup_uuid(pblk
);
994 #ifdef CONFIG_NVM_DEBUG
995 atomic_long_set(&pblk
->inflight_writes
, 0);
996 atomic_long_set(&pblk
->padded_writes
, 0);
997 atomic_long_set(&pblk
->padded_wb
, 0);
998 atomic_long_set(&pblk
->nr_flush
, 0);
999 atomic_long_set(&pblk
->req_writes
, 0);
1000 atomic_long_set(&pblk
->sub_writes
, 0);
1001 atomic_long_set(&pblk
->sync_writes
, 0);
1002 atomic_long_set(&pblk
->inflight_reads
, 0);
1003 atomic_long_set(&pblk
->cache_reads
, 0);
1004 atomic_long_set(&pblk
->sync_reads
, 0);
1005 atomic_long_set(&pblk
->recov_writes
, 0);
1006 atomic_long_set(&pblk
->recov_writes
, 0);
1007 atomic_long_set(&pblk
->recov_gc_writes
, 0);
1008 atomic_long_set(&pblk
->recov_gc_reads
, 0);
1011 atomic_long_set(&pblk
->read_failed
, 0);
1012 atomic_long_set(&pblk
->read_empty
, 0);
1013 atomic_long_set(&pblk
->read_high_ecc
, 0);
1014 atomic_long_set(&pblk
->read_failed_gc
, 0);
1015 atomic_long_set(&pblk
->write_failed
, 0);
1016 atomic_long_set(&pblk
->erase_failed
, 0);
1018 ret
= pblk_luns_init(pblk
, dev
->luns
);
1020 pr_err("pblk: could not initialize luns\n");
1024 ret
= pblk_lines_init(pblk
);
1026 pr_err("pblk: could not initialize lines\n");
1027 goto fail_free_luns
;
1030 ret
= pblk_core_init(pblk
);
1032 pr_err("pblk: could not initialize core\n");
1033 goto fail_free_line_meta
;
1036 ret
= pblk_l2p_init(pblk
);
1038 pr_err("pblk: could not initialize maps\n");
1039 goto fail_free_core
;
1042 ret
= pblk_lines_configure(pblk
, flags
);
1044 pr_err("pblk: could not configure lines\n");
1048 ret
= pblk_writer_init(pblk
);
1051 pr_err("pblk: could not initialize write thread\n");
1052 goto fail_free_lines
;
1055 ret
= pblk_gc_init(pblk
);
1057 pr_err("pblk: could not initialize gc\n");
1058 goto fail_stop_writer
;
1061 /* inherit the size from the underlying device */
1062 blk_queue_logical_block_size(tqueue
, queue_physical_block_size(bqueue
));
1063 blk_queue_max_hw_sectors(tqueue
, queue_max_hw_sectors(bqueue
));
1065 blk_queue_write_cache(tqueue
, true, false);
1067 tqueue
->limits
.discard_granularity
= geo
->sec_per_chk
* geo
->sec_size
;
1068 tqueue
->limits
.discard_alignment
= 0;
1069 blk_queue_max_discard_sectors(tqueue
, UINT_MAX
>> 9);
1070 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD
, tqueue
);
1072 pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
1074 geo
->all_luns
, pblk
->l_mg
.nr_lines
,
1075 (unsigned long long)pblk
->rl
.nr_secs
,
1076 pblk
->rwb
.nr_entries
);
1078 wake_up_process(pblk
->writer_ts
);
1080 /* Check if we need to start GC */
1081 pblk_gc_should_kick(pblk
);
1086 pblk_writer_stop(pblk
);
1088 pblk_lines_free(pblk
);
1090 pblk_l2p_free(pblk
);
1092 pblk_core_free(pblk
);
1093 fail_free_line_meta
:
1094 pblk_line_meta_free(pblk
);
1096 pblk_luns_free(pblk
);
1099 return ERR_PTR(ret
);
1102 /* physical block device target */
1103 static struct nvm_tgt_type tt_pblk
= {
1105 .version
= {1, 0, 0},
1107 .make_rq
= pblk_make_rq
,
1108 .capacity
= pblk_capacity
,
1113 .sysfs_init
= pblk_sysfs_init
,
1114 .sysfs_exit
= pblk_sysfs_exit
,
1115 .owner
= THIS_MODULE
,
1118 static int __init
pblk_module_init(void)
1122 pblk_bio_set
= bioset_create(BIO_POOL_SIZE
, 0, 0);
1125 ret
= nvm_register_tgt_type(&tt_pblk
);
1127 bioset_free(pblk_bio_set
);
1131 static void pblk_module_exit(void)
1133 bioset_free(pblk_bio_set
);
1134 nvm_unregister_tgt_type(&tt_pblk
);
1137 module_init(pblk_module_init
);
1138 module_exit(pblk_module_exit
);
1139 MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
1140 MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
1141 MODULE_LICENSE("GPL v2");
1142 MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");