/*
 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Implementation of a physical block-device target for Open-channel SSDs.
 *
 * pblk-init.c - pblk's initialization.
 */
static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
                                *pblk_w_rq_cache;
static DECLARE_RWSEM(pblk_lock);
struct bio_set *pblk_bio_set;
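
/*
 * User I/O entry path (reached from pblk_make_rq() below once discards and
 * flushes have been handled): reads are split to the device limit and
 * submitted directly, while writes are staged in pblk's ring write buffer
 * and persisted later by the writer thread.
 */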
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
                          struct bio *bio)
        /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
         * constraint. Writes can be of arbitrary size.
         */
        if (bio_data_dir(bio) == READ) {
                blk_queue_split(q, &bio);
                ret = pblk_submit_read(pblk, bio);
                if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
        /* Prevent deadlock in the case of a modest LUN configuration and large
         * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
         * available for user I/O.
         */
        if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
                blk_queue_split(q, &bio);

        return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
        struct pblk *pblk = q->queuedata;
        if (bio_op(bio) == REQ_OP_DISCARD) {
                pblk_discard(pblk, bio);
                if (!(bio->bi_opf & REQ_PREFLUSH)) {

        switch (pblk_rw_io(q, pblk, bio)) {
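
/*
 * pblk keeps the full logical-to-physical table in main memory. Each entry
 * stores one physical address: 4 bytes when the device address format fits
 * in 32 bits (ppaf_bitsize < 32), otherwise 8 bytes, so the total size
 * scales with the number of exposed sectors (rl.nr_secs).
 */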
static size_t pblk_trans_map_size(struct pblk *pblk)
        int entry_size = 8;

        if (pblk->ppaf_bitsize < 32)
                entry_size = 4;

        return entry_size * pblk->rl.nr_secs;
#ifdef CONFIG_NVM_DEBUG
static u32 pblk_l2p_crc(struct pblk *pblk)

        map_size = pblk_trans_map_size(pblk);
        crc = crc32_le(crc, pblk->trans_map, map_size);
static void pblk_l2p_free(struct pblk *pblk)

        vfree(pblk->trans_map);
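
/*
 * The L2P table is vmalloc'ed (it can grow large on big devices) and every
 * entry starts out as an empty PPA; recovery or new writes fill it in later.
 */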
static int pblk_l2p_init(struct pblk *pblk)

        map_size = pblk_trans_map_size(pblk);
        pblk->trans_map = vmalloc(map_size);
        if (!pblk->trans_map)

        pblk_ppa_set_empty(&ppa);

        for (i = 0; i < pblk->rl.nr_secs; i++)
                pblk_trans_map_set(pblk, i, ppa);
static void pblk_rwb_free(struct pblk *pblk)

        if (pblk_rb_tear_down_check(&pblk->rwb))
                pr_err("pblk: write buffer error on tear down\n");

        pblk_rb_data_free(&pblk->rwb);
        vfree(pblk_rb_entries_ref(&pblk->rwb));
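
/*
 * The ring write buffer is sized from the number of flash pages pblk keeps
 * in flight (pgs_in_buffer). Entry count and segment size are handed over as
 * powers of two (get_count_order()), which presumably lets the ring buffer
 * use mask-based index arithmetic.
 */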
static int pblk_rwb_init(struct pblk *pblk)

        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_rb_entry *entries;
        unsigned long nr_entries;
        unsigned int power_size, power_seg_sz;

        nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);

        entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));

        power_size = get_count_order(nr_entries);
        power_seg_sz = get_count_order(geo->sec_size);

        return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
/* Minimum pages needed within a lun */
#define ADDR_POOL_SIZE 64
static int pblk_set_ppaf(struct pblk *pblk)

        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct nvm_addr_format ppaf = geo->ppaf;
        /* Re-calculate channel and lun format to adapt to configuration */
        power_len = get_count_order(geo->nr_chnls);
        if (1 << power_len != geo->nr_chnls) {
                pr_err("pblk: supports only power-of-two channel config.\n");

        ppaf.ch_len = power_len;

        power_len = get_count_order(geo->luns_per_chnl);
        if (1 << power_len != geo->luns_per_chnl) {
                pr_err("pblk: supports only power-of-two LUN config.\n");

        ppaf.lun_len = power_len;
        pblk->ppaf.sec_offset = 0;
        pblk->ppaf.pln_offset = ppaf.sect_len;
        pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
        pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
        pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
        pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
        pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
        pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
                                                        pblk->ppaf.pln_offset;
        pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
                                                        pblk->ppaf.ch_offset;
        pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
                                                        pblk->ppaf.lun_offset;
        pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
                                                        pblk->ppaf.pg_offset;
        pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
                                                        pblk->ppaf.blk_offset;

        pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;
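
/*
 * The resulting internal PPA layout packs the fields contiguously from bit 0:
 * sector, plane, channel, LUN, page, block. Worked example with a
 * hypothetical geometry of sect_len=2, pln_len=1, ch_len=4, lun_len=2,
 * pg_len=8 and blk_len=12: the offsets become 0, 2, 3, 7, 9 and 17, and
 * ppaf_bitsize = 29, so such an address fits in a 32-bit L2P entry.
 */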
static int pblk_init_global_caches(struct pblk *pblk)

        down_write(&pblk_lock);
        pblk_ws_cache = kmem_cache_create("pblk_blk_ws",
                                sizeof(struct pblk_line_ws), 0, 0, NULL);
        if (!pblk_ws_cache) {
                up_write(&pblk_lock);

        pblk_rec_cache = kmem_cache_create("pblk_rec",
                                sizeof(struct pblk_rec_ctx), 0, 0, NULL);
        if (!pblk_rec_cache) {
                kmem_cache_destroy(pblk_ws_cache);
                up_write(&pblk_lock);

        pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
                                0, 0, NULL);
        if (!pblk_g_rq_cache) {
                kmem_cache_destroy(pblk_ws_cache);
                kmem_cache_destroy(pblk_rec_cache);
                up_write(&pblk_lock);

        pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
                                0, 0, NULL);
        if (!pblk_w_rq_cache) {
                kmem_cache_destroy(pblk_ws_cache);
                kmem_cache_destroy(pblk_rec_cache);
                kmem_cache_destroy(pblk_g_rq_cache);
                up_write(&pblk_lock);

        up_write(&pblk_lock);
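
/*
 * Tear-down counterpart of pblk_init_global_caches(): destroys the four
 * file-global slab caches in one place.
 */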
static void pblk_free_global_caches(struct pblk *pblk)

        kmem_cache_destroy(pblk_ws_cache);
        kmem_cache_destroy(pblk_rec_cache);
        kmem_cache_destroy(pblk_g_rq_cache);
        kmem_cache_destroy(pblk_w_rq_cache);
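
/*
 * Core initialization order: global slab caches, then the per-instance
 * mempools and workqueues, the device-specific PPA format and finally the
 * ring write buffer. The error paths below unwind in reverse order.
 */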
static int pblk_core_init(struct pblk *pblk)

        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;

        pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
                                                geo->nr_planes * geo->nr_luns;

        if (pblk_init_global_caches(pblk))

        /* Internal bios can be at most the sectors signaled by the device. */
        pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev),
        if (!pblk->page_bio_pool)
                goto free_global_caches;

        pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE,
        if (!pblk->gen_ws_pool)
                goto free_page_bio_pool;

        pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache);
                goto free_gen_ws_pool;

        pblk->r_rq_pool = mempool_create_slab_pool(geo->nr_luns,
        if (!pblk->r_rq_pool)

        pblk->e_rq_pool = mempool_create_slab_pool(geo->nr_luns,
        if (!pblk->e_rq_pool)

        pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns,
        if (!pblk->w_rq_pool)

        pblk->close_wq = alloc_workqueue("pblk-close-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);

        pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, 0);

        pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, 0);

        if (pblk_set_ppaf(pblk))

        if (pblk_rwb_init(pblk))

        INIT_LIST_HEAD(&pblk->compl_list);

        destroy_workqueue(pblk->r_end_wq);

        destroy_workqueue(pblk->bb_wq);

        destroy_workqueue(pblk->close_wq);

        mempool_destroy(pblk->w_rq_pool);

        mempool_destroy(pblk->e_rq_pool);

        mempool_destroy(pblk->r_rq_pool);

        mempool_destroy(pblk->rec_pool);

        mempool_destroy(pblk->gen_ws_pool);

        mempool_destroy(pblk->page_bio_pool);

        pblk_free_global_caches(pblk);
static void pblk_core_free(struct pblk *pblk)

        destroy_workqueue(pblk->close_wq);

        destroy_workqueue(pblk->r_end_wq);

        destroy_workqueue(pblk->bb_wq);

        mempool_destroy(pblk->page_bio_pool);
        mempool_destroy(pblk->gen_ws_pool);
        mempool_destroy(pblk->rec_pool);
        mempool_destroy(pblk->r_rq_pool);
        mempool_destroy(pblk->e_rq_pool);
        mempool_destroy(pblk->w_rq_pool);

        pblk_free_global_caches(pblk);
static void pblk_luns_free(struct pblk *pblk)

static void pblk_free_line_bitmaps(struct pblk_line *line)

        kfree(line->blk_bitmap);
        kfree(line->erase_bitmap);
static void pblk_lines_free(struct pblk *pblk)

        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line *line;

        spin_lock(&l_mg->free_lock);
        for (i = 0; i < l_mg->nr_lines; i++) {
                line = &pblk->lines[i];

                pblk_line_free(pblk, line);
                pblk_free_line_bitmaps(line);

        spin_unlock(&l_mg->free_lock);
static void pblk_line_meta_free(struct pblk *pblk)

        struct pblk_line_mgmt *l_mg = &pblk->l_mg;

        kfree(l_mg->bb_template);

        kfree(l_mg->vsc_list);

        for (i = 0; i < PBLK_DATA_LINES; i++) {
                kfree(l_mg->sline_meta[i]);
                pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
                kfree(l_mg->eline_meta[i]);
static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)

        struct nvm_geo *geo = &dev->geo;

        nr_blks = geo->blks_per_lun * geo->plane_mode;
        blks = kmalloc(nr_blks, GFP_KERNEL);

        ppa.g.ch = rlun->bppa.g.ch;
        ppa.g.lun = rlun->bppa.g.lun;

        ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);

        nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);

        rlun->bb_list = blks;
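
/*
 * Walk every LUN's bad-block table entry for this line's block id and mark
 * the bad ones in the line's blk_bitmap so the mapper skips them.
 */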
static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
                        int blk_per_line)

        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_lun *rlun;

        for (i = 0; i < blk_per_line; i++) {
                rlun = &pblk->luns[i];
                if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)

                set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)

        struct pblk_line_meta *lm = &pblk->lm;

        line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
        if (!line->blk_bitmap)

        line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
        if (!line->erase_bitmap) {
                kfree(line->blk_bitmap);
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)

        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_lun *rlun;

        /* TODO: Implement unbalanced LUN support */
        if (geo->luns_per_chnl < 0) {
                pr_err("pblk: unbalanced LUN config.\n");

        pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL);
        for (i = 0; i < geo->nr_luns; i++) {
                /* Stripe across channels */
                int ch = i % geo->nr_chnls;
                int lun_raw = i / geo->nr_chnls;
                int lunid = lun_raw + ch * geo->luns_per_chnl;
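
                /*
                 * Example (hypothetical geometry): with 2 channels and 4 LUNs
                 * per channel, i = 0..7 selects luns[] entries 0, 4, 1, 5, 2,
                 * 6, 3, 7 - consecutive pblk LUNs alternate channels before a
                 * channel is reused.
                 */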
                rlun = &pblk->luns[i];
                rlun->bppa = luns[lunid];

                sema_init(&rlun->wr_sem, 1);

                ret = pblk_bb_discovery(dev, rlun);

                        kfree(pblk->luns[i].bb_list);
static int pblk_lines_configure(struct pblk *pblk, int flags)

        struct pblk_line *line = NULL;

        if (!(flags & NVM_TARGET_FACTORY)) {
                line = pblk_recov_l2p(pblk);
                        pr_err("pblk: could not recover l2p table\n");

#ifdef CONFIG_NVM_DEBUG
                pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk));

        /* Free full lines directly as GC has not been started yet */
        pblk_gc_free_full_lines(pblk);

        /* Configure next line for user data */
        line = pblk_line_get_first_data(pblk);
                pr_err("pblk: line list corrupted\n");
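
/*
 * emeta is laid out as several regions, each rounded up to whole sectors:
 * index [1] covers the line header plus the bad-block bitmap, [2] the lba
 * list for the line's data sectors and [3] the per-line valid-sector
 * counters; [0] (set by the caller) holds the total footprint.
 */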
/* See comment over struct line_emeta definition */
static unsigned int calc_emeta_len(struct pblk *pblk)

        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        /* Round to sector size so that lba_list starts on its own sector */
        lm->emeta_sec[1] = DIV_ROUND_UP(
                        sizeof(struct line_emeta) + lm->blk_bitmap_len,
                        geo->sec_size);
        lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;

        /* Round to sector size so that vsc_list starts on its own sector */
        lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
        lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
                        geo->sec_size);
        lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;

        lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
                        geo->sec_size);
        lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;

        lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);

        return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)

        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        sector_t provisioned;

        provisioned = nr_free_blks;
        provisioned *= (100 - pblk->over_pct);
        sector_div(provisioned, 100);

        /* Internally pblk manages all free blocks, but all calculations based
         * on user capacity consider only provisioned blocks
         */
        pblk->rl.total_blocks = nr_free_blks;
        pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk;
        pblk->capacity = provisioned * geo->sec_per_blk;
        atomic_set(&pblk->rl.free_blocks, nr_free_blks);
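
/*
 * Over-provisioning example (illustrative numbers only): with over_pct = 20
 * and 1000 free blocks of 4096 sectors each, provisioned = 1000 * 80 / 100 =
 * 800 blocks, so the exposed capacity is 800 * 4096 sectors while the rate
 * limiter keeps tracking all 1000 blocks internally.
 */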
static int pblk_lines_alloc_metadata(struct pblk *pblk)

        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;

        /* smeta is always small enough to fit on a kmalloc memory allocation,
         * emeta depends on the number of LUNs allocated to the pblk instance
         */
        for (i = 0; i < PBLK_DATA_LINES; i++) {
                l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
                if (!l_mg->sline_meta[i])
                        goto fail_free_smeta;
        /* emeta allocates three different buffers for managing metadata with
         * in-memory and in-media layouts
         */
        for (i = 0; i < PBLK_DATA_LINES; i++) {
                struct pblk_emeta *emeta;

                emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
                        goto fail_free_emeta;

                if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
                        l_mg->emeta_alloc_type = PBLK_VMALLOC_META;

                        emeta->buf = vmalloc(lm->emeta_len[0]);
                                goto fail_free_emeta;

                        emeta->nr_entries = lm->emeta_sec[0];
                        l_mg->eline_meta[i] = emeta;
                } else {
                        l_mg->emeta_alloc_type = PBLK_KMALLOC_META;

                        emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
                                goto fail_free_emeta;

                        emeta->nr_entries = lm->emeta_sec[0];
                        l_mg->eline_meta[i] = emeta;
                }

        l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
                goto fail_free_emeta;

        for (i = 0; i < l_mg->nr_lines; i++)
                l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
fail_free_emeta:
                if (l_mg->emeta_alloc_type == PBLK_VMALLOC_META)
                        vfree(l_mg->eline_meta[i]->buf);
                else
                        kfree(l_mg->eline_meta[i]->buf);
                kfree(l_mg->eline_meta[i]);

fail_free_smeta:
        for (i = 0; i < PBLK_DATA_LINES; i++)
                kfree(l_mg->sline_meta[i]);
static int pblk_lines_init(struct pblk *pblk)

        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line *line;
        unsigned int smeta_len, emeta_len;
        long nr_bad_blks, nr_free_blks;
        int bb_distance, max_write_ppas, mod;
        pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
        max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
        pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
                                max_write_ppas : nvm_max_phys_sects(dev);
        pblk_set_sec_per_write(pblk, pblk->min_write_pgs);

        if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
                pr_err("pblk: cannot support device max_phys_sect\n");

        div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
                pr_err("pblk: bad configuration of sectors/pages\n");
        l_mg->nr_lines = geo->blks_per_lun;
        l_mg->log_line = l_mg->data_line = NULL;
        l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
        l_mg->nr_free_lines = 0;
        bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);

        lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
        lm->blk_per_line = geo->nr_luns;
        lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
        lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
        lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
        lm->mid_thrs = lm->sec_per_line / 2;
        lm->high_thrs = lm->sec_per_line / 4;
        lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
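
        /*
         * A line stripes one block from every LUN, so there are as many lines
         * as blocks per LUN and sec_per_line = sec_per_blk * nr_luns. The
         * mid/high thresholds (half and a quarter of a line) are later used
         * to sort lines into the GC lists by valid-sector count.
         */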
        /* Calculate necessary pages for smeta. See comment over struct
         * line_smeta definition
         */
        lm->smeta_sec = i * geo->sec_per_pl;
        lm->smeta_len = lm->smeta_sec * geo->sec_size;

        smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
        if (smeta_len > lm->smeta_len) {
        /* Calculate necessary pages for emeta. See comment over struct
         * line_emeta definition
         */
        lm->emeta_sec[0] = i * geo->sec_per_pl;
        lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;

        emeta_len = calc_emeta_len(pblk);
        if (emeta_len > lm->emeta_len[0]) {

        lm->emeta_bb = geo->nr_luns > i ? geo->nr_luns - i : 0;
        lm->min_blk_line = 1;
        if (geo->nr_luns > 1)
                lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
                                        lm->emeta_sec[0], geo->sec_per_blk);

        if (lm->min_blk_line > lm->blk_per_line) {
                pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
        ret = pblk_lines_alloc_metadata(pblk);

        l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
        if (!l_mg->bb_template) {

        l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
                goto fail_free_bb_template;

        bb_distance = (geo->nr_luns) * geo->sec_per_pl;
        for (i = 0; i < lm->sec_per_line; i += bb_distance)
                bitmap_set(l_mg->bb_template, i, geo->sec_per_pl);
        INIT_LIST_HEAD(&l_mg->free_list);
        INIT_LIST_HEAD(&l_mg->corrupt_list);
        INIT_LIST_HEAD(&l_mg->bad_list);
        INIT_LIST_HEAD(&l_mg->gc_full_list);
        INIT_LIST_HEAD(&l_mg->gc_high_list);
        INIT_LIST_HEAD(&l_mg->gc_mid_list);
        INIT_LIST_HEAD(&l_mg->gc_low_list);
        INIT_LIST_HEAD(&l_mg->gc_empty_list);

        INIT_LIST_HEAD(&l_mg->emeta_list);

        l_mg->gc_lists[0] = &l_mg->gc_high_list;
        l_mg->gc_lists[1] = &l_mg->gc_mid_list;
        l_mg->gc_lists[2] = &l_mg->gc_low_list;

        spin_lock_init(&l_mg->free_lock);
        spin_lock_init(&l_mg->close_lock);
        spin_lock_init(&l_mg->gc_lock);
        pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
                                GFP_KERNEL);
                goto fail_free_bb_aux;
        for (i = 0; i < l_mg->nr_lines; i++) {

                line = &pblk->lines[i];

                line->type = PBLK_LINETYPE_FREE;
                line->state = PBLK_LINESTATE_FREE;
                line->gc_group = PBLK_LINEGC_NONE;
                line->vsc = &l_mg->vsc_list[i];
                spin_lock_init(&line->lock);

                ret = pblk_alloc_line_bitmaps(pblk, line);
                        goto fail_free_lines;
                nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
                if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
                        pblk_free_line_bitmaps(line);
                        goto fail_free_lines;

                blk_in_line = lm->blk_per_line - nr_bad_blks;
                if (blk_in_line < lm->min_blk_line) {
                        line->state = PBLK_LINESTATE_BAD;
                        list_add_tail(&line->list, &l_mg->bad_list);

                nr_free_blks += blk_in_line;
                atomic_set(&line->blk_in_line, blk_in_line);

                l_mg->nr_free_lines++;
                list_add_tail(&line->list, &l_mg->free_list);
        pblk_set_provision(pblk, nr_free_blks);

        /* Cleanup per-LUN bad block lists - managed within lines on run-time */
        for (i = 0; i < geo->nr_luns; i++)
                kfree(pblk->luns[i].bb_list);

                pblk_free_line_bitmaps(&pblk->lines[i]);
fail_free_bb_template:
        kfree(l_mg->bb_template);

        pblk_line_meta_free(pblk);

        for (i = 0; i < geo->nr_luns; i++)
                kfree(pblk->luns[i].bb_list);
static int pblk_writer_init(struct pblk *pblk)

        timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0);
        mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));

        pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
        if (IS_ERR(pblk->writer_ts)) {
                pr_err("pblk: could not allocate writer kthread\n");
                return PTR_ERR(pblk->writer_ts);
static void pblk_writer_stop(struct pblk *pblk)

        /* The pipeline must be stopped and the write buffer emptied before the
         * write thread is stopped
         */
        WARN(pblk_rb_read_count(&pblk->rwb),
                        "Stopping not fully persisted write buffer\n");
        WARN(pblk_rb_sync_count(&pblk->rwb),
                        "Stopping not fully synced write buffer\n");

        kthread_stop(pblk->writer_ts);
        del_timer(&pblk->wtimer);
static void pblk_free(struct pblk *pblk)

        pblk_luns_free(pblk);
        pblk_lines_free(pblk);
        pblk_line_meta_free(pblk);
        pblk_core_free(pblk);
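
/*
 * Quiesce the instance before pblk_free(): stop the write pipeline and the
 * writer thread, sync the L2P table with whatever reached the write buffer,
 * then release the rate limiter.
 */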
static void pblk_tear_down(struct pblk *pblk)

        pblk_pipeline_stop(pblk);
        pblk_writer_stop(pblk);
        pblk_rb_sync_l2p(&pblk->rwb);

        pblk_rl_free(&pblk->rl);

        pr_debug("pblk: consistent tear down\n");
static void pblk_exit(void *private)

        struct pblk *pblk = private;

        down_write(&pblk_lock);

        pblk_tear_down(pblk);

#ifdef CONFIG_NVM_DEBUG
        pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));

        up_write(&pblk_lock);
static sector_t pblk_capacity(void *private)

        struct pblk *pblk = private;

        return pblk->capacity * NR_PHY_IN_LOG;
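
/*
 * Target constructor. Ordering matters: LUNs and lines must exist before the
 * core resources and the L2P table, line configuration/recovery needs a
 * populated L2P, and the writer thread and GC are started only once
 * everything they depend on is in place.
 */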
static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
                       int flags)

        struct nvm_geo *geo = &dev->geo;
        struct request_queue *bqueue = dev->q;
        struct request_queue *tqueue = tdisk->queue;
        if (dev->identity.dom & NVM_RSP_L2P) {
                pr_err("pblk: host-side L2P table not supported. (%x)\n",
                return ERR_PTR(-EINVAL);

        pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
                return ERR_PTR(-ENOMEM);
        pblk->state = PBLK_STATE_RUNNING;
        pblk->gc.gc_enabled = 0;

        spin_lock_init(&pblk->trans_lock);
        spin_lock_init(&pblk->lock);

        if (flags & NVM_TARGET_FACTORY)
                pblk_setup_uuid(pblk);
#ifdef CONFIG_NVM_DEBUG
        atomic_long_set(&pblk->inflight_writes, 0);
        atomic_long_set(&pblk->padded_writes, 0);
        atomic_long_set(&pblk->padded_wb, 0);
        atomic_long_set(&pblk->nr_flush, 0);
        atomic_long_set(&pblk->req_writes, 0);
        atomic_long_set(&pblk->sub_writes, 0);
        atomic_long_set(&pblk->sync_writes, 0);
        atomic_long_set(&pblk->inflight_reads, 0);
        atomic_long_set(&pblk->cache_reads, 0);
        atomic_long_set(&pblk->sync_reads, 0);
        atomic_long_set(&pblk->recov_writes, 0);
        atomic_long_set(&pblk->recov_writes, 0);
        atomic_long_set(&pblk->recov_gc_writes, 0);
        atomic_long_set(&pblk->recov_gc_reads, 0);

        atomic_long_set(&pblk->read_failed, 0);
        atomic_long_set(&pblk->read_empty, 0);
        atomic_long_set(&pblk->read_high_ecc, 0);
        atomic_long_set(&pblk->read_failed_gc, 0);
        atomic_long_set(&pblk->write_failed, 0);
        atomic_long_set(&pblk->erase_failed, 0);
        ret = pblk_luns_init(pblk, dev->luns);
                pr_err("pblk: could not initialize luns\n");

        ret = pblk_lines_init(pblk);
                pr_err("pblk: could not initialize lines\n");
                goto fail_free_luns;

        ret = pblk_core_init(pblk);
                pr_err("pblk: could not initialize core\n");
                goto fail_free_line_meta;

        ret = pblk_l2p_init(pblk);
                pr_err("pblk: could not initialize maps\n");
                goto fail_free_core;

        ret = pblk_lines_configure(pblk, flags);
                pr_err("pblk: could not configure lines\n");

        ret = pblk_writer_init(pblk);
                pr_err("pblk: could not initialize write thread\n");
                goto fail_free_lines;

        ret = pblk_gc_init(pblk);
                pr_err("pblk: could not initialize gc\n");
                goto fail_stop_writer;
        /* inherit the size from the underlying device */
        blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
        blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));

        blk_queue_write_cache(tqueue, true, false);

        tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size;
        tqueue->limits.discard_alignment = 0;
        blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue);
        pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
                        geo->nr_luns, pblk->l_mg.nr_lines,
                        (unsigned long long)pblk->rl.nr_secs,
                        pblk->rwb.nr_entries);

        wake_up_process(pblk->writer_ts);

        /* Check if we need to start GC */
        pblk_gc_should_kick(pblk);
        pblk_writer_stop(pblk);

        pblk_lines_free(pblk);

        pblk_l2p_free(pblk);

        pblk_core_free(pblk);
fail_free_line_meta:
        pblk_line_meta_free(pblk);

        pblk_luns_free(pblk);

        return ERR_PTR(ret);
/* physical block device target */
static struct nvm_tgt_type tt_pblk = {

        .version        = {1, 0, 0},

        .make_rq        = pblk_make_rq,
        .capacity       = pblk_capacity,

        .sysfs_init     = pblk_sysfs_init,
        .sysfs_exit     = pblk_sysfs_exit,
        .owner          = THIS_MODULE,
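
/*
 * Module init allocates the module-wide pblk_bio_set and registers pblk as a
 * LightNVM target type; module exit frees the bio_set and unregisters the
 * target type.
 */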
static int __init pblk_module_init(void)

        pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);

        ret = nvm_register_tgt_type(&tt_pblk);

                bioset_free(pblk_bio_set);
static void pblk_module_exit(void)

        bioset_free(pblk_bio_set);
        nvm_unregister_tgt_type(&tt_pblk);

module_init(pblk_module_init);
module_exit(pblk_module_exit);
MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");