// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Implementation of a physical block-device target for Open-channel SSDs.
 *
 * pblk-init.c - pblk's initialization.
 */

#include "pblk.h"
#include "pblk-trace.h"

static unsigned int write_buffer_size;

module_param(write_buffer_size, uint, 0644);
MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer");

struct pblk_global_caches {
	struct kmem_cache	*ws;
	struct kmem_cache	*rec;
	struct kmem_cache	*g_rq;
	struct kmem_cache	*w_rq;

	struct kref		kref;

	struct mutex		mutex; /* Ensures consistency between
					* caches and kref
					*/
};

static struct pblk_global_caches pblk_caches = {
	.mutex = __MUTEX_INITIALIZER(pblk_caches.mutex),
	.kref = KREF_INIT(0),
};

struct bio_set pblk_bio_set;

static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
			  struct bio *bio)
{
	int ret;

	/* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
	 * constraint. Writes can be of arbitrary size.
	 */
	if (bio_data_dir(bio) == READ) {
		blk_queue_split(q, &bio);
		ret = pblk_submit_read(pblk, bio);
		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
			bio_put(bio);

		return ret;
	}

	/* Prevent deadlock in the case of a modest LUN configuration and large
	 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
	 * available for user I/O.
	 */
	if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
		blk_queue_split(q, &bio);

	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}

static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
{
	struct pblk *pblk = q->queuedata;

	if (bio_op(bio) == REQ_OP_DISCARD) {
		pblk_discard(pblk, bio);
		if (!(bio->bi_opf & REQ_PREFLUSH)) {
			bio_endio(bio);
			return BLK_QC_T_NONE;
		}
	}

	switch (pblk_rw_io(q, pblk, bio)) {
	case NVM_IO_ERR:
		bio_io_error(bio);
		break;
	case NVM_IO_DONE:
		bio_endio(bio);
		break;
	}

	return BLK_QC_T_NONE;
}

static size_t pblk_trans_map_size(struct pblk *pblk)
{
	int entry_size = 8;

	if (pblk->addrf_len < 32)
		entry_size = 4;

	return entry_size * pblk->rl.nr_secs;
}

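/*
 * Example (hypothetical geometry): with a 4 byte entry (addrf_len < 32) and
 * rl.nr_secs == 64M user sectors, pblk_trans_map_size() above comes to
 * 4 * 64M = 256MB of vmalloc'ed L2P table, i.e. roughly 1/1024th of the
 * 256GB of 4KB user sectors that it maps.
 */
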
#ifdef CONFIG_NVM_PBLK_DEBUG
static u32 pblk_l2p_crc(struct pblk *pblk)
{
	size_t map_size;
	u32 crc = ~(u32)0;

	map_size = pblk_trans_map_size(pblk);
	crc = crc32_le(crc, pblk->trans_map, map_size);
	return crc;
}
#endif

static void pblk_l2p_free(struct pblk *pblk)
{
	vfree(pblk->trans_map);
}

static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
{
	struct pblk_line *line = NULL;

	if (factory_init) {
		pblk_setup_uuid(pblk);
	} else {
		line = pblk_recov_l2p(pblk);
		if (IS_ERR(line)) {
			pblk_err(pblk, "could not recover l2p table\n");
			return -EFAULT;
		}
	}

#ifdef CONFIG_NVM_PBLK_DEBUG
	pblk_info(pblk, "init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
#endif

	/* Free full lines directly as GC has not been started yet */
	pblk_gc_free_full_lines(pblk);

	if (!line) {
		/* Configure next line for user data */
		line = pblk_line_get_first_data(pblk);
		if (!line)
			return -EFAULT;
	}

	return 0;
}

static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
{
	sector_t i;
	struct ppa_addr ppa;
	size_t map_size;
	int ret = 0;

	map_size = pblk_trans_map_size(pblk);
	pblk->trans_map = vmalloc(map_size);
	if (!pblk->trans_map)
		return -ENOMEM;

	pblk_ppa_set_empty(&ppa);

	for (i = 0; i < pblk->rl.nr_secs; i++)
		pblk_trans_map_set(pblk, i, ppa);

	ret = pblk_l2p_recover(pblk, factory_init);
	if (ret)
		vfree(pblk->trans_map);

	return ret;
}

static void pblk_rwb_free(struct pblk *pblk)
{
	if (pblk_rb_tear_down_check(&pblk->rwb))
		pblk_err(pblk, "write buffer error on tear down\n");

	pblk_rb_free(&pblk->rwb);
}

static int pblk_rwb_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	unsigned long buffer_size;
	int pgs_in_buffer, threshold;

	threshold = geo->mw_cunits * geo->all_luns;
	pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt)
								* geo->all_luns;

	if (write_buffer_size && (write_buffer_size > pgs_in_buffer))
		buffer_size = write_buffer_size;
	else
		buffer_size = pgs_in_buffer;

	return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs);
}

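/*
 * Sizing example for the write buffer above (hypothetical geometry): with
 * mw_cunits = 8, ws_opt = 4 and all_luns = 64, the default buffer holds
 * (max(8, 4) + 4) * 64 = 768 entries and the flush threshold is
 * 8 * 64 = 512 entries. The write_buffer_size module parameter only ever
 * grows the buffer beyond this default; smaller values are ignored.
 */
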
/* Minimum pages needed within a lun */
#define ADDR_POOL_SIZE 64

static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo,
			     struct nvm_addrf_12 *dst)
{
	struct nvm_addrf_12 *src = (struct nvm_addrf_12 *)&geo->addrf;
	int power_len;

	/* Re-calculate channel and lun format to adapt to configuration */
	power_len = get_count_order(geo->num_ch);
	if (1 << power_len != geo->num_ch) {
		pblk_err(pblk, "supports only power-of-two channel config.\n");
		return -EINVAL;
	}
	dst->ch_len = power_len;

	power_len = get_count_order(geo->num_lun);
	if (1 << power_len != geo->num_lun) {
		pblk_err(pblk, "supports only power-of-two LUN config.\n");
		return -EINVAL;
	}
	dst->lun_len = power_len;

	dst->blk_len = src->blk_len;
	dst->pg_len = src->pg_len;
	dst->pln_len = src->pln_len;
	dst->sec_len = src->sec_len;

	dst->sec_offset = 0;
	dst->pln_offset = dst->sec_len;
	dst->ch_offset = dst->pln_offset + dst->pln_len;
	dst->lun_offset = dst->ch_offset + dst->ch_len;
	dst->pg_offset = dst->lun_offset + dst->lun_len;
	dst->blk_offset = dst->pg_offset + dst->pg_len;

	dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
	dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
	dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
	dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
	dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
	dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;

	return dst->blk_offset + src->blk_len;
}

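/*
 * Layout example for the 1.2 format built above (hypothetical geometry):
 * with sec_len = 2, pln_len = 1, ch_len = 4, lun_len = 3, pg_len = 8 and
 * blk_len = 10, the offsets become sec = 0, pln = 2, ch = 3, lun = 7,
 * pg = 10 and blk = 18, so the function returns 18 + 10 = 28, the total
 * number of address bits pblk needs for such a device.
 */
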
static int pblk_set_addrf_20(struct nvm_geo *geo, struct nvm_addrf *adst,
			     struct pblk_addrf *udst)
{
	struct nvm_addrf *src = &geo->addrf;

	adst->ch_len = get_count_order(geo->num_ch);
	adst->lun_len = get_count_order(geo->num_lun);
	adst->chk_len = src->chk_len;
	adst->sec_len = src->sec_len;

	adst->sec_offset = 0;
	adst->ch_offset = adst->sec_len;
	adst->lun_offset = adst->ch_offset + adst->ch_len;
	adst->chk_offset = adst->lun_offset + adst->lun_len;

	adst->sec_mask = ((1ULL << adst->sec_len) - 1) << adst->sec_offset;
	adst->chk_mask = ((1ULL << adst->chk_len) - 1) << adst->chk_offset;
	adst->lun_mask = ((1ULL << adst->lun_len) - 1) << adst->lun_offset;
	adst->ch_mask = ((1ULL << adst->ch_len) - 1) << adst->ch_offset;

	udst->sec_stripe = geo->ws_opt;
	udst->ch_stripe = geo->num_ch;
	udst->lun_stripe = geo->num_lun;

	udst->sec_lun_stripe = udst->sec_stripe * udst->ch_stripe;
	udst->sec_ws_stripe = udst->sec_lun_stripe * udst->lun_stripe;

	return adst->chk_offset + adst->chk_len;
}

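/*
 * Striping example for the 2.0 format above (hypothetical geometry): with
 * ws_opt = 8, num_ch = 4 and num_lun = 8, writes place 8 consecutive
 * sectors per chunk, sec_lun_stripe = 8 * 4 = 32 sectors span the channel
 * stripe, and sec_ws_stripe = 32 * 8 = 256 sectors make up a full write
 * stripe across every channel and LUN.
 */
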
static int pblk_set_addrf(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int mod;

	switch (geo->version) {
	case NVM_OCSSD_SPEC_12:
		div_u64_rem(geo->clba, pblk->min_write_pgs, &mod);
		if (mod) {
			pblk_err(pblk, "bad configuration of sectors/pages\n");
			return -EINVAL;
		}

		pblk->addrf_len = pblk_set_addrf_12(pblk, geo,
							(void *)&pblk->addrf);
		break;
	case NVM_OCSSD_SPEC_20:
		pblk->addrf_len = pblk_set_addrf_20(geo, (void *)&pblk->addrf,
							&pblk->uaddrf);
		break;
	default:
		pblk_err(pblk, "OCSSD revision not supported (%d)\n",
							geo->version);
		return -EINVAL;
	}

	return 0;
}

static int pblk_create_global_caches(void)
{
	pblk_caches.ws = kmem_cache_create("pblk_blk_ws",
				sizeof(struct pblk_line_ws), 0, 0, NULL);
	if (!pblk_caches.ws)
		return -ENOMEM;

	pblk_caches.rec = kmem_cache_create("pblk_rec",
				sizeof(struct pblk_rec_ctx), 0, 0, NULL);
	if (!pblk_caches.rec)
		goto fail_destroy_ws;

	pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
				0, 0, NULL);
	if (!pblk_caches.g_rq)
		goto fail_destroy_rec;

	pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
				0, 0, NULL);
	if (!pblk_caches.w_rq)
		goto fail_destroy_g_rq;

	return 0;

fail_destroy_g_rq:
	kmem_cache_destroy(pblk_caches.g_rq);
fail_destroy_rec:
	kmem_cache_destroy(pblk_caches.rec);
fail_destroy_ws:
	kmem_cache_destroy(pblk_caches.ws);

	return -ENOMEM;
}

static int pblk_get_global_caches(void)
{
	int ret = 0;

	mutex_lock(&pblk_caches.mutex);

	if (kref_read(&pblk_caches.kref) > 0) {
		kref_get(&pblk_caches.kref);
		mutex_unlock(&pblk_caches.mutex);
		return 0;
	}

	ret = pblk_create_global_caches();
	if (!ret)
		kref_get(&pblk_caches.kref);

	mutex_unlock(&pblk_caches.mutex);

	return ret;
}

static void pblk_destroy_global_caches(struct kref *ref)
{
	struct pblk_global_caches *c;

	c = container_of(ref, struct pblk_global_caches, kref);

	kmem_cache_destroy(c->ws);
	kmem_cache_destroy(c->rec);
	kmem_cache_destroy(c->g_rq);
	kmem_cache_destroy(c->w_rq);
}

static void pblk_put_global_caches(void)
{
	mutex_lock(&pblk_caches.mutex);
	kref_put(&pblk_caches.kref, pblk_destroy_global_caches);
	mutex_unlock(&pblk_caches.mutex);
}

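/*
 * Note on the cache lifetime scheme above: the slab caches are shared by
 * every pblk instance on the host. The first instance to come up creates
 * them, later instances only take a reference, and the caches are torn
 * down when the last instance drops its reference; pblk_caches.mutex keeps
 * the kref check and the create step atomic with respect to each other.
 */
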
static int pblk_core_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int ret, max_write_ppas;

	atomic64_set(&pblk->user_wa, 0);
	atomic64_set(&pblk->pad_wa, 0);
	atomic64_set(&pblk->gc_wa, 0);
	pblk->user_rst_wa = 0;
	pblk->pad_rst_wa = 0;
	pblk->gc_rst_wa = 0;

	atomic64_set(&pblk->nr_flush, 0);
	pblk->nr_flush_rst = 0;

	pblk->min_write_pgs = geo->ws_opt;
	max_write_ppas = pblk->min_write_pgs * geo->all_luns;
	pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
	pblk->max_write_pgs = min_t(int, pblk->max_write_pgs,
		queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT));
	pblk_set_sec_per_write(pblk, pblk->min_write_pgs);

	pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t),
								GFP_KERNEL);
	if (!pblk->pad_dist)
		return -ENOMEM;

	if (pblk_get_global_caches())
		goto fail_free_pad_dist;

	/* Internal bios can be at most the sectors signaled by the device. */
	ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0);
	if (ret)
		goto free_global_caches;

	ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE,
				     pblk_caches.ws);
	if (ret)
		goto free_page_bio_pool;

	ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns,
				     pblk_caches.rec);
	if (ret)
		goto free_gen_ws_pool;

	ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns,
				     pblk_caches.g_rq);
	if (ret)
		goto free_rec_pool;

	ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns,
				     pblk_caches.g_rq);
	if (ret)
		goto free_r_rq_pool;

	ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns,
				     pblk_caches.w_rq);
	if (ret)
		goto free_e_rq_pool;

	pblk->close_wq = alloc_workqueue("pblk-close-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
	if (!pblk->close_wq)
		goto free_w_rq_pool;

	pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!pblk->bb_wq)
		goto free_close_wq;

	pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!pblk->r_end_wq)
		goto free_bb_wq;

	if (pblk_set_addrf(pblk))
		goto free_r_end_wq;

	INIT_LIST_HEAD(&pblk->compl_list);
	INIT_LIST_HEAD(&pblk->resubmit_list);

	return 0;

free_r_end_wq:
	destroy_workqueue(pblk->r_end_wq);
free_bb_wq:
	destroy_workqueue(pblk->bb_wq);
free_close_wq:
	destroy_workqueue(pblk->close_wq);
free_w_rq_pool:
	mempool_exit(&pblk->w_rq_pool);
free_e_rq_pool:
	mempool_exit(&pblk->e_rq_pool);
free_r_rq_pool:
	mempool_exit(&pblk->r_rq_pool);
free_rec_pool:
	mempool_exit(&pblk->rec_pool);
free_gen_ws_pool:
	mempool_exit(&pblk->gen_ws_pool);
free_page_bio_pool:
	mempool_exit(&pblk->page_bio_pool);
free_global_caches:
	pblk_put_global_caches();
fail_free_pad_dist:
	kfree(pblk->pad_dist);
	return -ENOMEM;
}

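/*
 * Write sizing example for pblk_core_init() above (hypothetical geometry):
 * with ws_opt = 8 and all_luns = 64, min_write_pgs is 8 and the raw limit
 * is 8 * 64 = 512 sectors per write, which is then clamped to NVM_MAX_VLBA
 * (the 64-entry completion bitmap limit mentioned at the top of this file)
 * and to whatever the underlying request queue accepts, so max_write_pgs
 * ends up at 64 sectors per command on such a device.
 */
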
static void pblk_core_free(struct pblk *pblk)
{
	if (pblk->close_wq)
		destroy_workqueue(pblk->close_wq);

	if (pblk->r_end_wq)
		destroy_workqueue(pblk->r_end_wq);

	if (pblk->bb_wq)
		destroy_workqueue(pblk->bb_wq);

	mempool_exit(&pblk->page_bio_pool);
	mempool_exit(&pblk->gen_ws_pool);
	mempool_exit(&pblk->rec_pool);
	mempool_exit(&pblk->r_rq_pool);
	mempool_exit(&pblk->e_rq_pool);
	mempool_exit(&pblk->w_rq_pool);

	pblk_put_global_caches();
	kfree(pblk->pad_dist);
}

static void pblk_line_mg_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int i;

	kfree(l_mg->bb_template);
	kfree(l_mg->bb_aux);
	kfree(l_mg->vsc_list);

	for (i = 0; i < PBLK_DATA_LINES; i++) {
		kfree(l_mg->sline_meta[i]);
		pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
		kfree(l_mg->eline_meta[i]);
	}

	mempool_destroy(l_mg->bitmap_pool);
	kmem_cache_destroy(l_mg->bitmap_cache);
}

static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg,
				struct pblk_line *line)
{
	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;

	kfree(line->blk_bitmap);
	kfree(line->erase_bitmap);
	kfree(line->chks);

	pblk_mfree(w_err_gc->lba_list, l_mg->emeta_alloc_type);
	kfree(w_err_gc);
}

static void pblk_lines_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	int i;

	spin_lock(&l_mg->free_lock);
	for (i = 0; i < l_mg->nr_lines; i++) {
		line = &pblk->lines[i];

		pblk_line_free(line);
		pblk_line_meta_free(l_mg, line);
	}
	spin_unlock(&l_mg->free_lock);

	pblk_line_mg_free(pblk);

	kfree(pblk->luns);
	kfree(pblk->lines);
}

static int pblk_luns_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int i;

	/* TODO: Implement unbalanced LUN support */
	if (geo->num_lun < 0) {
		pblk_err(pblk, "unbalanced LUN config.\n");
		return -EINVAL;
	}

	pblk->luns = kcalloc(geo->all_luns, sizeof(struct pblk_lun),
								GFP_KERNEL);
	if (!pblk->luns)
		return -ENOMEM;

	for (i = 0; i < geo->all_luns; i++) {
		/* Stripe across channels */
		int ch = i % geo->num_ch;
		int lun_raw = i / geo->num_ch;
		int lunid = lun_raw + ch * geo->num_lun;

		rlun = &pblk->luns[i];
		rlun->bppa = dev->luns[lunid];

		sema_init(&rlun->wr_sem, 1);
	}

	return 0;
}

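/*
 * Indexing example for the channel striping above (hypothetical geometry):
 * with num_ch = 4 and num_lun = 8, logical LUN i = 5 maps to ch = 5 % 4 = 1
 * and lun_raw = 5 / 4 = 1, so lunid = 1 + 1 * 8 = 9: consecutive logical
 * LUNs land on different channels, spreading parallel writes across the
 * device's channels.
 */
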
/* See comment over struct line_emeta definition */
static unsigned int calc_emeta_len(struct pblk *pblk)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;

	/* Round to sector size so that lba_list starts on its own sector */
	lm->emeta_sec[1] = DIV_ROUND_UP(
			sizeof(struct line_emeta) + lm->blk_bitmap_len +
			sizeof(struct wa_counters), geo->csecs);
	lm->emeta_len[1] = lm->emeta_sec[1] * geo->csecs;

	/* Round to sector size so that vsc_list starts on its own sector */
	lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
	lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
			geo->csecs);
	lm->emeta_len[2] = lm->emeta_sec[2] * geo->csecs;

	lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
			geo->csecs);
	lm->emeta_len[3] = lm->emeta_sec[3] * geo->csecs;

	lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);

	return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}

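/*
 * The three lengths summed above correspond to the regions of the emeta
 * buffer: [1] the header plus bad-block bitmap and write-amplification
 * counters, [2] the per-sector lba_list for the line's data sectors and
 * [3] the per-line valid-sector-count (vsc) list. Each region is rounded
 * up to whole device sectors, and index [0] holds the grand total.
 */
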
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct nvm_geo *geo = &dev->geo;
	sector_t provisioned;
	int sec_meta, blk_meta;

	if (geo->op == NVM_TARGET_DEFAULT_OP)
		pblk->op = PBLK_DEFAULT_OP;
	else
		pblk->op = geo->op;

	provisioned = nr_free_blks;
	provisioned *= (100 - pblk->op);
	sector_div(provisioned, 100);

	pblk->op_blks = nr_free_blks - provisioned;

	/* Internally pblk manages all free blocks, but all calculations based
	 * on user capacity consider only provisioned blocks
	 */
	pblk->rl.total_blocks = nr_free_blks;
	pblk->rl.nr_secs = nr_free_blks * geo->clba;

	/* Consider sectors used for metadata */
	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
	blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);

	pblk->capacity = (provisioned - blk_meta) * geo->clba;

	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
}

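/*
 * Provisioning example for pblk_set_provision() above (hypothetical
 * numbers): with nr_free_blks = 1000 chunks and an over-provisioning
 * ratio of 11%, provisioned = 1000 * 89 / 100 = 890 chunks and
 * op_blks = 110 chunks are held back for garbage collection; the exposed
 * capacity is then those 890 chunks minus the whole chunks consumed by
 * per-line smeta/emeta, converted to sectors via clba.
 */
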
static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line,
				   struct nvm_chk_meta *meta)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	int i, nr_bad_chks = 0;

	for (i = 0; i < lm->blk_per_line; i++) {
		struct pblk_lun *rlun = &pblk->luns[i];
		struct nvm_chk_meta *chunk;
		struct nvm_chk_meta *chunk_meta;
		struct ppa_addr ppa;
		int pos;

		ppa = rlun->bppa;
		pos = pblk_ppa_to_pos(geo, ppa);
		chunk = &line->chks[pos];

		ppa.m.chk = line->id;
		chunk_meta = pblk_chunk_get_off(pblk, meta, ppa);

		chunk->state = chunk_meta->state;
		chunk->type = chunk_meta->type;
		chunk->wi = chunk_meta->wi;
		chunk->slba = chunk_meta->slba;
		chunk->cnlb = chunk_meta->cnlb;
		chunk->wp = chunk_meta->wp;

		trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa,
					chunk->state);

		if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
			WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
			continue;
		}

		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
			continue;

		set_bit(pos, line->blk_bitmap);
		nr_bad_chks++;
	}

	return nr_bad_chks;
}

static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
				 void *chunk_meta, int line_id)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	long nr_bad_chks, chk_in_line;

	line->pblk = pblk;
	line->id = line_id;
	line->type = PBLK_LINETYPE_FREE;
	line->state = PBLK_LINESTATE_NEW;
	line->gc_group = PBLK_LINEGC_NONE;
	line->vsc = &l_mg->vsc_list[line_id];
	spin_lock_init(&line->lock);

	nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta);

	chk_in_line = lm->blk_per_line - nr_bad_chks;
	if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line ||
					chk_in_line < lm->min_blk_line) {
		line->state = PBLK_LINESTATE_BAD;
		list_add_tail(&line->list, &l_mg->bad_list);
		return 0;
	}

	atomic_set(&line->blk_in_line, chk_in_line);
	list_add_tail(&line->list, &l_mg->free_list);
	l_mg->nr_free_lines++;

	return chk_in_line;
}

static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;

	line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->blk_bitmap)
		return -ENOMEM;

	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->erase_bitmap)
		goto free_blk_bitmap;

	line->chks = kmalloc_array(lm->blk_per_line,
				   sizeof(struct nvm_chk_meta), GFP_KERNEL);
	if (!line->chks)
		goto free_erase_bitmap;

	line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL);
	if (!line->w_err_gc)
		goto free_chks;

	return 0;

free_chks:
	kfree(line->chks);
free_erase_bitmap:
	kfree(line->erase_bitmap);
free_blk_bitmap:
	kfree(line->blk_bitmap);

	return -ENOMEM;
}

static int pblk_line_mg_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	int i, bb_distance;

	l_mg->nr_lines = geo->num_chk;
	l_mg->log_line = l_mg->data_line = NULL;
	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
	l_mg->nr_free_lines = 0;
	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);

	INIT_LIST_HEAD(&l_mg->free_list);
	INIT_LIST_HEAD(&l_mg->corrupt_list);
	INIT_LIST_HEAD(&l_mg->bad_list);
	INIT_LIST_HEAD(&l_mg->gc_full_list);
	INIT_LIST_HEAD(&l_mg->gc_high_list);
	INIT_LIST_HEAD(&l_mg->gc_mid_list);
	INIT_LIST_HEAD(&l_mg->gc_low_list);
	INIT_LIST_HEAD(&l_mg->gc_empty_list);
	INIT_LIST_HEAD(&l_mg->gc_werr_list);

	INIT_LIST_HEAD(&l_mg->emeta_list);

	l_mg->gc_lists[0] = &l_mg->gc_werr_list;
	l_mg->gc_lists[1] = &l_mg->gc_high_list;
	l_mg->gc_lists[2] = &l_mg->gc_mid_list;
	l_mg->gc_lists[3] = &l_mg->gc_low_list;

	spin_lock_init(&l_mg->free_lock);
	spin_lock_init(&l_mg->close_lock);
	spin_lock_init(&l_mg->gc_lock);

	l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
	if (!l_mg->vsc_list)
		goto fail;

	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_template)
		goto fail_free_vsc_list;

	l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_aux)
		goto fail_free_bb_template;

	/* smeta is always small enough to fit on a kmalloc memory allocation,
	 * emeta depends on the number of LUNs allocated to the pblk instance
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
		if (!l_mg->sline_meta[i])
			goto fail_free_smeta;
	}

	l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap",
			lm->sec_bitmap_len, 0, 0, NULL);
	if (!l_mg->bitmap_cache)
		goto fail_free_smeta;

	/* the bitmap pool is used for both valid and map bitmaps */
	l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2,
				l_mg->bitmap_cache);
	if (!l_mg->bitmap_pool)
		goto fail_destroy_bitmap_cache;

	/* emeta allocates three different buffers for managing metadata with
	 * in-memory and in-media layouts
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		struct pblk_emeta *emeta;

		emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
		if (!emeta)
			goto fail_free_emeta;

		if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
			l_mg->emeta_alloc_type = PBLK_VMALLOC_META;

			emeta->buf = vmalloc(lm->emeta_len[0]);
			if (!emeta->buf) {
				kfree(emeta);
				goto fail_free_emeta;
			}

			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
		} else {
			l_mg->emeta_alloc_type = PBLK_KMALLOC_META;

			emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
			if (!emeta->buf) {
				kfree(emeta);
				goto fail_free_emeta;
			}

			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
		}
	}

	for (i = 0; i < l_mg->nr_lines; i++)
		l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);

	bb_distance = (geo->all_luns) * geo->ws_opt;
	for (i = 0; i < lm->sec_per_line; i += bb_distance)
		bitmap_set(l_mg->bb_template, i, geo->ws_opt);

	return 0;

fail_free_emeta:
	while (--i >= 0) {
		if (l_mg->emeta_alloc_type == PBLK_VMALLOC_META)
			vfree(l_mg->eline_meta[i]->buf);
		else
			kfree(l_mg->eline_meta[i]->buf);
		kfree(l_mg->eline_meta[i]);
	}

	mempool_destroy(l_mg->bitmap_pool);
fail_destroy_bitmap_cache:
	kmem_cache_destroy(l_mg->bitmap_cache);
fail_free_smeta:
	for (i = 0; i < PBLK_DATA_LINES; i++)
		kfree(l_mg->sline_meta[i]);
fail_free_bb_template:
	kfree(l_mg->bb_template);
fail_free_vsc_list:
	kfree(l_mg->vsc_list);
fail:
	return -ENOMEM;
}

static int pblk_line_meta_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	unsigned int smeta_len, emeta_len;
	int i;

	lm->sec_per_line = geo->clba * geo->all_luns;
	lm->blk_per_line = geo->all_luns;
	lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
	lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
	lm->lun_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
	lm->mid_thrs = lm->sec_per_line / 2;
	lm->high_thrs = lm->sec_per_line / 4;
	lm->meta_distance = (geo->all_luns / 2) * pblk->min_write_pgs;

	/* Calculate necessary pages for smeta. See comment over struct
	 * line_smeta definition
	 */
	i = 1;
add_smeta_page:
	lm->smeta_sec = i * geo->ws_opt;
	lm->smeta_len = lm->smeta_sec * geo->csecs;

	smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
	if (smeta_len > lm->smeta_len) {
		i++;
		goto add_smeta_page;
	}

	/* Calculate necessary pages for emeta. See comment over struct
	 * line_emeta definition
	 */
	i = 1;
add_emeta_page:
	lm->emeta_sec[0] = i * geo->ws_opt;
	lm->emeta_len[0] = lm->emeta_sec[0] * geo->csecs;

	emeta_len = calc_emeta_len(pblk);
	if (emeta_len > lm->emeta_len[0]) {
		i++;
		goto add_emeta_page;
	}

	lm->emeta_bb = geo->all_luns > i ? geo->all_luns - i : 0;

	lm->min_blk_line = 1;
	if (geo->all_luns > 1)
		lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
					lm->emeta_sec[0], geo->clba);

	if (lm->min_blk_line > lm->blk_per_line) {
		pblk_err(pblk, "config. not supported. Min. LUN in line:%d\n",
							lm->blk_per_line);
		return -EINVAL;
	}

	return 0;
}

static int pblk_lines_init(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	void *chunk_meta;
	long nr_free_chks = 0;
	int i, ret;

	ret = pblk_line_meta_init(pblk);
	if (ret)
		return ret;

	ret = pblk_line_mg_init(pblk);
	if (ret)
		return ret;

	ret = pblk_luns_init(pblk);
	if (ret)
		goto fail_free_meta;

	chunk_meta = pblk_get_chunk_meta(pblk);
	if (IS_ERR(chunk_meta)) {
		ret = PTR_ERR(chunk_meta);
		goto fail_free_luns;
	}

	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
								GFP_KERNEL);
	if (!pblk->lines) {
		ret = -ENOMEM;
		goto fail_free_chunk_meta;
	}

	for (i = 0; i < l_mg->nr_lines; i++) {
		line = &pblk->lines[i];

		ret = pblk_alloc_line_meta(pblk, line);
		if (ret)
			goto fail_free_lines;

		nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);

		trace_pblk_line_state(pblk_disk_name(pblk), line->id,
								line->state);
	}

	if (!nr_free_chks) {
		pblk_err(pblk, "too many bad blocks prevent for sane instance\n");
		ret = -EINTR;
		goto fail_free_lines;
	}

	pblk_set_provision(pblk, nr_free_chks);

	vfree(chunk_meta);
	return 0;

fail_free_lines:
	while (--i >= 0)
		pblk_line_meta_free(l_mg, &pblk->lines[i]);
	kfree(pblk->lines);
fail_free_chunk_meta:
	vfree(chunk_meta);
fail_free_luns:
	kfree(pblk->luns);
fail_free_meta:
	pblk_line_mg_free(pblk);

	return ret;
}

static int pblk_writer_init(struct pblk *pblk)
{
	pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
	if (IS_ERR(pblk->writer_ts)) {
		int err = PTR_ERR(pblk->writer_ts);

		if (err != -EINTR)
			pblk_err(pblk, "could not allocate writer kthread (%d)\n",
					err);
		return err;
	}

	timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0);
	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));

	return 0;
}

static void pblk_writer_stop(struct pblk *pblk)
{
	/* The pipeline must be stopped and the write buffer emptied before the
	 * write thread is stopped
	 */
	WARN(pblk_rb_read_count(&pblk->rwb),
			"Stopping not fully persisted write buffer\n");

	WARN(pblk_rb_sync_count(&pblk->rwb),
			"Stopping not fully synced write buffer\n");

	del_timer_sync(&pblk->wtimer);
	if (pblk->writer_ts)
		kthread_stop(pblk->writer_ts);
}

static void pblk_free(struct pblk *pblk)
{
	pblk_lines_free(pblk);
	pblk_l2p_free(pblk);
	pblk_rwb_free(pblk);
	pblk_core_free(pblk);

	kfree(pblk);
}

static void pblk_tear_down(struct pblk *pblk, bool graceful)
{
	if (graceful)
		__pblk_pipeline_flush(pblk);
	__pblk_pipeline_stop(pblk);
	pblk_writer_stop(pblk);
	pblk_rb_sync_l2p(&pblk->rwb);
	pblk_rl_free(&pblk->rl);

	pblk_debug(pblk, "consistent tear down (graceful:%d)\n", graceful);
}

static void pblk_exit(void *private, bool graceful)
{
	struct pblk *pblk = private;

	pblk_gc_exit(pblk, graceful);
	pblk_tear_down(pblk, graceful);

#ifdef CONFIG_NVM_PBLK_DEBUG
	pblk_info(pblk, "exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
#endif

	pblk_free(pblk);
}

static sector_t pblk_capacity(void *private)
{
	struct pblk *pblk = private;

	return pblk->capacity * NR_PHY_IN_LOG;
}

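/*
 * Unit note for pblk_capacity() above: pblk->capacity is kept in device
 * sectors (typically 4KB), while the block layer expects the target size
 * in 512-byte sectors, so the value is scaled by NR_PHY_IN_LOG, the number
 * of logical 512-byte sectors that fit in one physical sector.
 */
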
static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
		       int flags)
{
	struct nvm_geo *geo = &dev->geo;
	struct request_queue *bqueue = dev->q;
	struct request_queue *tqueue = tdisk->queue;
	struct pblk *pblk;
	int ret;

	pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
	if (!pblk)
		return ERR_PTR(-ENOMEM);

	pblk->dev = dev;
	pblk->disk = tdisk;
	pblk->state = PBLK_STATE_RUNNING;
	trace_pblk_state(pblk_disk_name(pblk), pblk->state);
	pblk->gc.gc_enabled = 0;

	if (!(geo->version == NVM_OCSSD_SPEC_12 ||
					geo->version == NVM_OCSSD_SPEC_20)) {
		pblk_err(pblk, "OCSSD version not supported (%u)\n",
							geo->version);
		kfree(pblk);
		return ERR_PTR(-EINVAL);
	}

	spin_lock_init(&pblk->resubmit_lock);
	spin_lock_init(&pblk->trans_lock);
	spin_lock_init(&pblk->lock);

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_set(&pblk->inflight_writes, 0);
	atomic_long_set(&pblk->padded_writes, 0);
	atomic_long_set(&pblk->padded_wb, 0);
	atomic_long_set(&pblk->req_writes, 0);
	atomic_long_set(&pblk->sub_writes, 0);
	atomic_long_set(&pblk->sync_writes, 0);
	atomic_long_set(&pblk->inflight_reads, 0);
	atomic_long_set(&pblk->cache_reads, 0);
	atomic_long_set(&pblk->sync_reads, 0);
	atomic_long_set(&pblk->recov_writes, 0);
	atomic_long_set(&pblk->recov_gc_writes, 0);
	atomic_long_set(&pblk->recov_gc_reads, 0);
#endif

	atomic_long_set(&pblk->read_failed, 0);
	atomic_long_set(&pblk->read_empty, 0);
	atomic_long_set(&pblk->read_high_ecc, 0);
	atomic_long_set(&pblk->read_failed_gc, 0);
	atomic_long_set(&pblk->write_failed, 0);
	atomic_long_set(&pblk->erase_failed, 0);

	ret = pblk_core_init(pblk);
	if (ret) {
		pblk_err(pblk, "could not initialize core\n");
		goto fail;
	}

	ret = pblk_lines_init(pblk);
	if (ret) {
		pblk_err(pblk, "could not initialize lines\n");
		goto fail_free_core;
	}

	ret = pblk_rwb_init(pblk);
	if (ret) {
		pblk_err(pblk, "could not initialize write buffer\n");
		goto fail_free_lines;
	}

	ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY);
	if (ret) {
		pblk_err(pblk, "could not initialize maps\n");
		goto fail_free_rwb;
	}

	ret = pblk_writer_init(pblk);
	if (ret) {
		if (ret != -EINTR)
			pblk_err(pblk, "could not initialize write thread\n");
		goto fail_free_l2p;
	}

	ret = pblk_gc_init(pblk);
	if (ret) {
		pblk_err(pblk, "could not initialize gc\n");
		goto fail_stop_writer;
	}

	/* inherit the size from the underlying device */
	blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
	blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));

	blk_queue_write_cache(tqueue, true, false);

	tqueue->limits.discard_granularity = geo->clba * geo->csecs;
	tqueue->limits.discard_alignment = 0;
	blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
	blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue);

	pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
			geo->all_luns, pblk->l_mg.nr_lines,
			(unsigned long long)pblk->rl.nr_secs,
			pblk->rwb.nr_entries);

	wake_up_process(pblk->writer_ts);

	/* Check if we need to start GC */
	pblk_gc_should_kick(pblk);

	return pblk;

fail_stop_writer:
	pblk_writer_stop(pblk);
fail_free_l2p:
	pblk_l2p_free(pblk);
fail_free_rwb:
	pblk_rwb_free(pblk);
fail_free_lines:
	pblk_lines_free(pblk);
fail_free_core:
	pblk_core_free(pblk);
fail:
	kfree(pblk);
	return ERR_PTR(ret);
}

/* physical block device target */
static struct nvm_tgt_type tt_pblk = {
	.name		= "pblk",
	.version	= {1, 0, 0},

	.make_rq	= pblk_make_rq,
	.capacity	= pblk_capacity,

	.init		= pblk_init,
	.exit		= pblk_exit,

	.sysfs_init	= pblk_sysfs_init,
	.sysfs_exit	= pblk_sysfs_exit,
	.owner		= THIS_MODULE,
};

static int __init pblk_module_init(void)
{
	int ret;

	ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0);
	if (ret)
		return ret;
	ret = nvm_register_tgt_type(&tt_pblk);
	if (ret)
		bioset_exit(&pblk_bio_set);
	return ret;
}

static void pblk_module_exit(void)
{
	bioset_exit(&pblk_bio_set);
	nvm_unregister_tgt_type(&tt_pblk);
}

module_init(pblk_module_init);
module_exit(pblk_module_exit);
MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");