/*
 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Implementation of a physical block-device target for Open-channel SSDs.
 *
 * pblk-init.c - pblk's initialization.
 */
#include "pblk.h"

static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
				*pblk_w_rq_cache, *pblk_line_meta_cache;
static DECLARE_RWSEM(pblk_lock);
struct bio_set *pblk_bio_set;
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
			  struct bio *bio)
{
	int ret;

	/* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
	 * constraint. Writes can be of arbitrary size.
	 */
	if (bio_data_dir(bio) == READ) {
		blk_queue_split(q, &bio);
		ret = pblk_submit_read(pblk, bio);
		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
			bio_put(bio);

		return ret;
	}

	/* Prevent deadlock in the case of a modest LUN configuration and large
	 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
	 * available for user I/O.
	 */
	if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
		blk_queue_split(q, &bio);

	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}
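/*
 * Sizing note (illustrative, assuming 4KB sectors): a 64-bit per-request
 * completion bitmap can track at most 64 sectors, i.e. 64 * 4KB = 256KB,
 * which is why reads are split above while writes may stay arbitrarily
 * large and are only split when they would overrun the rate limiter's
 * per-user budget.
 */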
static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
{
	struct pblk *pblk = q->queuedata;

	if (bio_op(bio) == REQ_OP_DISCARD) {
		pblk_discard(pblk, bio);
		if (!(bio->bi_opf & REQ_PREFLUSH)) {
			bio_endio(bio);
			return BLK_QC_T_NONE;
		}
	}

	switch (pblk_rw_io(q, pblk, bio)) {
	case NVM_IO_ERR:
		bio_io_error(bio);
		break;
	case NVM_IO_DONE:
		bio_endio(bio);
		break;
	}

	return BLK_QC_T_NONE;
}
static void pblk_l2p_free(struct pblk *pblk)
{
	vfree(pblk->trans_map);
}

static int pblk_l2p_init(struct pblk *pblk)
{
	sector_t i;
	struct ppa_addr ppa;
	int entry_size = 8;

	if (pblk->ppaf_bitsize < 32)
		entry_size = 4;

	pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs);
	if (!pblk->trans_map)
		return -ENOMEM;

	pblk_ppa_set_empty(&ppa);

	for (i = 0; i < pblk->rl.nr_secs; i++)
		pblk_trans_map_set(pblk, i, ppa);

	return 0;
}
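/*
 * Footprint example (illustrative numbers): with compact 4-byte entries
 * (possible while ppaf_bitsize < 32) and 2^28 mapped sectors - roughly 1TB
 * of 4KB sectors - the in-memory L2P table costs 2^28 * 4B = 1GB, which is
 * why it is allocated with vmalloc() rather than kmalloc().
 */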
static void pblk_rwb_free(struct pblk *pblk)
{
	if (pblk_rb_tear_down_check(&pblk->rwb))
		pr_err("pblk: write buffer error on tear down\n");

	pblk_rb_data_free(&pblk->rwb);
	vfree(pblk_rb_entries_ref(&pblk->rwb));
}
static int pblk_rwb_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_rb_entry *entries;
	unsigned long nr_entries;
	unsigned int power_size, power_seg_sz;

	nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);

	entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
	if (!entries)
		return -ENOMEM;

	power_size = get_count_order(nr_entries);
	power_seg_sz = get_count_order(geo->sec_size);

	return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
}
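/*
 * The ring buffer works on power-of-two sizes: get_count_order() rounds up
 * to the next exponent, e.g. an order of 6 (64 entries) for a request of 48
 * entries. The same applies to the segment size derived from geo->sec_size
 * (4096 bytes -> order 12).
 */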
/* Minimum pages needed within a lun */
#define PAGE_POOL_SIZE 16
#define ADDR_POOL_SIZE 64
static int pblk_set_ppaf(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_addr_format ppaf = geo->ppaf;
	int power_len;

	/* Re-calculate channel and lun format to adapt to configuration */
	power_len = get_count_order(geo->nr_chnls);
	if (1 << power_len != geo->nr_chnls) {
		pr_err("pblk: supports only power-of-two channel config.\n");
		return -EINVAL;
	}
	ppaf.ch_len = power_len;

	power_len = get_count_order(geo->luns_per_chnl);
	if (1 << power_len != geo->luns_per_chnl) {
		pr_err("pblk: supports only power-of-two LUN config.\n");
		return -EINVAL;
	}
	ppaf.lun_len = power_len;

	pblk->ppaf.sec_offset = 0;
	pblk->ppaf.pln_offset = ppaf.sect_len;
	pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
	pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
	pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
	pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
	pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
	pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
							pblk->ppaf.pln_offset;
	pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
							pblk->ppaf.ch_offset;
	pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
							pblk->ppaf.lun_offset;
	pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
							pblk->ppaf.pg_offset;
	pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
							pblk->ppaf.blk_offset;

	pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;

	return 0;
}
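/*
 * Layout example (illustrative field widths, not taken from any particular
 * device): with sect_len=2, pln_len=1, ch_len=3, lun_len=2, pg_len=9 and
 * blk_len=11, the offsets computed above become sec=0, pln=2, ch=3, lun=6,
 * pg=8, blk=17 and ppaf_bitsize=28, i.e. every physical address fits in the
 * low 28 bits of a 64-bit PPA.
 */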
static int pblk_init_global_caches(struct pblk *pblk)
{
	char cache_name[PBLK_CACHE_NAME_LEN];

	down_write(&pblk_lock);
	pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws",
				sizeof(struct pblk_line_ws), 0, 0, NULL);
	if (!pblk_blk_ws_cache) {
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_rec_cache = kmem_cache_create("pblk_rec",
				sizeof(struct pblk_rec_ctx), 0, 0, NULL);
	if (!pblk_rec_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
				0, 0, NULL);
	if (!pblk_g_rq_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
				0, 0, NULL);
	if (!pblk_w_rq_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		kmem_cache_destroy(pblk_g_rq_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s",
							pblk->disk->disk_name);
	pblk_line_meta_cache = kmem_cache_create(cache_name,
				pblk->lm.sec_bitmap_len, 0, 0, NULL);
	if (!pblk_line_meta_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		kmem_cache_destroy(pblk_g_rq_cache);
		kmem_cache_destroy(pblk_w_rq_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}
	up_write(&pblk_lock);

	return 0;
}
static int pblk_core_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;

	pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
						geo->nr_planes * geo->nr_luns;

	if (pblk_init_global_caches(pblk))
		return -ENOMEM;

	pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
	if (!pblk->page_pool)
		return -ENOMEM;

	pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
							pblk_blk_ws_cache);
	if (!pblk->line_ws_pool)
		goto free_page_pool;

	pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache);
	if (!pblk->rec_pool)
		goto free_blk_ws_pool;

	pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
							pblk_g_rq_cache);
	if (!pblk->g_rq_pool)
		goto free_rec_pool;

	pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
							pblk_w_rq_cache);
	if (!pblk->w_rq_pool)
		goto free_g_rq_pool;

	pblk->line_meta_pool =
			mempool_create_slab_pool(PBLK_META_POOL_SIZE,
							pblk_line_meta_cache);
	if (!pblk->line_meta_pool)
		goto free_w_rq_pool;

	pblk->close_wq = alloc_workqueue("pblk-close-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
	if (!pblk->close_wq)
		goto free_line_meta_pool;

	pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!pblk->bb_wq)
		goto free_close_wq;

	if (pblk_set_ppaf(pblk))
		goto free_bb_wq;

	if (pblk_rwb_init(pblk))
		goto free_bb_wq;

	INIT_LIST_HEAD(&pblk->compl_list);

	return 0;

	/* Error path: unwind the allocations in reverse order. */
free_bb_wq:
	destroy_workqueue(pblk->bb_wq);
free_close_wq:
	destroy_workqueue(pblk->close_wq);
free_line_meta_pool:
	mempool_destroy(pblk->line_meta_pool);
free_w_rq_pool:
	mempool_destroy(pblk->w_rq_pool);
free_g_rq_pool:
	mempool_destroy(pblk->g_rq_pool);
free_rec_pool:
	mempool_destroy(pblk->rec_pool);
free_blk_ws_pool:
	mempool_destroy(pblk->line_ws_pool);
free_page_pool:
	mempool_destroy(pblk->page_pool);
	return -ENOMEM;
}
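/*
 * Write buffer sizing example (illustrative geometry): with
 * NVM_MEM_PAGE_WRITE == 2, 4 sectors per flash page, 2 planes and 64 LUNs,
 * pgs_in_buffer = 2 * 4 * 2 * 64 = 1024 buffered sectors, which
 * pblk_rwb_init() then turns into a power-of-two ring.
 */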
static void pblk_core_free(struct pblk *pblk)
{
	if (pblk->close_wq)
		destroy_workqueue(pblk->close_wq);

	if (pblk->bb_wq)
		destroy_workqueue(pblk->bb_wq);

	mempool_destroy(pblk->page_pool);
	mempool_destroy(pblk->line_ws_pool);
	mempool_destroy(pblk->rec_pool);
	mempool_destroy(pblk->g_rq_pool);
	mempool_destroy(pblk->w_rq_pool);
	mempool_destroy(pblk->line_meta_pool);

	kmem_cache_destroy(pblk_blk_ws_cache);
	kmem_cache_destroy(pblk_rec_cache);
	kmem_cache_destroy(pblk_g_rq_cache);
	kmem_cache_destroy(pblk_w_rq_cache);
	kmem_cache_destroy(pblk_line_meta_cache);
}
static void pblk_luns_free(struct pblk *pblk)
{
	kfree(pblk->luns);
}

static void pblk_free_line_bitmaps(struct pblk_line *line)
{
	kfree(line->blk_bitmap);
	kfree(line->erase_bitmap);
}
static void pblk_lines_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	int i;

	spin_lock(&l_mg->free_lock);
	for (i = 0; i < l_mg->nr_lines; i++) {
		line = &pblk->lines[i];

		pblk_line_free(pblk, line);
		pblk_free_line_bitmaps(line);
	}
	spin_unlock(&l_mg->free_lock);
}
static void pblk_line_meta_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int i;

	kfree(l_mg->bb_template);
	kfree(l_mg->bb_aux);
	kfree(l_mg->vsc_list);

	spin_lock(&l_mg->free_lock);
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		kfree(l_mg->sline_meta[i]);
		pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
		kfree(l_mg->eline_meta[i]);
	}
	spin_unlock(&l_mg->free_lock);
}
static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
{
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr ppa;
	u8 *blks;
	int nr_blks, ret;

	nr_blks = geo->blks_per_lun * geo->plane_mode;
	blks = kmalloc(nr_blks, GFP_KERNEL);
	if (!blks)
		return -ENOMEM;

	ppa.ppa = 0;
	ppa.g.ch = rlun->bppa.g.ch;
	ppa.g.lun = rlun->bppa.g.lun;

	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
	if (ret)
		goto out;

	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
	if (nr_blks < 0) {
		ret = nr_blks;
		goto out;
	}

	rlun->bb_list = blks;

	return 0;
out:
	kfree(blks);
	return ret;
}
static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
			int blk_per_line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int bb_cnt = 0;
	int i;

	for (i = 0; i < blk_per_line; i++) {
		rlun = &pblk->luns[i];
		if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
			continue;

		set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
		bb_cnt++;
	}

	return bb_cnt;
}
static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;

	line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->blk_bitmap)
		return -ENOMEM;

	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->erase_bitmap) {
		kfree(line->blk_bitmap);
		return -ENOMEM;
	}

	return 0;
}
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int i, ret;

	/* TODO: Implement unbalanced LUN support */
	if (geo->luns_per_chnl < 0) {
		pr_err("pblk: unbalanced LUN config.\n");
		return -EINVAL;
	}

	pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL);
	if (!pblk->luns)
		return -ENOMEM;

	for (i = 0; i < geo->nr_luns; i++) {
		/* Stripe across channels */
		int ch = i % geo->nr_chnls;
		int lun_raw = i / geo->nr_chnls;
		int lunid = lun_raw + ch * geo->luns_per_chnl;

		rlun = &pblk->luns[i];
		rlun->bppa = luns[lunid];

		sema_init(&rlun->wr_sem, 1);

		ret = pblk_bb_discovery(dev, rlun);
		if (ret) {
			while (--i >= 0)
				kfree(pblk->luns[i].bb_list);
			return ret;
		}
	}

	return 0;
}
static int pblk_lines_configure(struct pblk *pblk, int flags)
{
	struct pblk_line *line = NULL;
	int ret = 0;

	if (!(flags & NVM_TARGET_FACTORY)) {
		line = pblk_recov_l2p(pblk);
		if (IS_ERR(line)) {
			pr_err("pblk: could not recover l2p table\n");
			ret = -EFAULT;
		}
	}

	if (!line) {
		/* Configure next line for user data */
		line = pblk_line_get_first_data(pblk);
		if (!line) {
			pr_err("pblk: line list corrupted\n");
			ret = -EFAULT;
		}
	}

	return ret;
}
/* See comment over struct line_emeta definition */
static unsigned int calc_emeta_len(struct pblk *pblk)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;

	/* Round to sector size so that lba_list starts on its own sector */
	lm->emeta_sec[1] = DIV_ROUND_UP(
			sizeof(struct line_emeta) + lm->blk_bitmap_len,
			geo->sec_size);
	lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;

	/* Round to sector size so that vsc_list starts on its own sector */
	lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
	lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
			geo->sec_size);
	lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;

	lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
			geo->sec_size);
	lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;

	lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);

	return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
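/*
 * emeta layout recap (matches the regions computed above): region 1 holds
 * the line_emeta header plus the bad-block bitmap, region 2 the lba_list
 * (one u64 per data sector), region 3 the valid-sector-count list (one u32
 * per line); each region is padded to a sector boundary. Illustrative
 * numbers: with 260096 data sectors per line and 4KB sectors, the lba_list
 * alone is 260096 * 8B, which rounds up to 508 sectors.
 */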
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	sector_t provisioned;

	provisioned = nr_free_blks;
	provisioned *= (100 - pblk->over_pct);
	sector_div(provisioned, 100);

	/* Internally pblk manages all free blocks, but all calculations based
	 * on user capacity consider only provisioned blocks
	 */
	pblk->rl.total_blocks = nr_free_blks;
	pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk;
	pblk->capacity = provisioned * geo->sec_per_blk;
	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
}
static int pblk_lines_alloc_metadata(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	int i;

	/* smeta is always small enough to fit on a kmalloc memory allocation,
	 * emeta depends on the number of LUNs allocated to the pblk instance
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
		if (!l_mg->sline_meta[i])
			goto fail_free_smeta;
	}

	/* emeta allocates three different buffers for managing metadata with
	 * in-memory and in-media layouts
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		struct pblk_emeta *emeta;

		emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
		if (!emeta)
			goto fail_free_emeta;

		if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
			l_mg->emeta_alloc_type = PBLK_VMALLOC_META;

			emeta->buf = vmalloc(lm->emeta_len[0]);
			if (!emeta->buf) {
				kfree(emeta);
				goto fail_free_emeta;
			}

			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
		} else {
			l_mg->emeta_alloc_type = PBLK_KMALLOC_META;

			emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
			if (!emeta->buf) {
				kfree(emeta);
				goto fail_free_emeta;
			}

			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
		}
	}

	l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
	if (!l_mg->vsc_list)
		goto fail_free_emeta;

	for (i = 0; i < l_mg->nr_lines; i++)
		l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);

	return 0;

fail_free_emeta:
	while (--i >= 0) {
		vfree(l_mg->eline_meta[i]->buf);
		kfree(l_mg->eline_meta[i]);
	}

fail_free_smeta:
	for (i = 0; i < PBLK_DATA_LINES; i++)
		kfree(l_mg->sline_meta[i]);

	return -ENOMEM;
}
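/*
 * Allocation strategy note: smeta buffers are always small enough for
 * kmalloc(), while a full emeta buffer (emeta_len[0]) scales with the
 * number of LUNs in the instance and can exceed KMALLOC_MAX_CACHE_SIZE, in
 * which case the code above falls back to vmalloc() and records the choice
 * in emeta_alloc_type so pblk_mfree() can release it correctly later.
 */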
static int pblk_lines_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line;
	unsigned int smeta_len, emeta_len;
	long nr_bad_blks, nr_free_blks;
	int bb_distance, max_write_ppas, mod;
	int i, ret;

	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
	max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
	pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
				max_write_ppas : nvm_max_phys_sects(dev);
	pblk_set_sec_per_write(pblk, pblk->min_write_pgs);

	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
		pr_err("pblk: cannot support device max_phys_sect\n");
		return -EINVAL;
	}

	div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
	if (mod) {
		pr_err("pblk: bad configuration of sectors/pages\n");
		return -EINVAL;
	}
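	/*
	 * Example write-unit math (illustrative geometry): with 8 sectors per
	 * plane-page (4 sectors/page * 2 planes) and 4KB sectors on a 4KB
	 * PAGE_SIZE kernel, min_write_pgs = 8; with 64 LUNs,
	 * max_write_ppas = 512, which is then capped by nvm_max_phys_sects()
	 * and must not exceed PBLK_MAX_REQ_ADDRS.
	 */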
	l_mg->nr_lines = geo->blks_per_lun;
	l_mg->log_line = l_mg->data_line = NULL;
	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
	l_mg->nr_free_lines = 0;
	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);

	lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
	lm->blk_per_line = geo->nr_luns;
	lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
	lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
	lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
	lm->high_thrs = lm->sec_per_line / 2;
	lm->mid_thrs = lm->sec_per_line / 4;
	lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
	/* Calculate necessary pages for smeta. See comment over struct
	 * line_smeta definition
	 */
	i = 1;
add_smeta_page:
	lm->smeta_sec = i * geo->sec_per_pl;
	lm->smeta_len = lm->smeta_sec * geo->sec_size;

	smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
	if (smeta_len > lm->smeta_len) {
		i++;
		goto add_smeta_page;
	}

	/* Calculate necessary pages for emeta. See comment over struct
	 * line_emeta definition
	 */
	i = 1;
add_emeta_page:
	lm->emeta_sec[0] = i * geo->sec_per_pl;
	lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;

	emeta_len = calc_emeta_len(pblk);
	if (emeta_len > lm->emeta_len[0]) {
		i++;
		goto add_emeta_page;
	}

	lm->emeta_bb = geo->nr_luns - i;
	lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
							geo->sec_per_blk);
	if (lm->min_blk_line > lm->blk_per_line) {
		pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
							lm->min_blk_line);
		ret = -EINVAL;
		goto fail;
	}
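	/*
	 * Geometry example (illustrative): with 64 LUNs and 4096 sectors per
	 * block, a line spans 64 blocks and 64 * 4096 = 262144 sectors, so
	 * the GC thresholds above become high_thrs = 131072 and
	 * mid_thrs = 65536, and min_blk_line only rejects the configuration
	 * if the start/end metadata alone would need more blocks than the
	 * line contains.
	 */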
	ret = pblk_lines_alloc_metadata(pblk);
	if (ret)
		goto fail;

	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_template) {
		ret = -ENOMEM;
		goto fail_free_meta;
	}

	l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_aux) {
		ret = -ENOMEM;
		goto fail_free_bb_template;
	}

	bb_distance = (geo->nr_luns) * geo->sec_per_pl;
	for (i = 0; i < lm->sec_per_line; i += bb_distance)
		bitmap_set(l_mg->bb_template, i, geo->sec_per_pl);
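	/*
	 * The loop above pre-marks one sec_per_pl chunk per stripe of
	 * nr_luns * sec_per_pl sectors, i.e. the sectors that a single block
	 * (the one at LUN position 0) contributes to the line. Together with
	 * bb_aux this template is presumably reused for other block positions
	 * at line setup, so bad blocks can be masked out without recomputing
	 * the layout each time.
	 */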
	INIT_LIST_HEAD(&l_mg->free_list);
	INIT_LIST_HEAD(&l_mg->corrupt_list);
	INIT_LIST_HEAD(&l_mg->bad_list);
	INIT_LIST_HEAD(&l_mg->gc_full_list);
	INIT_LIST_HEAD(&l_mg->gc_high_list);
	INIT_LIST_HEAD(&l_mg->gc_mid_list);
	INIT_LIST_HEAD(&l_mg->gc_low_list);
	INIT_LIST_HEAD(&l_mg->gc_empty_list);

	INIT_LIST_HEAD(&l_mg->emeta_list);

	l_mg->gc_lists[0] = &l_mg->gc_high_list;
	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
	l_mg->gc_lists[2] = &l_mg->gc_low_list;

	spin_lock_init(&l_mg->free_lock);
	spin_lock_init(&l_mg->close_lock);
	spin_lock_init(&l_mg->gc_lock);

	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
								GFP_KERNEL);
	if (!pblk->lines) {
		ret = -ENOMEM;
		goto fail_free_bb_aux;
	}
	nr_free_blks = 0;
	for (i = 0; i < l_mg->nr_lines; i++) {
		int blk_in_line;

		line = &pblk->lines[i];

		line->pblk = pblk;
		line->id = i;
		line->type = PBLK_LINETYPE_FREE;
		line->state = PBLK_LINESTATE_FREE;
		line->gc_group = PBLK_LINEGC_NONE;
		line->vsc = &l_mg->vsc_list[i];
		spin_lock_init(&line->lock);

		ret = pblk_alloc_line_bitmaps(pblk, line);
		if (ret)
			goto fail_free_lines;

		nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
			pblk_free_line_bitmaps(line);
			ret = -EINVAL;
			goto fail_free_lines;
		}

		blk_in_line = lm->blk_per_line - nr_bad_blks;
		if (blk_in_line < lm->min_blk_line) {
			line->state = PBLK_LINESTATE_BAD;
			list_add_tail(&line->list, &l_mg->bad_list);
			continue;
		}

		nr_free_blks += blk_in_line;
		atomic_set(&line->blk_in_line, blk_in_line);

		l_mg->nr_free_lines++;
		list_add_tail(&line->list, &l_mg->free_list);
	}

	pblk_set_provision(pblk, nr_free_blks);
	/* Cleanup per-LUN bad block lists - managed within lines on run-time */
	for (i = 0; i < geo->nr_luns; i++)
		kfree(pblk->luns[i].bb_list);

	return 0;

fail_free_lines:
	while (--i >= 0)
		pblk_free_line_bitmaps(&pblk->lines[i]);
fail_free_bb_aux:
	kfree(l_mg->bb_aux);
fail_free_bb_template:
	kfree(l_mg->bb_template);
fail_free_meta:
	pblk_line_meta_free(pblk);
fail:
	for (i = 0; i < geo->nr_luns; i++)
		kfree(pblk->luns[i].bb_list);

	return ret;
}
static int pblk_writer_init(struct pblk *pblk)
{
	setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk);
	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));

	pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
	if (IS_ERR(pblk->writer_ts)) {
		pr_err("pblk: could not allocate writer kthread\n");
		return PTR_ERR(pblk->writer_ts);
	}

	return 0;
}
static void pblk_writer_stop(struct pblk *pblk)
{
	/* The pipeline must be stopped and the write buffer emptied before the
	 * write thread is stopped
	 */
	WARN(pblk_rb_read_count(&pblk->rwb),
			"Stopping not fully persisted write buffer\n");
	WARN(pblk_rb_sync_count(&pblk->rwb),
			"Stopping not fully synced write buffer\n");

	kthread_stop(pblk->writer_ts);
	del_timer(&pblk->wtimer);
}
static void pblk_free(struct pblk *pblk)
{
	pblk_luns_free(pblk);
	pblk_lines_free(pblk);
	pblk_line_meta_free(pblk);
	pblk_core_free(pblk);
}

static void pblk_tear_down(struct pblk *pblk)
{
	pblk_pipeline_stop(pblk);
	pblk_writer_stop(pblk);
	pblk_rb_sync_l2p(&pblk->rwb);
	pblk_rl_free(&pblk->rl);

	pr_debug("pblk: consistent tear down\n");
}
static void pblk_exit(void *private)
{
	struct pblk *pblk = private;

	down_write(&pblk_lock);
	pblk_gc_exit(pblk);
	pblk_tear_down(pblk);
	pblk_free(pblk);
	up_write(&pblk_lock);
}
static sector_t pblk_capacity(void *private)
{
	struct pblk *pblk = private;

	return pblk->capacity * NR_PHY_IN_LOG;
}
static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
		       int flags)
{
	struct nvm_geo *geo = &dev->geo;
	struct request_queue *bqueue = dev->q;
	struct request_queue *tqueue = tdisk->queue;
	struct pblk *pblk;
	int ret;

	if (dev->identity.dom & NVM_RSP_L2P) {
		pr_err("pblk: device-side L2P table not supported. (%x)\n",
							dev->identity.dom);
		return ERR_PTR(-EINVAL);
	}

	pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
	if (!pblk)
		return ERR_PTR(-ENOMEM);

	pblk->dev = dev;
	pblk->disk = tdisk;
	pblk->state = PBLK_STATE_RUNNING;

	spin_lock_init(&pblk->trans_lock);
	spin_lock_init(&pblk->lock);

	if (flags & NVM_TARGET_FACTORY)
		pblk_setup_uuid(pblk);
#ifdef CONFIG_NVM_DEBUG
	atomic_long_set(&pblk->inflight_writes, 0);
	atomic_long_set(&pblk->padded_writes, 0);
	atomic_long_set(&pblk->padded_wb, 0);
	atomic_long_set(&pblk->nr_flush, 0);
	atomic_long_set(&pblk->req_writes, 0);
	atomic_long_set(&pblk->sub_writes, 0);
	atomic_long_set(&pblk->sync_writes, 0);
	atomic_long_set(&pblk->inflight_reads, 0);
	atomic_long_set(&pblk->cache_reads, 0);
	atomic_long_set(&pblk->sync_reads, 0);
	atomic_long_set(&pblk->recov_writes, 0);
	atomic_long_set(&pblk->recov_writes, 0);
	atomic_long_set(&pblk->recov_gc_writes, 0);
#endif

	atomic_long_set(&pblk->read_failed, 0);
	atomic_long_set(&pblk->read_empty, 0);
	atomic_long_set(&pblk->read_high_ecc, 0);
	atomic_long_set(&pblk->read_failed_gc, 0);
	atomic_long_set(&pblk->write_failed, 0);
	atomic_long_set(&pblk->erase_failed, 0);
	ret = pblk_luns_init(pblk, dev->luns);
	if (ret) {
		pr_err("pblk: could not initialize luns\n");
		goto fail;
	}

	ret = pblk_lines_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize lines\n");
		goto fail_free_luns;
	}

	ret = pblk_core_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize core\n");
		goto fail_free_line_meta;
	}

	ret = pblk_l2p_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize maps\n");
		goto fail_free_core;
	}

	ret = pblk_lines_configure(pblk, flags);
	if (ret) {
		pr_err("pblk: could not configure lines\n");
		goto fail_free_l2p;
	}

	ret = pblk_writer_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize write thread\n");
		goto fail_free_lines;
	}

	ret = pblk_gc_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize gc\n");
		goto fail_stop_writer;
	}

	/* inherit the size from the underlying device */
	blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
	blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));

	blk_queue_write_cache(tqueue, true, false);

	tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size;
	tqueue->limits.discard_alignment = 0;
	blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue);
	pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
			geo->nr_luns, pblk->l_mg.nr_lines,
			(unsigned long long)pblk->rl.nr_secs,
			pblk->rwb.nr_entries);

	wake_up_process(pblk->writer_ts);

	return pblk;
fail_stop_writer:
	pblk_writer_stop(pblk);
fail_free_lines:
	pblk_lines_free(pblk);
fail_free_l2p:
	pblk_l2p_free(pblk);
fail_free_core:
	pblk_core_free(pblk);
fail_free_line_meta:
	pblk_line_meta_free(pblk);
fail_free_luns:
	pblk_luns_free(pblk);
fail:
	kfree(pblk);

	return ERR_PTR(ret);
}
/* physical block device target */
static struct nvm_tgt_type tt_pblk = {
	.name		= "pblk",
	.version	= {1, 0, 0},

	.make_rq	= pblk_make_rq,
	.capacity	= pblk_capacity,

	.init		= pblk_init,
	.exit		= pblk_exit,

	.sysfs_init	= pblk_sysfs_init,
	.sysfs_exit	= pblk_sysfs_exit,
	.owner		= THIS_MODULE,
};
static int __init pblk_module_init(void)
{
	int ret;

	pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
	if (!pblk_bio_set)
		return -ENOMEM;

	ret = nvm_register_tgt_type(&tt_pblk);
	if (ret)
		bioset_free(pblk_bio_set);

	return ret;
}

static void pblk_module_exit(void)
{
	bioset_free(pblk_bio_set);
	nvm_unregister_tgt_type(&tt_pblk);
}

module_init(pblk_module_init);
module_exit(pblk_module_exit);
MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");