/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-core.c - pblk's core functionality
 *
 */

#include "pblk.h"

static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
			 struct ppa_addr *ppa)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int pos = pblk_dev_ppa_to_pos(geo, *ppa);

	pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
	atomic_long_inc(&pblk->erase_failed);

	atomic_dec(&line->blk_in_line);
	if (test_and_set_bit(pos, line->blk_bitmap))
		pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
							line->id, pos);

	pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
}

static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct pblk_line *line;

	line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)];
	atomic_dec(&line->left_seblks);

	if (rqd->error) {
		struct ppa_addr *ppa;

		ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
		if (!ppa)
			return;

		*ppa = rqd->ppa_addr;
		pblk_mark_bb(pblk, line, ppa);
	}

	atomic_dec(&pblk->inflight_io);
}

/* Erase completion assumes that only one block is erased at a time */
static void pblk_end_io_erase(struct nvm_rq *rqd)
{
	struct pblk *pblk = rqd->private;

	__pblk_end_io_erase(pblk, rqd);
	mempool_free(rqd, pblk->g_rq_pool);
}

void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
			   u64 paddr)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list = NULL;

	/* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
	 * table is modified with reclaimed sectors, a check is done to ensure
	 * that newer updates are not overwritten.
	 */
	spin_lock(&line->lock);
	if (line->state == PBLK_LINESTATE_GC ||
					line->state == PBLK_LINESTATE_FREE) {
		spin_unlock(&line->lock);
		return;
	}

	if (test_and_set_bit(paddr, line->invalid_bitmap)) {
		WARN_ONCE(1, "pblk: double invalidate\n");
		spin_unlock(&line->lock);
		return;
	}

	le32_add_cpu(line->vsc, -1);

	if (line->state == PBLK_LINESTATE_CLOSED)
		move_list = pblk_line_gc_list(pblk, line);
	spin_unlock(&line->lock);

	if (move_list) {
		spin_lock(&l_mg->gc_lock);
		spin_lock(&line->lock);
		/* Prevent moving a line that has just been chosen for GC */
		if (line->state == PBLK_LINESTATE_GC ||
					line->state == PBLK_LINESTATE_FREE) {
			spin_unlock(&line->lock);
			spin_unlock(&l_mg->gc_lock);
			return;
		}
		spin_unlock(&line->lock);

		list_move_tail(&line->list, move_list);
		spin_unlock(&l_mg->gc_lock);
	}
}

void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
{
	struct pblk_line *line;
	u64 paddr;
	int line_id;

#ifdef CONFIG_NVM_DEBUG
	/* Callers must ensure that the ppa points to a device address */
	BUG_ON(pblk_addr_in_cache(ppa));
	BUG_ON(pblk_ppa_empty(ppa));
#endif

	line_id = pblk_tgt_ppa_to_line(ppa);
	line = &pblk->lines[line_id];
	paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);

	__pblk_map_invalidate(pblk, line, paddr);
}

static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
				  unsigned int nr_secs)
{
	sector_t lba;

	spin_lock(&pblk->trans_lock);
	for (lba = slba; lba < slba + nr_secs; lba++) {
		struct ppa_addr ppa;

		ppa = pblk_trans_map_get(pblk, lba);

		if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
			pblk_map_invalidate(pblk, ppa);

		pblk_ppa_set_empty(&ppa);
		pblk_trans_map_set(pblk, lba, ppa);
	}
	spin_unlock(&pblk->trans_lock);
}

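/* Request helpers: write requests are served from the dedicated write
 * mempool, all other requests from the general mempool.
 */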
struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
{
	mempool_t *pool;
	struct nvm_rq *rqd;
	int rq_size;

	if (rw == WRITE) {
		pool = pblk->w_rq_pool;
		rq_size = pblk_w_rq_size;
	} else {
		pool = pblk->g_rq_pool;
		rq_size = pblk_g_rq_size;
	}

	rqd = mempool_alloc(pool, GFP_KERNEL);
	memset(rqd, 0, rq_size);

	return rqd;
}

void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
{
	mempool_t *pool;

	if (rw == WRITE)
		pool = pblk->w_rq_pool;
	else
		pool = pblk->g_rq_pool;

	mempool_free(rqd, pool);
}

void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
			 int nr_pages)
{
	struct bio_vec bv;
	int i;

	WARN_ON(off + nr_pages != bio->bi_vcnt);

	bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE);
	for (i = off; i < nr_pages + off; i++) {
		bv = bio->bi_io_vec[i];
		mempool_free(bv.bv_page, pblk->page_pool);
	}
}

int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
		       int nr_pages)
{
	struct request_queue *q = pblk->dev->q;
	struct page *page;
	int i, ret;

	for (i = 0; i < nr_pages; i++) {
		page = mempool_alloc(pblk->page_pool, flags);
		if (!page)
			goto err;

		ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
		if (ret != PBLK_EXPOSED_PAGE_SIZE) {
			pr_err("pblk: could not add page to bio\n");
			mempool_free(page, pblk->page_pool);
			goto err;
		}
	}

	return 0;
err:
	pblk_bio_free_pages(pblk, bio, 0, i - 1);
	return -1;
}

static void pblk_write_kick(struct pblk *pblk)
{
	wake_up_process(pblk->writer_ts);
	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
}

void pblk_write_timer_fn(unsigned long data)
{
	struct pblk *pblk = (struct pblk *)data;

	/* kick the write thread every tick to flush outstanding data */
	pblk_write_kick(pblk);
}

void pblk_write_should_kick(struct pblk *pblk)
{
	unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);

	if (secs_avail >= pblk->min_write_pgs)
		pblk_write_kick(pblk);
}

void pblk_end_bio_sync(struct bio *bio)
{
	struct completion *waiting = bio->bi_private;

	complete(waiting);
}

void pblk_end_io_sync(struct nvm_rq *rqd)
{
	struct completion *waiting = rqd->private;

	complete(waiting);
}

void pblk_wait_for_meta(struct pblk *pblk)
{
	do {
		if (!atomic_read(&pblk->inflight_io))
			break;

		schedule();
	} while (1);
}

static void pblk_flush_writer(struct pblk *pblk)
{
	pblk_rb_flush(&pblk->rwb);
	do {
		if (!pblk_rb_sync_count(&pblk->rwb))
			break;

		pblk_write_kick(pblk);
		schedule();
	} while (1);
}

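/* Pick the GC list a line belongs to based on its valid sector count (vsc):
 * lines with no valid sectors go to the full list, lines with all sectors
 * still valid go to the empty list, and the rest are bucketed into the
 * high/mid/low lists by the high_thrs/mid_thrs thresholds. Any other vsc
 * value marks the line as corrupt.
 */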
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list = NULL;
	int vsc = le32_to_cpu(*line->vsc);

	lockdep_assert_held(&line->lock);

	if (!vsc) {
		if (line->gc_group != PBLK_LINEGC_FULL) {
			line->gc_group = PBLK_LINEGC_FULL;
			move_list = &l_mg->gc_full_list;
		}
	} else if (vsc < lm->high_thrs) {
		if (line->gc_group != PBLK_LINEGC_HIGH) {
			line->gc_group = PBLK_LINEGC_HIGH;
			move_list = &l_mg->gc_high_list;
		}
	} else if (vsc < lm->mid_thrs) {
		if (line->gc_group != PBLK_LINEGC_MID) {
			line->gc_group = PBLK_LINEGC_MID;
			move_list = &l_mg->gc_mid_list;
		}
	} else if (vsc < line->sec_in_line) {
		if (line->gc_group != PBLK_LINEGC_LOW) {
			line->gc_group = PBLK_LINEGC_LOW;
			move_list = &l_mg->gc_low_list;
		}
	} else if (vsc == line->sec_in_line) {
		if (line->gc_group != PBLK_LINEGC_EMPTY) {
			line->gc_group = PBLK_LINEGC_EMPTY;
			move_list = &l_mg->gc_empty_list;
		}
	} else {
		line->state = PBLK_LINESTATE_CORRUPT;
		line->gc_group = PBLK_LINEGC_NONE;
		move_list = &l_mg->corrupt_list;
		pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
						line->id, vsc,
						line->sec_in_line,
						lm->high_thrs, lm->mid_thrs);
	}

	return move_list;
}

void pblk_discard(struct pblk *pblk, struct bio *bio)
{
	sector_t slba = pblk_get_lba(bio);
	sector_t nr_secs = pblk_get_secs(bio);

	pblk_invalidate_range(pblk, slba, nr_secs);
}

struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba)
{
	struct ppa_addr ppa;

	spin_lock(&pblk->trans_lock);
	ppa = pblk_trans_map_get(pblk, lba);
	spin_unlock(&pblk->trans_lock);

	return ppa;
}

void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
{
	atomic_long_inc(&pblk->write_failed);
#ifdef CONFIG_NVM_DEBUG
	pblk_print_failed_rqd(pblk, rqd, rqd->error);
#endif
}

void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
{
	/* Empty page read is not necessarily an error (e.g., L2P recovery) */
	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
		atomic_long_inc(&pblk->read_empty);
		return;
	}

	switch (rqd->error) {
	case NVM_RSP_WARN_HIGHECC:
		atomic_long_inc(&pblk->read_high_ecc);
		break;
	case NVM_RSP_ERR_FAILECC:
	case NVM_RSP_ERR_FAILCRC:
		atomic_long_inc(&pblk->read_failed);
		break;
	default:
		pr_err("pblk: unknown read error:%d\n", rqd->error);
	}
#ifdef CONFIG_NVM_DEBUG
	pblk_print_failed_rqd(pblk, rqd, rqd->error);
#endif
}

void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
{
	pblk->sec_per_write = sec_per_write;
}

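/* Common submission path for all pblk I/Os. Under CONFIG_NVM_DEBUG the ppa
 * list is boundary-checked and writes are only allowed to open lines.
 */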
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct nvm_tgt_dev *dev = pblk->dev;

#ifdef CONFIG_NVM_DEBUG
	struct ppa_addr *ppa_list;

	ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
	if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
		WARN_ON(1);
		return -EINVAL;
	}

	if (rqd->opcode == NVM_OP_PWRITE) {
		struct pblk_line *line;
		struct ppa_addr ppa;
		int i;

		for (i = 0; i < rqd->nr_ppas; i++) {
			ppa = ppa_list[i];
			line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];

			spin_lock(&line->lock);
			if (line->state != PBLK_LINESTATE_OPEN) {
				pr_err("pblk: bad ppa: line:%d,state:%d\n",
							line->id, line->state);
				WARN_ON(1);
				spin_unlock(&line->lock);
				return -EINVAL;
			}
			spin_unlock(&line->lock);
		}
	}
#endif

	atomic_inc(&pblk->inflight_io);

	return nvm_submit_io(dev, rqd);
}

struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
			      unsigned int nr_secs, unsigned int len,
			      int alloc_type, gfp_t gfp_mask)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	void *kaddr = data;
	struct page *page;
	struct bio *bio;
	int i, ret;

	if (alloc_type == PBLK_KMALLOC_META)
		return bio_map_kern(dev->q, kaddr, len, gfp_mask);

	bio = bio_kmalloc(gfp_mask, nr_secs);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nr_secs; i++) {
		page = vmalloc_to_page(kaddr);
		if (!page) {
			pr_err("pblk: could not map vmalloc bio\n");
			bio_put(bio);
			bio = ERR_PTR(-ENOMEM);
			goto out;
		}

		ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
		if (ret != PAGE_SIZE) {
			pr_err("pblk: could not add page to bio\n");
			bio_put(bio);
			bio = ERR_PTR(-ENOMEM);
			goto out;
		}

		kaddr += PAGE_SIZE;
	}
out:
	return bio;
}

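/* Decide how many sectors to sync in the next write: a full sec_per_write
 * chunk if enough sectors are buffered, the largest multiple of
 * min_write_pgs otherwise, or a minimal write when a flush is pending.
 */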
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
		   unsigned long secs_to_flush)
{
	int max = pblk->sec_per_write;
	int min = pblk->min_write_pgs;
	int secs_to_sync = 0;

	if (secs_avail >= max)
		secs_to_sync = max;
	else if (secs_avail >= min)
		secs_to_sync = min * (secs_avail / min);
	else if (secs_to_flush)
		secs_to_sync = min;

	return secs_to_sync;
}

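/* In-line sector allocator: cur_sec walks the line's map_bitmap forward on
 * allocation and backwards on deallocation. Callers of __pblk_alloc_page()
 * must hold line->lock.
 */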
void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;
	int i;

	addr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	line->cur_sec = addr - nr_secs;

	for (i = 0; i < nr_secs; i++, line->cur_sec--)
		WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
}

u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;
	int i;

	lockdep_assert_held(&line->lock);

	/* logic error: ppa out-of-bounds. Prevent generating bad address */
	if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
		WARN(1, "pblk: page allocation out of bounds\n");
		nr_secs = pblk->lm.sec_per_line - line->cur_sec;
	}

	line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	for (i = 0; i < nr_secs; i++, line->cur_sec++)
		WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));

	return addr;
}

u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;

	/* Lock needed in case a write fails and a recovery needs to remap
	 * failed write buffer entries
	 */
	spin_lock(&line->lock);
	addr = __pblk_alloc_page(pblk, line, nr_secs);
	line->left_msecs -= nr_secs;
	WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
	spin_unlock(&line->lock);

	return addr;
}

u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
{
	u64 paddr;

	spin_lock(&line->lock);
	paddr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	spin_unlock(&line->lock);

	return paddr;
}

/*
 * Submit emeta to one LUN in the raid line at a time to avoid a deadlock when
 * taking the per LUN semaphore.
 */
static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
				     void *emeta_buf, u64 paddr, int dir)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	void *ppa_list, *meta_list;
	struct bio *bio;
	struct nvm_rq rqd;
	dma_addr_t dma_ppa_list, dma_meta_list;
	int min = pblk->min_write_pgs;
	int left_ppas = lm->emeta_sec[0];
	int id = line->id;
	int rq_ppas, rq_len;
	int cmd_op, bio_op;
	int i, j;
	int ret;
	DECLARE_COMPLETION_ONSTACK(wait);

	if (dir == WRITE) {
		bio_op = REQ_OP_WRITE;
		cmd_op = NVM_OP_PWRITE;
	} else if (dir == READ) {
		bio_op = REQ_OP_READ;
		cmd_op = NVM_OP_PREAD;
	} else
		return -EINVAL;

	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
							&dma_meta_list);
	if (!meta_list)
		return -ENOMEM;

	ppa_list = meta_list + pblk_dma_meta_size;
	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;

next_rq:
	memset(&rqd, 0, sizeof(struct nvm_rq));

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	rq_len = rq_ppas * geo->sec_size;

	bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
					l_mg->emeta_alloc_type, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto free_rqd_dma;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, bio_op, 0);

	rqd.bio = bio;
	rqd.meta_list = meta_list;
	rqd.ppa_list = ppa_list;
	rqd.dma_meta_list = dma_meta_list;
	rqd.dma_ppa_list = dma_ppa_list;
	rqd.opcode = cmd_op;
	rqd.nr_ppas = rq_ppas;
	rqd.end_io = pblk_end_io_sync;
	rqd.private = &wait;

	if (dir == WRITE) {
		struct pblk_sec_meta *meta_list = rqd.meta_list;

		rqd.flags = pblk_set_progr_mode(pblk, WRITE);
		for (i = 0; i < rqd.nr_ppas; ) {
			spin_lock(&line->lock);
			paddr = __pblk_alloc_page(pblk, line, min);
			spin_unlock(&line->lock);
			for (j = 0; j < min; j++, i++, paddr++) {
				meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
				rqd.ppa_list[i] =
					addr_to_gen_ppa(pblk, paddr, id);
			}
		}
	} else {
		for (i = 0; i < rqd.nr_ppas; ) {
			struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
			int pos = pblk_dev_ppa_to_pos(geo, ppa);
			int read_type = PBLK_READ_RANDOM;

			if (pblk_io_aligned(pblk, rq_ppas))
				read_type = PBLK_READ_SEQUENTIAL;
			rqd.flags = pblk_set_read_mode(pblk, read_type);

			while (test_bit(pos, line->blk_bitmap)) {
				paddr += min;
				if (pblk_boundary_paddr_checks(pblk, paddr)) {
					pr_err("pblk: corrupt emeta line:%d\n",
								line->id);
					bio_put(bio);
					ret = -EINTR;
					goto free_rqd_dma;
				}

				ppa = addr_to_gen_ppa(pblk, paddr, id);
				pos = pblk_dev_ppa_to_pos(geo, ppa);
			}

			if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
				pr_err("pblk: corrupt emeta line:%d\n",
								line->id);
				bio_put(bio);
				ret = -EINTR;
				goto free_rqd_dma;
			}

			for (j = 0; j < min; j++, i++, paddr++)
				rqd.ppa_list[i] =
					addr_to_gen_ppa(pblk, paddr, line->id);
		}
	}

	ret = pblk_submit_io(pblk, &rqd);
	if (ret) {
		pr_err("pblk: emeta I/O submission failed: %d\n", ret);
		bio_put(bio);
		goto free_rqd_dma;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: emeta I/O timed out\n");
	}
	atomic_dec(&pblk->inflight_io);
	reinit_completion(&wait);

	if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
		bio_put(bio);

	if (rqd.error) {
		if (dir == WRITE)
			pblk_log_write_err(pblk, &rqd);
		else
			pblk_log_read_err(pblk, &rqd);
	}

	emeta_buf += rq_len;
	left_ppas -= rq_ppas;
	if (left_ppas)
		goto next_rq;
free_rqd_dma:
	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
	return ret;
}

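/* smeta is placed at the first good block of the line; a non-zero start
 * offset therefore indicates leading bad blocks.
 */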
u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	int bit;

	/* This usually only happens on bad lines */
	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	if (bit >= lm->blk_per_line)
		return -1;

	return bit * geo->sec_per_pl;
}

static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
				     u64 paddr, int dir)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_line_meta *lm = &pblk->lm;
	struct bio *bio;
	struct nvm_rq rqd;
	__le64 *lba_list = NULL;
	int i, ret;
	int cmd_op, bio_op;
	int flags;
	DECLARE_COMPLETION_ONSTACK(wait);

	if (dir == WRITE) {
		bio_op = REQ_OP_WRITE;
		cmd_op = NVM_OP_PWRITE;
		flags = pblk_set_progr_mode(pblk, WRITE);
		lba_list = emeta_to_lbas(pblk, line->emeta->buf);
	} else if (dir == READ) {
		bio_op = REQ_OP_READ;
		cmd_op = NVM_OP_PREAD;
		flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	} else
		return -EINVAL;

	memset(&rqd, 0, sizeof(struct nvm_rq));

	rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
							&rqd.dma_meta_list);
	if (!rqd.meta_list)
		return -ENOMEM;

	rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
	rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;

	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto free_ppa_list;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, bio_op, 0);

	rqd.bio = bio;
	rqd.opcode = cmd_op;
	rqd.flags = flags;
	rqd.nr_ppas = lm->smeta_sec;
	rqd.end_io = pblk_end_io_sync;
	rqd.private = &wait;

	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
		struct pblk_sec_meta *meta_list = rqd.meta_list;

		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);

		if (dir == WRITE) {
			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);

			meta_list[i].lba = lba_list[paddr] = addr_empty;
		}
	}

	/*
	 * This I/O is sent by the write thread when a line is replaced. Since
	 * the write thread is the only one sending write and erase commands,
	 * there is no need to take the LUN semaphore.
	 */
	ret = pblk_submit_io(pblk, &rqd);
	if (ret) {
		pr_err("pblk: smeta I/O submission failed: %d\n", ret);
		bio_put(bio);
		goto free_ppa_list;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: smeta I/O timed out\n");
	}
	atomic_dec(&pblk->inflight_io);

	if (rqd.error) {
		if (dir == WRITE)
			pblk_log_write_err(pblk, &rqd);
		else
			pblk_log_read_err(pblk, &rqd);
	}

free_ppa_list:
	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);

	return ret;
}

int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
{
	u64 bpaddr = pblk_line_smeta_start(pblk, line);

	return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
}

int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
			 void *emeta_buf)
{
	return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
						line->emeta_ssec, READ);
}

static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
			    struct ppa_addr ppa)
{
	rqd->opcode = NVM_OP_ERASE;
	rqd->ppa_addr = ppa;
	rqd->nr_ppas = 1;
	rqd->flags = pblk_set_progr_mode(pblk, ERASE);
	rqd->bio = NULL;
}

static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
	struct nvm_rq rqd;
	int ret = 0;
	DECLARE_COMPLETION_ONSTACK(wait);

	memset(&rqd, 0, sizeof(struct nvm_rq));

	pblk_setup_e_rq(pblk, &rqd, ppa);

	rqd.end_io = pblk_end_io_sync;
	rqd.private = &wait;

	/* The write thread schedules erases so that it minimizes disturbances
	 * with writes. Thus, there is no need to take the LUN semaphore.
	 */
	ret = pblk_submit_io(pblk, &rqd);
	if (ret) {
		struct nvm_tgt_dev *dev = pblk->dev;
		struct nvm_geo *geo = &dev->geo;

		pr_err("pblk: could not sync erase line:%d,blk:%d\n",
					pblk_dev_ppa_to_line(ppa),
					pblk_dev_ppa_to_pos(geo, ppa));
		rqd.error = ret;
		goto out;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: sync erase timed out\n");
	}

out:
	rqd.private = pblk;
	__pblk_end_io_erase(pblk, &rqd);

	return ret;
}

int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct ppa_addr ppa;
	int ret, bit = -1;

	/* Erase only good blocks, one at a time */
	do {
		spin_lock(&line->lock);
		bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
								bit + 1);
		if (bit >= lm->blk_per_line) {
			spin_unlock(&line->lock);
			break;
		}

		ppa = pblk->luns[bit].bppa; /* set ch and lun */
		ppa.g.blk = line->id;

		atomic_dec(&line->left_eblks);
		WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
		spin_unlock(&line->lock);

		ret = pblk_blk_erase_sync(pblk, ppa);
		if (ret) {
			pr_err("pblk: failed to erase line %d\n", line->id);
			return ret;
		}
	} while (1);

	return 0;
}

static void pblk_line_setup_metadata(struct pblk_line *line,
				     struct pblk_line_mgmt *l_mg,
				     struct pblk_line_meta *lm)
{
	int meta_line;

	lockdep_assert_held(&l_mg->free_lock);

retry_meta:
	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	if (meta_line == PBLK_DATA_LINES) {
		spin_unlock(&l_mg->free_lock);
		io_schedule();
		spin_lock(&l_mg->free_lock);
		goto retry_meta;
	}

	set_bit(meta_line, &l_mg->meta_bitmap);
	line->meta_line = meta_line;

	line->smeta = l_mg->sline_meta[meta_line];
	line->emeta = l_mg->eline_meta[meta_line];

	memset(line->smeta, 0, lm->smeta_len);
	memset(line->emeta->buf, 0, lm->emeta_len[0]);

	line->emeta->mem = 0;
	atomic_set(&line->emeta->sync, 0);
}

/* For now lines are always assumed full lines. Thus, smeta former and current
 * lun bitmaps are omitted.
 */
static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
				   struct pblk_line *cur)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;
	struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
	int nr_blk_line;

	/* After erasing the line, new bad blocks might appear and we risk
	 * having an invalid line
	 */
	nr_blk_line = lm->blk_per_line -
			bitmap_weight(line->blk_bitmap, lm->blk_per_line);
	if (nr_blk_line < lm->min_blk_line) {
		spin_lock(&l_mg->free_lock);
		spin_lock(&line->lock);
		line->state = PBLK_LINESTATE_BAD;
		spin_unlock(&line->lock);

		list_add_tail(&line->list, &l_mg->bad_list);
		spin_unlock(&l_mg->free_lock);

		pr_debug("pblk: line %d is bad\n", line->id);

		return 0;
	}

	/* Run-time metadata */
	line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);

	/* Mark LUNs allocated in this line (all for now) */
	bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);

	smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
	memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
	smeta_buf->header.id = cpu_to_le32(line->id);
	smeta_buf->header.type = cpu_to_le16(line->type);
	smeta_buf->header.version = cpu_to_le16(1);

	/* Start metadata */
	smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
	smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);

	/* Fill metadata among lines */
	if (cur) {
		memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
		smeta_buf->prev_id = cpu_to_le32(cur->id);
		cur->emeta->buf->next_id = cpu_to_le32(line->id);
	} else {
		smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
	}

	/* All smeta must be set at this point */
	smeta_buf->header.crc = cpu_to_le32(
			pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
	smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));

	/* End metadata */
	memcpy(&emeta_buf->header, &smeta_buf->header,
						sizeof(struct line_header));
	emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
	emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
	emeta_buf->nr_valid_lbas = cpu_to_le64(0);
	emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
	emeta_buf->crc = cpu_to_le32(0);
	emeta_buf->prev_id = smeta_buf->prev_id;

	return 1;
}

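/* Build the line's mapping and invalid bitmaps around bad blocks and reserve
 * the sectors used by smeta (line head) and emeta (line tail).
 */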
/* For now lines are always assumed full lines. Thus, smeta former and current
 * lun bitmaps are omitted.
 */
static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
			     int init)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int nr_bb = 0;
	u64 off;
	int bit = -1;

	line->sec_in_line = lm->sec_per_line;

	/* Capture bad block information on line mapping bitmaps */
	while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
					bit + 1)) < lm->blk_per_line) {
		off = bit * geo->sec_per_pl;
		bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
							lm->sec_per_line);
		bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
							lm->sec_per_line);
		line->sec_in_line -= geo->sec_per_blk;
		if (bit >= lm->emeta_bb)
			nr_bb++;
	}

	/* Mark smeta metadata sectors as bad sectors */
	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	off = bit * geo->sec_per_pl;
	bitmap_set(line->map_bitmap, off, lm->smeta_sec);
	line->sec_in_line -= lm->smeta_sec;
	line->smeta_ssec = off;
	line->cur_sec = off + lm->smeta_sec;

	if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
		pr_debug("pblk: line smeta I/O failed. Retry\n");
		return 1;
	}

	bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);

	/* Mark emeta metadata sectors as bad sectors. We need to consider bad
	 * blocks to make sure that there are enough sectors to store emeta
	 */
	bit = lm->sec_per_line;
	off = lm->sec_per_line - lm->emeta_sec[0];
	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
	while (nr_bb) {
		off -= geo->sec_per_pl;
		if (!test_bit(off, line->invalid_bitmap)) {
			bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
			nr_bb--;
		}
	}

	line->sec_in_line -= lm->emeta_sec[0];
	line->emeta_ssec = off;
	line->nr_valid_lbas = 0;
	line->left_msecs = line->sec_in_line;
	*line->vsc = cpu_to_le32(line->sec_in_line);

	if (lm->sec_per_line - line->sec_in_line !=
		bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
		spin_lock(&line->lock);
		line->state = PBLK_LINESTATE_BAD;
		spin_unlock(&line->lock);

		list_add_tail(&line->list, &l_mg->bad_list);
		pr_err("pblk: unexpected line %d is bad\n", line->id);

		return 0;
	}

	return 1;
}

static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	int blk_in_line = atomic_read(&line->blk_in_line);

	line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
	if (!line->map_bitmap)
		return -ENOMEM;
	memset(line->map_bitmap, 0, lm->sec_bitmap_len);

	/* invalid_bitmap is special since it is used when line is closed. No
	 * need to zero it; it will be initialized using bb info from
	 * map_bitmap
	 */
	line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
	if (!line->invalid_bitmap) {
		mempool_free(line->map_bitmap, pblk->line_meta_pool);
		return -ENOMEM;
	}

	spin_lock(&line->lock);
	if (line->state != PBLK_LINESTATE_FREE) {
		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
		mempool_free(line->map_bitmap, pblk->line_meta_pool);
		spin_unlock(&line->lock);
		WARN(1, "pblk: corrupted line %d, state %d\n",
							line->id, line->state);
		return -EAGAIN;
	}

	line->state = PBLK_LINESTATE_OPEN;

	atomic_set(&line->left_eblks, blk_in_line);
	atomic_set(&line->left_seblks, blk_in_line);

	line->meta_distance = lm->meta_distance;
	spin_unlock(&line->lock);

	/* Bad blocks do not need to be erased */
	bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);

	kref_init(&line->ref);

	return 0;
}

int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int ret;

	spin_lock(&l_mg->free_lock);
	l_mg->data_line = line;
	list_del(&line->list);

	ret = pblk_line_prepare(pblk, line);
	if (ret) {
		list_add(&line->list, &l_mg->free_list);
		spin_unlock(&l_mg->free_lock);
		return ret;
	}
	spin_unlock(&l_mg->free_lock);

	pblk_rl_free_lines_dec(&pblk->rl, line);

	if (!pblk_line_init_bb(pblk, line, 0)) {
		list_add(&line->list, &l_mg->free_list);
		return -EINTR;
	}

	return 0;
}

void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
{
	mempool_free(line->map_bitmap, pblk->line_meta_pool);
	line->map_bitmap = NULL;
	line->smeta = NULL;
	line->emeta = NULL;
}

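/* Take the next usable line from the free list. Lines that turn out to have
 * no good blocks are parked on the bad list and the allocation is retried.
 */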
struct pblk_line *pblk_line_get(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line;
	int ret, bit;

	lockdep_assert_held(&l_mg->free_lock);

retry:
	if (list_empty(&l_mg->free_list)) {
		pr_err("pblk: no free lines\n");
		return NULL;
	}

	line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
	list_del(&line->list);
	l_mg->nr_free_lines--;

	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	if (unlikely(bit >= lm->blk_per_line)) {
		spin_lock(&line->lock);
		line->state = PBLK_LINESTATE_BAD;
		spin_unlock(&line->lock);

		list_add_tail(&line->list, &l_mg->bad_list);

		pr_debug("pblk: line %d is bad\n", line->id);
		goto retry;
	}

	ret = pblk_line_prepare(pblk, line);
	if (ret) {
		if (ret == -EAGAIN) {
			list_add(&line->list, &l_mg->corrupt_list);
			goto retry;
		} else {
			pr_err("pblk: failed to prepare line %d\n", line->id);
			list_add(&line->list, &l_mg->free_list);
			l_mg->nr_free_lines++;
			return NULL;
		}
	}

	return line;
}

static struct pblk_line *pblk_line_retry(struct pblk *pblk,
					 struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *retry_line;

retry:
	spin_lock(&l_mg->free_lock);
	retry_line = pblk_line_get(pblk);
	if (!retry_line) {
		l_mg->data_line = NULL;
		spin_unlock(&l_mg->free_lock);
		return NULL;
	}

	retry_line->smeta = line->smeta;
	retry_line->emeta = line->emeta;
	retry_line->meta_line = line->meta_line;

	pblk_line_free(pblk, line);
	l_mg->data_line = retry_line;
	spin_unlock(&l_mg->free_lock);

	pblk_rl_free_lines_dec(&pblk->rl, retry_line);

	if (pblk_line_erase(pblk, retry_line))
		goto retry;

	return retry_line;
}

static void pblk_set_space_limit(struct pblk *pblk)
{
	struct pblk_rl *rl = &pblk->rl;

	atomic_set(&rl->rb_space, 0);
}

struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	int is_next = 0;

	spin_lock(&l_mg->free_lock);
	line = pblk_line_get(pblk);
	if (!line) {
		spin_unlock(&l_mg->free_lock);
		return NULL;
	}

	line->seq_nr = l_mg->d_seq_nr++;
	line->type = PBLK_LINETYPE_DATA;
	l_mg->data_line = line;

	pblk_line_setup_metadata(line, l_mg, &pblk->lm);

	/* Allocate next line for preparation */
	l_mg->data_next = pblk_line_get(pblk);
	if (!l_mg->data_next) {
		/* If we cannot get a new line, we need to stop the pipeline.
		 * Only allow as many writes in as we can store safely and then
		 * fail gracefully
		 */
		pblk_set_space_limit(pblk);

		l_mg->data_next = NULL;
	} else {
		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
		l_mg->data_next->type = PBLK_LINETYPE_DATA;
		is_next = 1;
	}
	spin_unlock(&l_mg->free_lock);

	if (pblk_line_erase(pblk, line)) {
		line = pblk_line_retry(pblk, line);
		if (!line)
			return NULL;
	}

	pblk_rl_free_lines_dec(&pblk->rl, line);
	if (is_next)
		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);

retry_setup:
	if (!pblk_line_init_metadata(pblk, line, NULL)) {
		line = pblk_line_retry(pblk, line);
		if (!line)
			return NULL;

		goto retry_setup;
	}

	if (!pblk_line_init_bb(pblk, line, 1)) {
		line = pblk_line_retry(pblk, line);
		if (!line)
			return NULL;

		goto retry_setup;
	}

	return line;
}

static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
{
	lockdep_assert_held(&pblk->l_mg.free_lock);

	pblk_set_space_limit(pblk);
	pblk->state = PBLK_STATE_STOPPING;
}

void pblk_pipeline_stop(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int ret;

	spin_lock(&l_mg->free_lock);
	if (pblk->state == PBLK_STATE_RECOVERING ||
					pblk->state == PBLK_STATE_STOPPED) {
		spin_unlock(&l_mg->free_lock);
		return;
	}
	pblk->state = PBLK_STATE_RECOVERING;
	spin_unlock(&l_mg->free_lock);

	pblk_flush_writer(pblk);
	pblk_wait_for_meta(pblk);

	ret = pblk_recov_pad(pblk);
	if (ret) {
		pr_err("pblk: could not close data on teardown(%d)\n", ret);
		return;
	}

	flush_workqueue(pblk->bb_wq);
	pblk_line_close_meta_sync(pblk);

	spin_lock(&l_mg->free_lock);
	pblk->state = PBLK_STATE_STOPPED;
	l_mg->data_line = NULL;
	l_mg->data_next = NULL;
	spin_unlock(&l_mg->free_lock);
}

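/* Swap the open data line for the pre-allocated next line and prepare a new
 * "next" line. If no free line is available, writes are throttled and the
 * pipeline is stopped gracefully.
 */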
void pblk_line_replace_data(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *cur, *new;
	unsigned int left_seblks;
	int is_next = 0;

	cur = l_mg->data_line;
	new = l_mg->data_next;
	if (!new)
		return;
	l_mg->data_line = new;

	spin_lock(&l_mg->free_lock);
	if (pblk->state != PBLK_STATE_RUNNING) {
		l_mg->data_line = NULL;
		l_mg->data_next = NULL;
		spin_unlock(&l_mg->free_lock);
		return;
	}

	pblk_line_setup_metadata(new, l_mg, &pblk->lm);
	spin_unlock(&l_mg->free_lock);

retry_erase:
	left_seblks = atomic_read(&new->left_seblks);
	if (left_seblks) {
		/* If line is not fully erased, erase it */
		if (atomic_read(&new->left_eblks)) {
			if (pblk_line_erase(pblk, new))
				return;
		} else {
			io_schedule();
		}

		goto retry_erase;
	}

retry_setup:
	if (!pblk_line_init_metadata(pblk, new, cur)) {
		new = pblk_line_retry(pblk, new);
		if (!new)
			return;

		goto retry_setup;
	}

	if (!pblk_line_init_bb(pblk, new, 1)) {
		new = pblk_line_retry(pblk, new);
		if (!new)
			return;

		goto retry_setup;
	}

	/* Allocate next line for preparation */
	spin_lock(&l_mg->free_lock);
	l_mg->data_next = pblk_line_get(pblk);
	if (!l_mg->data_next) {
		/* If we cannot get a new line, we need to stop the pipeline.
		 * Only allow as many writes in as we can store safely and then
		 * fail gracefully
		 */
		pblk_stop_writes(pblk, new);
		l_mg->data_next = NULL;
	} else {
		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
		l_mg->data_next->type = PBLK_LINETYPE_DATA;
		is_next = 1;
	}
	spin_unlock(&l_mg->free_lock);

	if (is_next)
		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
}

void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
{
	if (line->map_bitmap)
		mempool_free(line->map_bitmap, pblk->line_meta_pool);
	if (line->invalid_bitmap)
		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);

	*line->vsc = cpu_to_le32(EMPTY_ENTRY);

	line->map_bitmap = NULL;
	line->invalid_bitmap = NULL;
	line->smeta = NULL;
	line->emeta = NULL;
}

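/* kref release callback: a fully GC'ed line is returned to the free list and
 * the rate limiter is credited accordingly.
 */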
void pblk_line_put(struct kref *ref)
{
	struct pblk_line *line = container_of(ref, struct pblk_line, ref);
	struct pblk *pblk = line->pblk;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;

	spin_lock(&line->lock);
	WARN_ON(line->state != PBLK_LINESTATE_GC);
	line->state = PBLK_LINESTATE_FREE;
	line->gc_group = PBLK_LINEGC_NONE;
	pblk_line_free(pblk, line);
	spin_unlock(&line->lock);

	spin_lock(&l_mg->free_lock);
	list_add_tail(&line->list, &l_mg->free_list);
	l_mg->nr_free_lines++;
	spin_unlock(&l_mg->free_lock);

	pblk_rl_free_lines_inc(&pblk->rl, line);
}

int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
{
	struct nvm_rq *rqd;
	int err;

	rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
	memset(rqd, 0, pblk_g_rq_size);

	pblk_setup_e_rq(pblk, rqd, ppa);

	rqd->end_io = pblk_end_io_erase;
	rqd->private = pblk;

	/* The write thread schedules erases so that it minimizes disturbances
	 * with writes. Thus, there is no need to take the LUN semaphore.
	 */
	err = pblk_submit_io(pblk, rqd);
	if (err) {
		struct nvm_tgt_dev *dev = pblk->dev;
		struct nvm_geo *geo = &dev->geo;

		pr_err("pblk: could not async erase line:%d,blk:%d\n",
					pblk_dev_ppa_to_line(ppa),
					pblk_dev_ppa_to_pos(geo, ppa));
	}

	return err;
}

struct pblk_line *pblk_line_get_data(struct pblk *pblk)
{
	return pblk->l_mg.data_line;
}

/* For now, always erase next line */
struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
{
	return pblk->l_mg.data_next;
}

int pblk_line_is_full(struct pblk_line *line)
{
	return (line->left_msecs == 0);
}

void pblk_line_close_meta_sync(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line, *tline;
	LIST_HEAD(list);

	spin_lock(&l_mg->close_lock);
	if (list_empty(&l_mg->emeta_list)) {
		spin_unlock(&l_mg->close_lock);
		return;
	}

	list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
	spin_unlock(&l_mg->close_lock);

	list_for_each_entry_safe(line, tline, &list, list) {
		struct pblk_emeta *emeta = line->emeta;

		while (emeta->mem < lm->emeta_len[0]) {
			int ret;

			ret = pblk_submit_meta_io(pblk, line);
			if (ret) {
				pr_err("pblk: sync meta line %d failed (%d)\n",
							line->id, ret);
				return;
			}
		}
	}

	pblk_wait_for_meta(pblk);
	flush_workqueue(pblk->close_wq);
}

static void pblk_line_should_sync_meta(struct pblk *pblk)
{
	if (pblk_rl_is_limit(&pblk->rl))
		pblk_line_close_meta_sync(pblk);
}

void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list;

#ifdef CONFIG_NVM_DEBUG
	struct pblk_line_meta *lm = &pblk->lm;

	WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
				"pblk: corrupt closed line %d\n", line->id);
#endif

	spin_lock(&l_mg->free_lock);
	WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
	spin_unlock(&l_mg->free_lock);

	spin_lock(&l_mg->gc_lock);
	spin_lock(&line->lock);
	WARN_ON(line->state != PBLK_LINESTATE_OPEN);
	line->state = PBLK_LINESTATE_CLOSED;
	move_list = pblk_line_gc_list(pblk, line);

	list_add_tail(&line->list, move_list);

	mempool_free(line->map_bitmap, pblk->line_meta_pool);
	line->map_bitmap = NULL;
	line->smeta = NULL;
	line->emeta = NULL;

	spin_unlock(&line->lock);
	spin_unlock(&l_mg->gc_lock);

	pblk_gc_should_kick(pblk);
}

void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;

	/* No need for exact vsc value; avoid a big line lock and take approx. */
	memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
	memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);

	emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
	emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));

	spin_lock(&l_mg->close_lock);
	spin_lock(&line->lock);
	list_add_tail(&line->list, &l_mg->emeta_list);
	spin_unlock(&line->lock);
	spin_unlock(&l_mg->close_lock);

	pblk_line_should_sync_meta(pblk);
}

void pblk_line_close_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct pblk_line *line = line_ws->line;

	pblk_line_close(pblk, line);
	mempool_free(line_ws, pblk->line_ws_pool);
}

void pblk_line_mark_bb(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct ppa_addr *ppa = line_ws->priv;
	int ret;

	ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
	if (ret) {
		struct pblk_line *line;
		int pos;

		line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
		pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);

		pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
				line->id, pos);
	}

	kfree(ppa);
	mempool_free(line_ws, pblk->line_ws_pool);
}

void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
		      void (*work)(struct work_struct *),
		      struct workqueue_struct *wq)
{
	struct pblk_line_ws *line_ws;

	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC);
	if (!line_ws)
		return;

	line_ws->pblk = pblk;
	line_ws->line = line;
	line_ws->priv = priv;

	INIT_WORK(&line_ws->ws, work);
	queue_work(wq, &line_ws->ws);
}

static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
			     int nr_ppas, int pos)
{
	struct pblk_lun *rlun = &pblk->luns[pos];
	int ret;

	/*
	 * Only send one inflight I/O per LUN. Since we map at a page
	 * granularity, all ppas in the I/O will map to the same LUN
	 */
#ifdef CONFIG_NVM_DEBUG
	int i;

	for (i = 1; i < nr_ppas; i++)
		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
				ppa_list[0].g.ch != ppa_list[i].g.ch);
#endif

	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
	if (ret) {
		switch (ret) {
		case -ETIME:
			pr_err("pblk: lun semaphore timed out\n");
			break;
		case -EINTR:
			pr_err("pblk: lun semaphore timed out\n");
			break;
		}
	}
}

void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);

	__pblk_down_page(pblk, ppa_list, nr_ppas, pos);
}

void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
		  unsigned long *lun_bitmap)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);

	/* If the LUN has been locked for this same request, do not attempt to
	 * lock it again
	 */
	if (test_and_set_bit(pos, lun_bitmap))
		return;

	__pblk_down_page(pblk, ppa_list, nr_ppas, pos);
}

void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);

#ifdef CONFIG_NVM_DEBUG
	int i;

	for (i = 1; i < nr_ppas; i++)
		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
				ppa_list[0].g.ch != ppa_list[i].g.ch);
#endif

	rlun = &pblk->luns[pos];
	up(&rlun->wr_sem);
}

void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
		unsigned long *lun_bitmap)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int nr_luns = geo->nr_luns;
	int bit = -1;

	while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
		rlun = &pblk->luns[bit];
		up(&rlun->wr_sem);
	}

	kfree(lun_bitmap);
}

void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
{
	struct ppa_addr l2p_ppa;

	/* logic error: lba out-of-bounds. Ignore update */
	if (!(lba < pblk->rl.nr_secs)) {
		WARN(1, "pblk: corrupted L2P map request\n");
		return;
	}

	spin_lock(&pblk->trans_lock);
	l2p_ppa = pblk_trans_map_get(pblk, lba);

	if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa))
		pblk_map_invalidate(pblk, l2p_ppa);

	pblk_trans_map_set(pblk, lba, ppa);
	spin_unlock(&pblk->trans_lock);
}

void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
{
#ifdef CONFIG_NVM_DEBUG
	/* Callers must ensure that the ppa points to a cache address */
	BUG_ON(!pblk_addr_in_cache(ppa));
	BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
#endif

	pblk_update_map(pblk, lba, ppa);
}

int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
		       struct pblk_line *gc_line)
{
	struct ppa_addr l2p_ppa;
	int ret = 1;

#ifdef CONFIG_NVM_DEBUG
	/* Callers must ensure that the ppa points to a cache address */
	BUG_ON(!pblk_addr_in_cache(ppa));
	BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
#endif

	/* logic error: lba out-of-bounds. Ignore update */
	if (!(lba < pblk->rl.nr_secs)) {
		WARN(1, "pblk: corrupted L2P map request\n");
		return 0;
	}

	spin_lock(&pblk->trans_lock);
	l2p_ppa = pblk_trans_map_get(pblk, lba);

	/* Prevent updated entries from being overwritten by GC */
	if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) ||
				pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) {
		ret = 0;
		goto out;
	}

	pblk_trans_map_set(pblk, lba, ppa);
out:
	spin_unlock(&pblk->trans_lock);
	return ret;
}

void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
			 struct ppa_addr entry_line)
{
	struct ppa_addr l2p_line;

#ifdef CONFIG_NVM_DEBUG
	/* Callers must ensure that the ppa points to a device address */
	BUG_ON(pblk_addr_in_cache(ppa));
#endif
	/* Invalidate and discard padded entries */
	if (lba == ADDR_EMPTY) {
#ifdef CONFIG_NVM_DEBUG
		atomic_long_inc(&pblk->padded_wb);
#endif
		pblk_map_invalidate(pblk, ppa);
		return;
	}

	/* logic error: lba out-of-bounds. Ignore update */
	if (!(lba < pblk->rl.nr_secs)) {
		WARN(1, "pblk: corrupted L2P map request\n");
		return;
	}

	spin_lock(&pblk->trans_lock);
	l2p_line = pblk_trans_map_get(pblk, lba);

	/* Do not update L2P if the cacheline has been updated. In this case,
	 * the mapped ppa must be invalidated
	 */
	if (l2p_line.ppa != entry_line.ppa) {
		if (!pblk_ppa_empty(ppa))
			pblk_map_invalidate(pblk, ppa);

		goto out;
	}

#ifdef CONFIG_NVM_DEBUG
	WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line));
#endif

	pblk_trans_map_set(pblk, lba, ppa);
out:
	spin_unlock(&pblk->trans_lock);
}

void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
			 sector_t blba, int nr_secs)
{
	int i;

	spin_lock(&pblk->trans_lock);
	for (i = 0; i < nr_secs; i++)
		ppas[i] = pblk_trans_map_get(pblk, blba + i);
	spin_unlock(&pblk->trans_lock);
}

void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
			  u64 *lba_list, int nr_secs)
{
	u64 lba;
	int i;

	spin_lock(&pblk->trans_lock);
	for (i = 0; i < nr_secs; i++) {
		lba = lba_list[i];
		if (lba == ADDR_EMPTY) {
			ppas[i].ppa = ADDR_EMPTY;
		} else {
			/* logic error: lba out-of-bounds. Ignore update */
			if (!(lba < pblk->rl.nr_secs)) {
				WARN(1, "pblk: corrupted L2P map request\n");
				continue;
			}
			ppas[i] = pblk_trans_map_get(pblk, lba);
		}
	}
	spin_unlock(&pblk->trans_lock);
}