/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */

#include "pblk.h"
#include <linux/delay.h>

static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
	vfree(gc_rq->data);
	kfree(gc_rq);
}
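
/*
 * Drain the GC write list: for each queued request, copy the valid sectors
 * into the write cache, drop the line reference taken when the request was
 * queued and free the request. Returns 1 when there is nothing to do so the
 * writer kthread can go back to sleep.
 */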
static int pblk_gc_write(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_gc_rq *gc_rq, *tgc_rq;
	LIST_HEAD(w_list);

	spin_lock(&gc->w_lock);
	if (list_empty(&gc->w_list)) {
		spin_unlock(&gc->w_lock);
		return 1;
	}

	list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
	gc->w_entries = 0;
	spin_unlock(&gc->w_lock);

	list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
		pblk_write_gc_to_cache(pblk, gc_rq);
		list_del(&gc_rq->list);
		kref_put(&gc_rq->line->ref, pblk_line_put);
		pblk_gc_free_gc_rq(gc_rq);
	}

	return 0;
}

static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
	wake_up_process(gc->gc_writer_ts);
}
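
/*
 * GC failed for this line: move it from the GC state back to closed and
 * re-insert it on the gc group list matching its valid sector count.
 */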
static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list;

	spin_lock(&line->lock);
	WARN_ON(line->state != PBLK_LINESTATE_GC);
	line->state = PBLK_LINESTATE_CLOSED;
	move_list = pblk_line_gc_list(pblk, line);
	spin_unlock(&line->lock);

	if (move_list) {
		spin_lock(&l_mg->gc_lock);
		list_add_tail(&line->list, move_list);
		spin_unlock(&l_mg->gc_lock);
	}
}
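
/*
 * Read-side GC work: allocate a buffer for the victim sectors, read them
 * from the device and queue the request on the GC write list. If the write
 * list is full (PBLK_GC_RQ_QD), kick the writer and retry after a short
 * sleep.
 */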
static void pblk_gc_line_ws(struct work_struct *work)
{
	struct pblk_line_ws *gc_rq_ws = container_of(work,
						struct pblk_line_ws, ws);
	struct pblk *pblk = gc_rq_ws->pblk;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line = gc_rq_ws->line;
	struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
	int ret;

	up(&gc->gc_sem);

	gc_rq->data = vmalloc(gc_rq->nr_secs * geo->sec_size);
	if (!gc_rq->data) {
		pr_err("pblk: could not GC line:%d (%d/%d)\n",
					line->id, *line->vsc, gc_rq->nr_secs);
		goto out;
	}

	/* Read from GC victim block */
	ret = pblk_submit_read_gc(pblk, gc_rq);
	if (ret) {
		pr_err("pblk: failed GC read in line:%d (err:%d)\n",
								line->id, ret);
		goto out;
	}

	if (!gc_rq->secs_to_gc)
		goto out;

retry:
	spin_lock(&gc->w_lock);
	if (gc->w_entries >= PBLK_GC_RQ_QD) {
		spin_unlock(&gc->w_lock);
		pblk_gc_writer_kick(&pblk->gc);
		usleep_range(128, 256);
		goto retry;
	}
	gc->w_entries++;
	list_add_tail(&gc_rq->list, &gc->w_list);
	spin_unlock(&gc->w_lock);

	pblk_gc_writer_kick(&pblk->gc);

	kfree(gc_rq_ws);
	return;

out:
	pblk_gc_free_gc_rq(gc_rq);
	kref_put(&line->ref, pblk_line_put);
	kfree(gc_rq_ws);
}
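
/*
 * Prepare a victim line for GC: read and validate its emeta, take a
 * snapshot of the invalid sector bitmap, and split the remaining valid
 * sectors into gc_rq chunks of at most max_write_pgs sectors, each one
 * queued as a read work item. The gc_sem semaphore bounds the number of
 * chunks in flight.
 */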
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct pblk_line *line = line_ws->line;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_gc *gc = &pblk->gc;
	struct line_emeta *emeta_buf;
	struct pblk_line_ws *gc_rq_ws;
	struct pblk_gc_rq *gc_rq;
	__le64 *lba_list;
	unsigned long *invalid_bitmap;
	int sec_left, nr_secs, bit;
	int ret;

	invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!invalid_bitmap) {
		pr_err("pblk: could not allocate GC invalid bitmap\n");
		goto fail_free_ws;
	}

	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
								GFP_KERNEL);
	if (!emeta_buf) {
		pr_err("pblk: cannot use GC emeta\n");
		goto fail_free_bitmap;
	}

	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
	if (ret) {
		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
		goto fail_free_emeta;
	}

	/* If this read fails, it means that emeta is corrupted. For now, leave
	 * the line untouched. TODO: Implement a recovery routine that scans and
	 * moves all sectors on the line.
	 */

	ret = pblk_recov_check_emeta(pblk, emeta_buf);
	if (ret) {
		pr_err("pblk: inconsistent emeta (line %d)\n", line->id);
		goto fail_free_emeta;
	}

	lba_list = emeta_to_lbas(pblk, emeta_buf);
	if (!lba_list) {
		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
		goto fail_free_emeta;
	}

	spin_lock(&line->lock);
	bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line);
	sec_left = pblk_line_vsc(line);
	spin_unlock(&line->lock);

	if (sec_left < 0) {
		pr_err("pblk: corrupted GC line (%d)\n", line->id);
		goto fail_free_emeta;
	}

	bit = -1;
next_rq:
	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
	if (!gc_rq)
		goto fail_free_emeta;

	nr_secs = 0;
	do {
		bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line,
								bit + 1);
		if (bit > line->emeta_ssec)
			break;

		gc_rq->paddr_list[nr_secs] = bit;
		gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
	} while (nr_secs < pblk->max_write_pgs);

	if (unlikely(!nr_secs)) {
		kfree(gc_rq);
		goto out;
	}

	gc_rq->nr_secs = nr_secs;
	gc_rq->line = line;

	gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
	if (!gc_rq_ws)
		goto fail_free_gc_rq;

	gc_rq_ws->pblk = pblk;
	gc_rq_ws->line = line;
	gc_rq_ws->priv = gc_rq;

	/* The write GC path can be much slower than the read GC one due to
	 * the budget imposed by the rate-limiter. Balance in case that we get
	 * back pressure from the write GC path.
	 */
	while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000)))
		io_schedule();

	kref_get(&line->ref);

	INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws);
	queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws);

	sec_left -= nr_secs;
	if (sec_left > 0)
		goto next_rq;

out:
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
	kfree(line_ws);
	kfree(invalid_bitmap);

	kref_put(&line->ref, pblk_line_put);
	atomic_dec(&gc->read_inflight_gc);

	return;

fail_free_gc_rq:
	kfree(gc_rq);
fail_free_emeta:
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
fail_free_bitmap:
	kfree(invalid_bitmap);
fail_free_ws:
	kfree(line_ws);

	pblk_put_line_back(pblk, line);
	kref_put(&line->ref, pblk_line_put);
	atomic_dec(&gc->read_inflight_gc);

	pr_err("pblk: Failed to GC line %d\n", line->id);
}
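
/*
 * Queue a victim line on the GC reader workqueue for preparation.
 */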
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line_ws *line_ws;

	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);

	line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
	if (!line_ws)
		return -ENOMEM;

	line_ws->pblk = pblk;
	line_ws->line = line;

	atomic_inc(&gc->pipeline_gc);
	INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
	queue_work(gc->gc_reader_wq, &line_ws->ws);

	return 0;
}

static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
	wake_up_process(gc->gc_reader_ts);
}

static void pblk_gc_kick(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	pblk_gc_writer_kick(gc);
	pblk_gc_reader_kick(gc);

	/* If we're shutting down GC, let's not start it up again */
	if (gc->gc_enabled) {
		wake_up_process(gc->gc_ts);
		mod_timer(&gc->gc_timer,
			  jiffies + msecs_to_jiffies(GC_TIME_MSECS));
	}
}
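
/*
 * Take the next victim line off the GC read list and start reclaiming it.
 * Returns 1 when the list is empty so the reader kthread can sleep.
 */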
static int pblk_gc_read(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;

	spin_lock(&gc->r_lock);
	if (list_empty(&gc->r_list)) {
		spin_unlock(&gc->r_lock);
		return 1;
	}

	line = list_first_entry(&gc->r_list, struct pblk_line, list);
	list_del(&line->list);
	spin_unlock(&gc->r_lock);

	pblk_gc_kick(pblk);

	if (pblk_gc_line(pblk, line))
		pr_err("pblk: failed to GC line %d\n", line->id);

	return 0;
}
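
/*
 * Greedy victim selection: choose the line in this group with the fewest
 * valid sectors, i.e. the one that is cheapest to reclaim.
 */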
static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
						 struct list_head *group_list)
{
	struct pblk_line *line, *victim;
	int line_vsc, victim_vsc;

	victim = list_first_entry(group_list, struct pblk_line, list);
	list_for_each_entry(line, group_list, list) {
		line_vsc = le32_to_cpu(*line->vsc);
		victim_vsc = le32_to_cpu(*victim->vsc);
		if (line_vsc < victim_vsc)
			victim = line;
	}

	return victim;
}

static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
	unsigned int nr_blocks_free, nr_blocks_need;

	nr_blocks_need = pblk_rl_high_thrs(rl);
	nr_blocks_free = pblk_rl_nr_free_blks(rl);

	/* This is not critical, no need to take lock here */
	return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}
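
/*
 * Lines on gc_full_list contain no valid sectors at all; dropping the last
 * reference returns them to the free list without moving any data.
 */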
void pblk_gc_free_full_lines(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;

	do {
		spin_lock(&l_mg->gc_lock);
		if (list_empty(&l_mg->gc_full_list)) {
			spin_unlock(&l_mg->gc_lock);
			return;
		}

		line = list_first_entry(&l_mg->gc_full_list,
							struct pblk_line, list);

		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		spin_unlock(&line->lock);

		list_del(&line->list);
		spin_unlock(&l_mg->gc_lock);

		atomic_inc(&gc->pipeline_gc);
		kref_put(&line->ref, pblk_line_put);
	} while (1);
}

/*
 * Lines with no valid sectors will be returned to the free list immediately. If
 * GC is activated - either because the free block count is under the determined
 * threshold, or because it is being forced from user space - only lines with a
 * high count of invalid sectors will be recycled.
 */
static void pblk_gc_run(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;
	struct list_head *group_list;
	bool run_gc;
	int read_inflight_gc, gc_group = 0, prev_group = 0;

	pblk_gc_free_full_lines(pblk);

	run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
	if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD))
		return;

next_gc_group:
	group_list = l_mg->gc_lists[gc_group++];

	do {
		spin_lock(&l_mg->gc_lock);
		if (list_empty(group_list)) {
			spin_unlock(&l_mg->gc_lock);
			break;
		}

		line = pblk_gc_get_victim_line(pblk, group_list);

		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		spin_unlock(&line->lock);

		list_del(&line->list);
		spin_unlock(&l_mg->gc_lock);

		spin_lock(&gc->r_lock);
		list_add_tail(&line->list, &gc->r_list);
		spin_unlock(&gc->r_lock);

		read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc);
		pblk_gc_reader_kick(gc);

		prev_group = 1;

		/* No need to queue up more GC lines than we can handle */
		run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
		if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD)
			break;
	} while (1);

	if (!prev_group && pblk->rl.rb_state > gc_group &&
						gc_group < PBLK_GC_NR_LISTS)
		goto next_gc_group;
}

static void pblk_gc_timer(struct timer_list *t)
{
	struct pblk *pblk = from_timer(pblk, t, gc.gc_timer);

	pblk_gc_kick(pblk);
}
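
/*
 * The three GC kthreads sleep until kicked: the main thread selects victim
 * lines, the reader thread drains the r_list, and the writer thread drains
 * the w_list into the write cache.
 */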
static int pblk_gc_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		pblk_gc_run(pblk);
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

static int pblk_gc_writer_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		if (!pblk_gc_write(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

static int pblk_gc_reader_ts(void *data)
{
	struct pblk *pblk = data;
	struct pblk_gc *gc = &pblk->gc;

	while (!kthread_should_stop()) {
		if (!pblk_gc_read(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

#ifdef CONFIG_NVM_DEBUG
	pr_info("pblk: flushing gc pipeline, %d lines left\n",
			atomic_read(&gc->pipeline_gc));
#endif

	do {
		if (!atomic_read(&gc->pipeline_gc))
			break;

		schedule();
	} while (1);

	return 0;
}

static void pblk_gc_start(struct pblk *pblk)
{
	pblk->gc.gc_active = 1;
	pr_debug("pblk: gc start\n");
}
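
/*
 * GC is started only when it is enabled and not already active; it is
 * stopped only when it has not been forced from user space.
 */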
void pblk_gc_should_start(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (gc->gc_enabled && !gc->gc_active) {
		pblk_gc_start(pblk);
		pblk_gc_kick(pblk);
	}
}

void pblk_gc_should_stop(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (gc->gc_active && !gc->gc_forced)
		gc->gc_active = 0;
}

void pblk_gc_should_kick(struct pblk *pblk)
{
	pblk_rl_update_rates(&pblk->rl);
}

void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
			      int *gc_active)
{
	struct pblk_gc *gc = &pblk->gc;

	spin_lock(&gc->lock);
	*gc_enabled = gc->gc_enabled;
	*gc_active = gc->gc_active;
	spin_unlock(&gc->lock);
}

int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
	struct pblk_gc *gc = &pblk->gc;

	if (force < 0 || force > 1)
		return -EINVAL;

	spin_lock(&gc->lock);
	gc->gc_forced = force;

	if (force)
		gc->gc_enabled = 1;
	else
		gc->gc_enabled = 0;
	spin_unlock(&gc->lock);

	pblk_gc_should_start(pblk);

	return 0;
}
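
/*
 * Set up the GC pipeline: the three kthreads, the periodic kick timer, the
 * reader workqueues, and the locks, lists and queue-depth semaphore that
 * connect them.
 */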
int pblk_gc_init(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	int ret;

	gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
	if (IS_ERR(gc->gc_ts)) {
		pr_err("pblk: could not allocate GC main kthread\n");
		return PTR_ERR(gc->gc_ts);
	}

	gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
							"pblk-gc-writer-ts");
	if (IS_ERR(gc->gc_writer_ts)) {
		pr_err("pblk: could not allocate GC writer kthread\n");
		ret = PTR_ERR(gc->gc_writer_ts);
		goto fail_free_main_kthread;
	}

	gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
							"pblk-gc-reader-ts");
	if (IS_ERR(gc->gc_reader_ts)) {
		pr_err("pblk: could not allocate GC reader kthread\n");
		ret = PTR_ERR(gc->gc_reader_ts);
		goto fail_free_writer_kthread;
	}

	timer_setup(&gc->gc_timer, pblk_gc_timer, 0);
	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));

	gc->gc_active = 0;
	gc->gc_forced = 0;
	gc->gc_enabled = 1;
	gc->w_entries = 0;
	atomic_set(&gc->read_inflight_gc, 0);
	atomic_set(&gc->pipeline_gc, 0);

	/* Workqueue that reads valid sectors from a line and submit them to the
	 * GC writer to be recycled.
	 */
	gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
	if (!gc->gc_line_reader_wq) {
		pr_err("pblk: could not allocate GC line reader workqueue\n");
		ret = -ENOMEM;
		goto fail_free_reader_kthread;
	}

	/* Workqueue that prepare lines for GC */
	gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
	if (!gc->gc_reader_wq) {
		pr_err("pblk: could not allocate GC reader workqueue\n");
		ret = -ENOMEM;
		goto fail_free_reader_line_wq;
	}

	spin_lock_init(&gc->lock);
	spin_lock_init(&gc->w_lock);
	spin_lock_init(&gc->r_lock);

	sema_init(&gc->gc_sem, PBLK_GC_RQ_QD);

	INIT_LIST_HEAD(&gc->w_list);
	INIT_LIST_HEAD(&gc->r_list);

	return 0;

fail_free_reader_line_wq:
	destroy_workqueue(gc->gc_line_reader_wq);
fail_free_reader_kthread:
	kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
	kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
	kthread_stop(gc->gc_ts);

	return ret;
}
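
/*
 * Tear down the GC pipeline. Disabling GC before deleting the timer makes
 * sure the timer cannot re-arm itself through pblk_gc_kick().
 */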
void pblk_gc_exit(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	gc->gc_enabled = 0;
	del_timer_sync(&gc->gc_timer);
	gc->gc_active = 0;

	if (gc->gc_ts)
		kthread_stop(gc->gc_ts);

	if (gc->gc_reader_ts)
		kthread_stop(gc->gc_reader_ts);

	flush_workqueue(gc->gc_reader_wq);
	if (gc->gc_reader_wq)
		destroy_workqueue(gc->gc_reader_wq);

	flush_workqueue(gc->gc_line_reader_wq);
	if (gc->gc_line_reader_wq)
		destroy_workqueue(gc->gc_line_reader_wq);

	if (gc->gc_writer_ts)
		kthread_stop(gc->gc_writer_ts);
}