2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * pblk-gc.c - pblk's garbage collector
19 #include <linux/delay.h>
/*
 * pblk_gc_free_gc_rq - release a GC request.
 * @gc_rq: request to release.
 *
 * Only the signature is visible in this listing. Within this file the
 * function is called by pblk_gc_write() as the last step for each request,
 * after the request has been unlinked and its line reference dropped.
 * NOTE(review): which members of @gc_rq are freed cannot be confirmed from
 * here - check the body.
 */
21 static void pblk_gc_free_gc_rq(struct pblk_gc_rq
*gc_rq
)
/*
 * pblk_gc_write - drain the GC write list into the write cache.
 * @pblk: pblk instance whose gc.w_list is drained.
 *
 * With gc->w_lock held, the function checks whether gc->w_list is empty
 * (unlocking on that path). Otherwise the whole list, up to gc->w_list.prev,
 * is moved onto the local w_list with list_cut_position() and the lock is
 * dropped, so that the per-request work below runs without gc->w_lock.
 *
 * For every request on the local list:
 *   - pblk_write_gc_to_cache() is called with the request's data, lba_list,
 *     nr_secs, secs_to_gc and line, using PBLK_IOTYPE_GC;
 *   - the request is unlinked with list_del();
 *   - the reference on gc_rq->line is dropped via kref_put(.., pblk_line_put);
 *   - the request is released with pblk_gc_free_gc_rq().
 *
 * list_for_each_entry_safe() is used because entries are removed and freed
 * while iterating.
 *
 * NOTE(review): the declaration of the local w_list and the return
 * statements are not visible in this listing; pblk_gc_writer_ts() tests the
 * return value with '!'.
 */
27 static int pblk_gc_write(struct pblk
*pblk
)
29 struct pblk_gc
*gc
= &pblk
->gc
;
30 struct pblk_gc_rq
*gc_rq
, *tgc_rq
;
33 spin_lock(&gc
->w_lock
);
34 if (list_empty(&gc
->w_list
)) {
35 spin_unlock(&gc
->w_lock
);
39 list_cut_position(&w_list
, &gc
->w_list
, gc
->w_list
.prev
);
41 spin_unlock(&gc
->w_lock
);
43 list_for_each_entry_safe(gc_rq
, tgc_rq
, &w_list
, list
) {
44 pblk_write_gc_to_cache(pblk
, gc_rq
->data
, gc_rq
->lba_list
,
45 gc_rq
->nr_secs
, gc_rq
->secs_to_gc
,
46 gc_rq
->line
, PBLK_IOTYPE_GC
);
48 list_del(&gc_rq
->list
);
49 kref_put(&gc_rq
->line
->ref
, pblk_line_put
);
50 pblk_gc_free_gc_rq(gc_rq
);
56 static void pblk_gc_writer_kick(struct pblk_gc
*gc
)
58 wake_up_process(gc
->gc_writer_ts
);
62 * Responsible for managing all memory related to a gc request. Also in case of
/*
 * pblk_gc_move_valid_secs - read a GC request's sectors and queue it for the
 * GC writer.
 * @pblk:  pblk instance.
 * @gc_rq: request describing the sectors to move (lba_list, nr_secs, line).
 *
 * Visible steps:
 *   1. A buffer of gc_rq->nr_secs * geo->sec_size bytes is obtained with
 *      vmalloc().
 *   2. pblk_submit_read_gc() is called with the request's lba_list, the
 *      buffer and nr_secs to read from the victim line.
 *   3. The local secs_to_gc is stored in gc_rq->secs_to_gc.
 *   4. With gc->w_lock held, if gc->w_entries has reached PBLK_GC_W_QD the
 *      lock is dropped, the writer is kicked and the caller sleeps for
 *      128-256us (usleep_range) so the writer can drain the queue.
 *   5. The request is appended to gc->w_list under gc->w_lock and the writer
 *      thread is kicked.
 *   6. A kref_put() on line->ref with pblk_line_put appears at the end of
 *      the function.
 *
 * NOTE(review): the declaration of data, the error checks, the labels and
 * the return statements are not visible in this listing, so it cannot be
 * confirmed here which paths reach the final kref_put() or how the sleep in
 * step 4 loops back. pblk_gc_line_ws() treats a non-zero return as failure.
 */
65 static int pblk_gc_move_valid_secs(struct pblk
*pblk
, struct pblk_gc_rq
*gc_rq
)
67 struct nvm_tgt_dev
*dev
= pblk
->dev
;
68 struct nvm_geo
*geo
= &dev
->geo
;
69 struct pblk_gc
*gc
= &pblk
->gc
;
70 struct pblk_line
*line
= gc_rq
->line
;
72 unsigned int secs_to_gc
;
75 data
= vmalloc(gc_rq
->nr_secs
* geo
->sec_size
);
81 /* Read from GC victim block */
82 if (pblk_submit_read_gc(pblk
, gc_rq
->lba_list
, data
, gc_rq
->nr_secs
,
92 gc_rq
->secs_to_gc
= secs_to_gc
;
95 spin_lock(&gc
->w_lock
);
96 if (gc
->w_entries
>= PBLK_GC_W_QD
) {
97 spin_unlock(&gc
->w_lock
);
98 pblk_gc_writer_kick(&pblk
->gc
);
99 usleep_range(128, 256);
103 list_add_tail(&gc_rq
->list
, &gc
->w_list
);
104 spin_unlock(&gc
->w_lock
);
106 pblk_gc_writer_kick(&pblk
->gc
);
115 kref_put(&line
->ref
, pblk_line_put
);
/*
 * pblk_put_line_back - return a line that was picked for GC to a GC list.
 * @pblk: pblk instance.
 * @line: line currently in PBLK_LINESTATE_GC.
 *
 * Under line->lock the line is expected to be in PBLK_LINESTATE_GC (WARN_ON
 * otherwise), is switched back to PBLK_LINESTATE_CLOSED, and the destination
 * list is looked up with pblk_line_gc_list(). The line is then appended to
 * that list under l_mg->gc_lock.
 *
 * The two locks are taken one after the other here, not nested.
 *
 * NOTE(review): one original line between the two locked sections is missing
 * from this listing; confirm whether move_list is checked before it is used.
 */
119 static void pblk_put_line_back(struct pblk
*pblk
, struct pblk_line
*line
)
121 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
122 struct list_head
*move_list
;
124 spin_lock(&line
->lock
);
125 WARN_ON(line
->state
!= PBLK_LINESTATE_GC
);
126 line
->state
= PBLK_LINESTATE_CLOSED
;
127 move_list
= pblk_line_gc_list(pblk
, line
);
128 spin_unlock(&line
->lock
);
131 spin_lock(&l_mg
->gc_lock
);
132 list_add_tail(&line
->list
, move_list
);
133 spin_unlock(&l_mg
->gc_lock
);
/*
 * pblk_gc_line_ws - work handler that moves the valid sectors of one GC
 * request.
 * @work: embedded work item (member ws of struct pblk_line_ws).
 *
 * The enclosing pblk_line_ws is recovered with container_of(); it carries
 * the pblk instance, the line and, in ->priv, the pblk_gc_rq prepared by
 * pblk_gc_line_prepare_ws(). The request is handed to
 * pblk_gc_move_valid_secs(); a non-zero result is logged with pr_err(). The
 * work item is finally returned to pblk->line_ws_pool.
 *
 * NOTE(review): the pr_err() passes *line->vsc directly, while
 * pblk_gc_get_victim_line() reads the same field through le32_to_cpu();
 * confirm whether the log needs the same conversion. The remaining pr_err()
 * arguments are not visible in this listing.
 */
137 static void pblk_gc_line_ws(struct work_struct
*work
)
139 struct pblk_line_ws
*line_rq_ws
= container_of(work
,
140 struct pblk_line_ws
, ws
);
141 struct pblk
*pblk
= line_rq_ws
->pblk
;
142 struct pblk_gc
*gc
= &pblk
->gc
;
143 struct pblk_line
*line
= line_rq_ws
->line
;
144 struct pblk_gc_rq
*gc_rq
= line_rq_ws
->priv
;
148 if (pblk_gc_move_valid_secs(pblk
, gc_rq
)) {
149 pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
150 line
->id
, *line
->vsc
,
154 mempool_free(line_rq_ws
, pblk
->line_ws_pool
);
/*
 * pblk_gc_line_prepare_ws - work handler that splits a victim line into GC
 * requests.
 * @work: embedded work item of the pblk_line_ws queued by pblk_gc_line().
 *
 * Visible steps:
 *   1. An emeta buffer of lm->emeta_len[0] bytes is allocated with
 *      pblk_malloc() using l_mg->emeta_alloc_type; failure is logged
 *      ("cannot use GC emeta").
 *   2. The line's emeta is read with pblk_line_read_emeta(); on error the
 *      code logs and jumps to fail_free_emeta.
 *   3. The LBA list is extracted with pblk_recov_get_lba_list(); failure
 *      jumps to fail_free_emeta.
 *   4. sec_left is taken from pblk_line_vsc(); a "corrupted GC line" jumps
 *      to fail_free_emeta.
 *   5. A pblk_gc_rq is allocated (kmalloc, GFP_KERNEL). Zero bits of
 *      line->invalid_bitmap are walked with find_next_zero_bit() and the
 *      corresponding LBAs are copied (le64_to_cpu) into gc_rq->lba_list
 *      until pblk->max_write_pgs entries are collected; a bit beyond
 *      line->emeta_ssec is tested for on each pass.
 *   6. A pblk_line_ws is taken from pblk->line_ws_pool, filled with pblk,
 *      line and the request (->priv), a reference on line->ref is taken,
 *      and the work is queued on gc->gc_line_reader_wq with
 *      pblk_gc_line_ws() as handler.
 *   7. Exit: the emeta buffer and this handler's own line_ws are freed, the
 *      line reference is dropped and gc->inflight_gc is decremented.
 *   8. Failure exit: the same cleanup, plus pblk_put_line_back() to return
 *      the line to a GC list and a "Failed to GC line" message.
 *
 * NOTE(review): several original lines (NULL checks, loop head, labels,
 * returns, declarations of ret and lba_list) are missing from this listing,
 * so the control flow between the steps above is inferred from the visible
 * statements and goto targets only.
 * NOTE(review): the bound test is 'bit > line->emeta_ssec'; confirm whether
 * a bit equal to emeta_ssec is meant to be collected.
 */
157 static void pblk_gc_line_prepare_ws(struct work_struct
*work
)
159 struct pblk_line_ws
*line_ws
= container_of(work
, struct pblk_line_ws
,
161 struct pblk
*pblk
= line_ws
->pblk
;
162 struct pblk_line
*line
= line_ws
->line
;
163 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
164 struct pblk_line_meta
*lm
= &pblk
->lm
;
165 struct pblk_gc
*gc
= &pblk
->gc
;
166 struct line_emeta
*emeta_buf
;
167 struct pblk_line_ws
*line_rq_ws
;
168 struct pblk_gc_rq
*gc_rq
;
170 int sec_left
, nr_secs
, bit
;
173 emeta_buf
= pblk_malloc(lm
->emeta_len
[0], l_mg
->emeta_alloc_type
,
176 pr_err("pblk: cannot use GC emeta\n");
180 ret
= pblk_line_read_emeta(pblk
, line
, emeta_buf
);
182 pr_err("pblk: line %d read emeta failed (%d)\n", line
->id
, ret
);
183 goto fail_free_emeta
;
186 /* If this read fails, it means that emeta is corrupted. For now, leave
187 * the line untouched. TODO: Implement a recovery routine that scans and
188 * moves all sectors on the line.
190 lba_list
= pblk_recov_get_lba_list(pblk
, emeta_buf
);
192 pr_err("pblk: could not interpret emeta (line %d)\n", line
->id
);
193 goto fail_free_emeta
;
196 sec_left
= pblk_line_vsc(line
);
198 pr_err("pblk: corrupted GC line (%d)\n", line
->id
);
199 goto fail_free_emeta
;
204 gc_rq
= kmalloc(sizeof(struct pblk_gc_rq
), GFP_KERNEL
);
206 goto fail_free_emeta
;
210 bit
= find_next_zero_bit(line
->invalid_bitmap
, lm
->sec_per_line
,
212 if (bit
> line
->emeta_ssec
)
215 gc_rq
->lba_list
[nr_secs
++] = le64_to_cpu(lba_list
[bit
]);
216 } while (nr_secs
< pblk
->max_write_pgs
);
218 if (unlikely(!nr_secs
)) {
223 gc_rq
->nr_secs
= nr_secs
;
226 line_rq_ws
= mempool_alloc(pblk
->line_ws_pool
, GFP_KERNEL
);
228 goto fail_free_gc_rq
;
230 line_rq_ws
->pblk
= pblk
;
231 line_rq_ws
->line
= line
;
232 line_rq_ws
->priv
= gc_rq
;
235 kref_get(&line
->ref
);
237 INIT_WORK(&line_rq_ws
->ws
, pblk_gc_line_ws
);
238 queue_work(gc
->gc_line_reader_wq
, &line_rq_ws
->ws
);
245 pblk_mfree(emeta_buf
, l_mg
->emeta_alloc_type
);
246 mempool_free(line_ws
, pblk
->line_ws_pool
);
248 kref_put(&line
->ref
, pblk_line_put
);
249 atomic_dec(&gc
->inflight_gc
);
256 pblk_mfree(emeta_buf
, l_mg
->emeta_alloc_type
);
257 pblk_put_line_back(pblk
, line
);
258 kref_put(&line
->ref
, pblk_line_put
);
259 mempool_free(line_ws
, pblk
->line_ws_pool
);
260 atomic_dec(&gc
->inflight_gc
);
262 pr_err("pblk: Failed to GC line %d\n", line
->id
);
/*
 * pblk_gc_line - schedule preparation work for one victim line.
 * @pblk: pblk instance.
 * @line: line selected for garbage collection.
 *
 * A pblk_line_ws is taken from pblk->line_ws_pool (GFP_KERNEL), filled with
 * @pblk and @line, initialised with pblk_gc_line_prepare_ws() as its handler
 * and queued on gc->gc_reader_wq. The handler returns the work item to the
 * pool when it finishes.
 *
 * NOTE(review): the allocation check and the return statements are not
 * visible in this listing; pblk_gc_read() treats a non-zero return as
 * failure.
 */
265 static int pblk_gc_line(struct pblk
*pblk
, struct pblk_line
*line
)
267 struct pblk_gc
*gc
= &pblk
->gc
;
268 struct pblk_line_ws
*line_ws
;
270 pr_debug("pblk: line '%d' being reclaimed for GC\n", line
->id
);
272 line_ws
= mempool_alloc(pblk
->line_ws_pool
, GFP_KERNEL
);
276 line_ws
->pblk
= pblk
;
277 line_ws
->line
= line
;
279 INIT_WORK(&line_ws
->ws
, pblk_gc_line_prepare_ws
);
280 queue_work(gc
->gc_reader_wq
, &line_ws
->ws
);
/*
 * pblk_gc_read - take one line off the GC read list and start collecting it.
 * @pblk: pblk instance.
 *
 * Under gc->r_lock the function checks gc->r_list; when it is empty the lock
 * is released on that path. Otherwise the first line is removed from the
 * list and the lock dropped before pblk_gc_line() is called, so the work
 * allocation and queueing happen without r_lock. A non-zero result from
 * pblk_gc_line() is logged.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * pblk_gc_reader_ts() tests the return value with '!'.
 */
285 static int pblk_gc_read(struct pblk
*pblk
)
287 struct pblk_gc
*gc
= &pblk
->gc
;
288 struct pblk_line
*line
;
290 spin_lock(&gc
->r_lock
);
291 if (list_empty(&gc
->r_list
)) {
292 spin_unlock(&gc
->r_lock
);
296 line
= list_first_entry(&gc
->r_list
, struct pblk_line
, list
);
297 list_del(&line
->list
);
298 spin_unlock(&gc
->r_lock
);
302 if (pblk_gc_line(pblk
, line
))
303 pr_err("pblk: failed to GC line %d\n", line
->id
);
308 static void pblk_gc_reader_kick(struct pblk_gc
*gc
)
310 wake_up_process(gc
->gc_reader_ts
);
/*
 * pblk_gc_get_victim_line - scan a GC group list for a victim line.
 * @pblk:       pblk instance (not referenced by the visible statements).
 * @group_list: list of candidate lines, linked through pblk_line.list.
 *
 * The candidate starts as the first entry of @group_list. Every line on the
 * list is then compared against the current candidate using the
 * little-endian valid-sector counters (*line->vsc and *victim->vsc, each
 * converted with le32_to_cpu); the visible test is line_vsc < victim_vsc.
 *
 * list_first_entry() is used unconditionally, so @group_list must not be
 * empty; pblk_gc_run() checks list_empty() under l_mg->gc_lock before
 * calling.
 *
 * NOTE(review): the statement executed when the test succeeds and the return
 * statement are not visible in this listing.
 */
313 static struct pblk_line
*pblk_gc_get_victim_line(struct pblk
*pblk
,
314 struct list_head
*group_list
)
316 struct pblk_line
*line
, *victim
;
317 int line_vsc
, victim_vsc
;
319 victim
= list_first_entry(group_list
, struct pblk_line
, list
);
320 list_for_each_entry(line
, group_list
, list
) {
321 line_vsc
= le32_to_cpu(*line
->vsc
);
322 victim_vsc
= le32_to_cpu(*victim
->vsc
);
323 if (line_vsc
< victim_vsc
)
330 static bool pblk_gc_should_run(struct pblk_gc
*gc
, struct pblk_rl
*rl
)
332 unsigned int nr_blocks_free
, nr_blocks_need
;
334 nr_blocks_need
= pblk_rl_high_thrs(rl
);
335 nr_blocks_free
= pblk_rl_nr_free_blks(rl
);
337 /* This is not critical, no need to take lock here */
338 return ((gc
->gc_active
) && (nr_blocks_need
> nr_blocks_free
));
342 * Lines with no valid sectors will be returned to the free list immediately. If
343 * GC is activated - either because the free block count is under the determined
344 * threshold, or because it is being forced from user space - only lines with a
345 * high count of invalid sectors will be recycled.
/*
 * pblk_gc_run - free fully-invalid lines and select victim lines for GC.
 * @pblk: pblk instance.
 *
 * Part 1 - gc_full_list: under l_mg->gc_lock the first line of
 * l_mg->gc_full_list is taken (if any). Under line->lock it is expected to
 * be PBLK_LINESTATE_CLOSED (WARN_ON otherwise) and is moved to
 * PBLK_LINESTATE_GC. It is unlinked, gc_lock is dropped and the line
 * reference is put with pblk_line_put as release callback.
 *
 * Part 2 - victim selection: pblk_gc_should_run() and gc->inflight_gc
 * (limit PBLK_GC_L_QD) are checked first. A group list is picked from
 * l_mg->gc_lists[gc_group++]; under gc_lock, if it is non-empty, a victim is
 * chosen with pblk_gc_get_victim_line(), switched from CLOSED to GC state
 * under line->lock and unlinked. After gc_lock is released the line is
 * appended to gc->r_list under gc->r_lock, gc->inflight_gc is incremented
 * and the reader thread is kicked. The run/inflight conditions are then
 * re-evaluated, and a final test compares pblk->rl.rb_state against gc_group
 * with gc_group bounded by PBLK_GC_NR_LISTS.
 *
 * Lock nesting visible here: l_mg->gc_lock is held while line->lock is
 * taken; gc->r_lock is taken on its own.
 *
 * NOTE(review): loop heads, labels, the declaration of run_gc and all return
 * and goto statements are missing from this listing, so how the parts above
 * repeat cannot be confirmed from here.
 * NOTE(review): gc_lists[] is indexed with gc_group++ while the
 * 'gc_group < PBLK_GC_NR_LISTS' test only appears afterwards; confirm the
 * index cannot reach PBLK_GC_NR_LISTS.
 */
347 static void pblk_gc_run(struct pblk
*pblk
)
349 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
350 struct pblk_gc
*gc
= &pblk
->gc
;
351 struct pblk_line
*line
;
352 struct list_head
*group_list
;
354 int inflight_gc
, gc_group
= 0, prev_group
= 0;
357 spin_lock(&l_mg
->gc_lock
);
358 if (list_empty(&l_mg
->gc_full_list
)) {
359 spin_unlock(&l_mg
->gc_lock
);
363 line
= list_first_entry(&l_mg
->gc_full_list
,
364 struct pblk_line
, list
);
366 spin_lock(&line
->lock
);
367 WARN_ON(line
->state
!= PBLK_LINESTATE_CLOSED
);
368 line
->state
= PBLK_LINESTATE_GC
;
369 spin_unlock(&line
->lock
);
371 list_del(&line
->list
);
372 spin_unlock(&l_mg
->gc_lock
);
374 kref_put(&line
->ref
, pblk_line_put
);
377 run_gc
= pblk_gc_should_run(&pblk
->gc
, &pblk
->rl
);
378 if (!run_gc
|| (atomic_read(&gc
->inflight_gc
) >= PBLK_GC_L_QD
))
382 group_list
= l_mg
->gc_lists
[gc_group
++];
385 spin_lock(&l_mg
->gc_lock
);
386 if (list_empty(group_list
)) {
387 spin_unlock(&l_mg
->gc_lock
);
391 line
= pblk_gc_get_victim_line(pblk
, group_list
);
393 spin_lock(&line
->lock
);
394 WARN_ON(line
->state
!= PBLK_LINESTATE_CLOSED
);
395 line
->state
= PBLK_LINESTATE_GC
;
396 spin_unlock(&line
->lock
);
398 list_del(&line
->list
);
399 spin_unlock(&l_mg
->gc_lock
);
401 spin_lock(&gc
->r_lock
);
402 list_add_tail(&line
->list
, &gc
->r_list
);
403 spin_unlock(&gc
->r_lock
);
405 inflight_gc
= atomic_inc_return(&gc
->inflight_gc
);
406 pblk_gc_reader_kick(gc
);
410 /* No need to queue up more GC lines than we can handle */
411 run_gc
= pblk_gc_should_run(&pblk
->gc
, &pblk
->rl
);
412 if (!run_gc
|| inflight_gc
>= PBLK_GC_L_QD
)
416 if (!prev_group
&& pblk
->rl
.rb_state
> gc_group
&&
417 gc_group
< PBLK_GC_NR_LISTS
)
421 void pblk_gc_kick(struct pblk
*pblk
)
423 struct pblk_gc
*gc
= &pblk
->gc
;
425 wake_up_process(gc
->gc_ts
);
426 pblk_gc_writer_kick(gc
);
427 pblk_gc_reader_kick(gc
);
428 mod_timer(&gc
->gc_timer
, jiffies
+ msecs_to_jiffies(GC_TIME_MSECS
));
/*
 * pblk_gc_timer - GC timer callback.
 * @data: the struct pblk pointer registered by pblk_gc_init() through
 *        setup_timer(), passed as unsigned long.
 *
 * NOTE(review): only the cast of @data back to struct pblk * is visible in
 * this listing; what the callback does with it is not shown.
 */
431 static void pblk_gc_timer(unsigned long data
)
433 struct pblk
*pblk
= (struct pblk
*)data
;
/*
 * pblk_gc_ts - thread function of the main GC kthread ("pblk-gc-ts").
 * @data: struct pblk pointer passed to kthread_create() in pblk_gc_init().
 *
 * Loops until kthread_should_stop() and marks itself TASK_INTERRUPTIBLE on
 * each pass; the thread is woken by pblk_gc_kick() through wake_up_process().
 *
 * NOTE(review): the per-iteration work, the sleep call and the return
 * statement are not visible in this listing.
 */
438 static int pblk_gc_ts(void *data
)
440 struct pblk
*pblk
= data
;
442 while (!kthread_should_stop()) {
444 set_current_state(TASK_INTERRUPTIBLE
);
/*
 * pblk_gc_writer_ts - thread function of the GC writer kthread
 * ("pblk-gc-writer-ts").
 * @data: struct pblk pointer passed to kthread_create() in pblk_gc_init().
 *
 * Loops until kthread_should_stop(). Each pass calls pblk_gc_write(); the
 * '!' test singles out a zero return, after which the statement that follows
 * is not visible here. The thread then marks itself TASK_INTERRUPTIBLE; it
 * is woken by pblk_gc_writer_kick().
 *
 * NOTE(review): the statement under the if, the sleep call and the return
 * statement are not visible in this listing.
 */
451 static int pblk_gc_writer_ts(void *data
)
453 struct pblk
*pblk
= data
;
455 while (!kthread_should_stop()) {
456 if (!pblk_gc_write(pblk
))
458 set_current_state(TASK_INTERRUPTIBLE
);
/*
 * pblk_gc_reader_ts - thread function of the GC reader kthread
 * ("pblk-gc-reader-ts").
 * @data: struct pblk pointer passed to kthread_create() in pblk_gc_init().
 *
 * Loops until kthread_should_stop(). Each pass calls pblk_gc_read(); the '!'
 * test singles out a zero return, after which the statement that follows is
 * not visible here. The thread then marks itself TASK_INTERRUPTIBLE; it is
 * woken by pblk_gc_reader_kick().
 *
 * NOTE(review): the statement under the if, the sleep call and the return
 * statement are not visible in this listing.
 */
465 static int pblk_gc_reader_ts(void *data
)
467 struct pblk
*pblk
= data
;
469 while (!kthread_should_stop()) {
470 if (!pblk_gc_read(pblk
))
472 set_current_state(TASK_INTERRUPTIBLE
);
479 static void pblk_gc_start(struct pblk
*pblk
)
481 pblk
->gc
.gc_active
= 1;
482 pr_debug("pblk: gc start\n");
/*
 * pblk_gc_should_start - entry point asking GC to start if it may.
 * @pblk: pblk instance.
 *
 * The visible condition is true when GC is enabled (gc->gc_enabled) and not
 * already active (gc->gc_active). The flags are read without taking
 * gc->lock. Within this file the function is called from
 * pblk_gc_sysfs_force().
 *
 * NOTE(review): the statement guarded by the condition, and anything that
 * follows it, is not visible in this listing.
 */
485 void pblk_gc_should_start(struct pblk
*pblk
)
487 struct pblk_gc
*gc
= &pblk
->gc
;
489 if (gc
->gc_enabled
&& !gc
->gc_active
)
496 * If flush_wq == 1 then no lock should be held by the caller since
497 * flush_workqueue can sleep
499 static void pblk_gc_stop(struct pblk
*pblk
, int flush_wq
)
501 pblk
->gc
.gc_active
= 0;
502 pr_debug("pblk: gc stop\n");
505 void pblk_gc_should_stop(struct pblk
*pblk
)
507 struct pblk_gc
*gc
= &pblk
->gc
;
509 if (gc
->gc_active
&& !gc
->gc_forced
)
510 pblk_gc_stop(pblk
, 0);
/*
 * pblk_gc_sysfs_state_show - report the GC enabled/active flags.
 * @pblk:       pblk instance.
 * @gc_enabled: out parameter, receives gc->gc_enabled.
 *
 * Both flags are copied while holding gc->lock so that they are read as a
 * consistent pair. The body also stores gc->gc_active through *gc_active.
 *
 * NOTE(review): the declaration of the gc_active parameter is on a signature
 * line that is not visible in this listing.
 */
513 void pblk_gc_sysfs_state_show(struct pblk
*pblk
, int *gc_enabled
,
516 struct pblk_gc
*gc
= &pblk
->gc
;
518 spin_lock(&gc
->lock
);
519 *gc_enabled
= gc
->gc_enabled
;
520 *gc_active
= gc
->gc_active
;
521 spin_unlock(&gc
->lock
);
/*
 * pblk_gc_sysfs_force - set or clear forced GC.
 * @pblk:  pblk instance.
 * @force: requested state; the visible range check accepts only 0 and 1.
 *
 * After the range check, gc->gc_forced is updated under gc->lock. Once the
 * lock has been released pblk_gc_should_start() is called.
 *
 * NOTE(review): the statement executed for an out-of-range @force, the lines
 * between the gc_forced store and the unlock, and the return statements are
 * not visible in this listing.
 */
524 int pblk_gc_sysfs_force(struct pblk
*pblk
, int force
)
526 struct pblk_gc
*gc
= &pblk
->gc
;
528 if (force
< 0 || force
> 1)
531 spin_lock(&gc
->lock
);
532 gc
->gc_forced
= force
;
538 spin_unlock(&gc
->lock
);
540 pblk_gc_should_start(pblk
);
/*
 * pblk_gc_init - create the GC threads, timer, workqueues and locks.
 * @pblk: pblk instance whose gc member is initialised.
 *
 * Visible steps, in order:
 *   1. Three kthreads are created with kthread_create(): "pblk-gc-ts",
 *      "pblk-gc-writer-ts" and "pblk-gc-reader-ts". A failure of the first
 *      returns PTR_ERR() directly; later failures store PTR_ERR() in ret and
 *      jump to the label that stops the threads created so far.
 *   2. The GC timer is set up with pblk_gc_timer() as callback and armed
 *      GC_TIME_MSECS milliseconds ahead.
 *   3. gc->inflight_gc is reset to 0.
 *   4. Two workqueues are allocated (WQ_MEM_RECLAIM | WQ_UNBOUND):
 *      "pblk-gc-line-reader-wq" with max_active PBLK_GC_MAX_READERS and
 *      "pblk-gc-line_wq" with max_active 1.
 *   5. gc->lock, gc->w_lock and gc->r_lock are initialised, gc_sem is set to
 *      128, and w_list / r_list are initialised.
 *
 * Error unwinding falls through the labels in reverse order of creation:
 * line-reader workqueue, reader thread, writer thread, main thread.
 *
 * NOTE(review): the timer is armed in step 2, before the workqueue
 * allocations that can still fail, and none of the unwind labels cancels it.
 * Confirm whether a del_timer_sync() is needed on those paths.
 * NOTE(review): the timer is also armed before the spinlocks and lists in
 * step 5 are initialised; confirm the callback cannot touch them first.
 * NOTE(review): the declaration of ret, the lines between the timer setup
 * and atomic_set(), the ret assignments on the workqueue failure paths and
 * the return statements are not visible in this listing.
 */
545 int pblk_gc_init(struct pblk
*pblk
)
547 struct pblk_gc
*gc
= &pblk
->gc
;
550 gc
->gc_ts
= kthread_create(pblk_gc_ts
, pblk
, "pblk-gc-ts");
551 if (IS_ERR(gc
->gc_ts
)) {
552 pr_err("pblk: could not allocate GC main kthread\n");
553 return PTR_ERR(gc
->gc_ts
);
556 gc
->gc_writer_ts
= kthread_create(pblk_gc_writer_ts
, pblk
,
557 "pblk-gc-writer-ts");
558 if (IS_ERR(gc
->gc_writer_ts
)) {
559 pr_err("pblk: could not allocate GC writer kthread\n");
560 ret
= PTR_ERR(gc
->gc_writer_ts
);
561 goto fail_free_main_kthread
;
564 gc
->gc_reader_ts
= kthread_create(pblk_gc_reader_ts
, pblk
,
565 "pblk-gc-reader-ts");
566 if (IS_ERR(gc
->gc_reader_ts
)) {
567 pr_err("pblk: could not allocate GC reader kthread\n");
568 ret
= PTR_ERR(gc
->gc_reader_ts
);
569 goto fail_free_writer_kthread
;
572 setup_timer(&gc
->gc_timer
, pblk_gc_timer
, (unsigned long)pblk
);
573 mod_timer(&gc
->gc_timer
, jiffies
+ msecs_to_jiffies(GC_TIME_MSECS
));
579 atomic_set(&gc
->inflight_gc
, 0);
581 /* Workqueue that reads valid sectors from a line and submit them to the
582 * GC writer to be recycled.
584 gc
->gc_line_reader_wq
= alloc_workqueue("pblk-gc-line-reader-wq",
585 WQ_MEM_RECLAIM
| WQ_UNBOUND
, PBLK_GC_MAX_READERS
);
586 if (!gc
->gc_line_reader_wq
) {
587 pr_err("pblk: could not allocate GC line reader workqueue\n");
589 goto fail_free_reader_kthread
;
592 /* Workqueue that prepare lines for GC */
593 gc
->gc_reader_wq
= alloc_workqueue("pblk-gc-line_wq",
594 WQ_MEM_RECLAIM
| WQ_UNBOUND
, 1);
595 if (!gc
->gc_reader_wq
) {
596 pr_err("pblk: could not allocate GC reader workqueue\n");
598 goto fail_free_reader_line_wq
;
601 spin_lock_init(&gc
->lock
);
602 spin_lock_init(&gc
->w_lock
);
603 spin_lock_init(&gc
->r_lock
);
605 sema_init(&gc
->gc_sem
, 128);
607 INIT_LIST_HEAD(&gc
->w_list
);
608 INIT_LIST_HEAD(&gc
->r_list
);
612 fail_free_reader_line_wq
:
613 destroy_workqueue(gc
->gc_line_reader_wq
);
614 fail_free_reader_kthread
:
615 kthread_stop(gc
->gc_reader_ts
);
616 fail_free_writer_kthread
:
617 kthread_stop(gc
->gc_writer_ts
);
618 fail_free_main_kthread
:
619 kthread_stop(gc
->gc_ts
);
/*
 * pblk_gc_exit - tear down the GC threads, timer and workqueues.
 * @pblk: pblk instance.
 *
 * Visible order of operations:
 *   1. Both workqueues (gc_reader_wq, gc_line_reader_wq) are flushed.
 *   2. The GC timer is deleted and GC is marked inactive with
 *      pblk_gc_stop(pblk, 1).
 *   3. The main GC thread is stopped.
 *   4. Each workqueue is destroyed if its pointer is non-NULL.
 *   5. The writer and reader threads are stopped if their pointers are
 *      non-NULL.
 *
 * NOTE(review): the flush_workqueue() calls in step 1 are unconditional,
 * while step 4 NULL-checks the same pointers before destroying them. Confirm
 * whether the pointers can be NULL here and, if so, guard the flushes too.
 * NOTE(review): del_timer() does not wait for a running callback; confirm
 * whether del_timer_sync() is needed before the threads are stopped.
 * NOTE(review): one original line directly before the kthread_stop() of
 * gc->gc_ts is missing from this listing (possibly a NULL check like the
 * ones below - unverified).
 */
624 void pblk_gc_exit(struct pblk
*pblk
)
626 struct pblk_gc
*gc
= &pblk
->gc
;
628 flush_workqueue(gc
->gc_reader_wq
);
629 flush_workqueue(gc
->gc_line_reader_wq
);
631 del_timer(&gc
->gc_timer
);
632 pblk_gc_stop(pblk
, 1);
635 kthread_stop(gc
->gc_ts
);
637 if (gc
->gc_reader_wq
)
638 destroy_workqueue(gc
->gc_reader_wq
);
640 if (gc
->gc_line_reader_wq
)
641 destroy_workqueue(gc
->gc_line_reader_wq
);
643 if (gc
->gc_writer_ts
)
644 kthread_stop(gc
->gc_writer_ts
);
646 if (gc
->gc_reader_ts
)
647 kthread_stop(gc
->gc_reader_ts
);