/*
 * Moving/copying garbage collector
 *
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "request.h"

#include <trace/events/bcache.h>
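/*
 * Per-copy state. Not shown in this excerpt; the layout below is a sketch
 * reconstructed from the container_of() and field uses that follow
 * (io->w, io->s.cl, io->s.op, io->bio.bio with inline bio_vecs).
 */
struct moving_io {
	struct keybuf_key	*w;
	struct search		s;
	struct bbio		bio;
};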
static bool moving_pred(struct keybuf *buf, struct bkey *k)
{
	struct cache_set *c = container_of(buf, struct cache_set,
					   moving_gc_keys);
	unsigned i;

	for (i = 0; i < KEY_PTRS(k); i++) {
		struct cache *ca = PTR_CACHE(c, k, i);
		struct bucket *g = PTR_BUCKET(c, k, i);

		/* Copy the key if any of its pointers lands in a bucket
		 * that moving GC decided to evacuate. */
		if (GC_SECTORS_USED(g) < ca->gc_move_threshold)
			return true;
	}

	return false;
}
/* Moving GC - IO loop */
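/*
 * Each key selected by moving_pred() is copied by a small pipeline:
 * read_moving() allocates a moving_io and reads the old data,
 * write_moving() reinserts it with a BTREE_REPLACE (so the copy is
 * dropped if the key changed in the meantime), and write_moving_finish()
 * releases the pages and the keybuf entry.
 */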
static void moving_io_destructor(struct closure *cl)
{
	struct moving_io *io = container_of(cl, struct moving_io, s.cl);

	kfree(io);
}
static void write_moving_finish(struct closure *cl)
{
	struct moving_io *io = container_of(cl, struct moving_io, s.cl);
	struct bio *bio = &io->bio.bio;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment_all(bv, bio, i)
		__free_page(bv->bv_page);

	if (io->s.op.insert_collision)
		trace_bcache_gc_copy_collision(&io->w->key);

	bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);

	atomic_dec_bug(&io->s.op.c->in_flight);
	closure_wake_up(&io->s.op.c->moving_gc_wait);

	closure_return_with_destructor(cl, moving_io_destructor);
}
static void read_moving_endio(struct bio *bio, int error)
{
	struct moving_io *io = container_of(bio->bi_private,
					    struct moving_io, s.cl);

	/* Record the error so write_moving() skips the write. */
	if (error)
		io->s.error = error;

	bch_bbio_endio(io->s.op.c, bio, error, "reading data to move");
}
static void moving_init(struct moving_io *io)
{
	struct bio *bio = &io->bio.bio;

	bio_init(bio);
	bio_get(bio);
	bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	bio->bi_size		= KEY_SIZE(&io->w->key) << 9;
	bio->bi_max_vecs	= DIV_ROUND_UP(KEY_SIZE(&io->w->key),
					       PAGE_SECTORS);
	bio->bi_private		= &io->s.cl;
	bio->bi_io_vec		= bio->bi_inline_vecs;
	bch_bio_map(bio, NULL);
}
static void write_moving(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct moving_io *io = container_of(s, struct moving_io, s);

	if (!s->error) {
		moving_init(io);

		io->bio.bio.bi_sector	= KEY_START(&io->w->key);
		s->op.write_prio	= 1;
		s->op.cache_bio		= &io->bio.bio;

		s->writeback		= KEY_DIRTY(&io->w->key);
		s->op.csum		= KEY_CSUM(&io->w->key);

		s->op.type = BTREE_REPLACE;
		bkey_copy(&s->op.replace, &io->w->key);

		closure_init(&s->op.cl, cl);
		bch_insert_data(&s->op.cl);
	}

	continue_at(cl, write_moving_finish, NULL);
}
static void read_moving_submit(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct moving_io *io = container_of(s, struct moving_io, s);
	struct bio *bio = &io->bio.bio;

	bch_submit_bbio(bio, s->op.c, &io->w->key, 0);

	continue_at(cl, write_moving, bch_gc_wq);
}
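/*
 * Main scan loop: walk the moving_gc_keys keybuf with moving_pred(),
 * kick off one read/write pipeline per key, and throttle at 64
 * in-flight copies (write_moving_finish() wakes moving_gc_wait as
 * copies retire).
 */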
static void read_moving(struct closure *cl)
{
	struct cache_set *c = container_of(cl, struct cache_set, moving_gc);
	struct keybuf_key *w;
	struct moving_io *io;
	struct bio *bio;

	/* XXX: if we error, background writeback could stall indefinitely */

	while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
					   &MAX_KEY, moving_pred);
		if (!w)
			break;

		io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec)
			     * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
			     GFP_KERNEL);
		if (!io)
			goto err;

		w->private	= io;
		io->w		= w;
		io->s.op.inode	= KEY_INODE(&w->key);
		io->s.op.c	= c;

		moving_init(io);
		bio = &io->bio.bio;

		bio->bi_rw	= READ;
		bio->bi_end_io	= read_moving_endio;

		if (bio_alloc_pages(bio, GFP_KERNEL))
			goto err;

		trace_bcache_gc_copy(&w->key);

		closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);

		if (atomic_inc_return(&c->in_flight) >= 64) {
			closure_wait_event(&c->moving_gc_wait, cl,
					   atomic_read(&c->in_flight) < 64);
			continue_at(cl, read_moving, bch_gc_wq);
		}
	}

	if (0) {
err:		if (!IS_ERR_OR_NULL(w->private))
			kfree(w->private);

		bch_keybuf_del(&c->moving_gc_keys, w);
	}

	closure_return(cl);
}
static bool bucket_cmp(struct bucket *l, struct bucket *r)
{
	return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
}
static unsigned bucket_heap_top(struct cache *ca)
{
	return GC_SECTORS_USED(heap_peek(&ca->heap));
}
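/*
 * Pick the buckets to evacuate: keep a heap of the buckets with the
 * least live data, trimmed so that the total data to move fits in what
 * can be allocated from the free list (reserve_sectors), then set
 * gc_move_threshold so that moving_pred() copies keys whose buckets
 * hold less live data than the fullest bucket kept in the heap.
 */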
void bch_moving_gc(struct closure *cl)
{
	struct cache_set *c = container_of(cl, struct cache_set, gc.cl);
	struct cache *ca;
	struct bucket *b;
	unsigned i;

	if (!c->copy_gc_enabled)
		closure_return(cl);

	mutex_lock(&c->bucket_lock);

	for_each_cache(ca, c, i) {
		unsigned sectors_to_move = 0;
		unsigned reserve_sectors = ca->sb.bucket_size *
			min(fifo_used(&ca->free), ca->free.size / 2);

		ca->heap.used = 0;

		for_each_bucket(b, ca) {
			if (!GC_SECTORS_USED(b))
				continue;

			if (!heap_full(&ca->heap)) {
				sectors_to_move += GC_SECTORS_USED(b);
				heap_add(&ca->heap, b, bucket_cmp);
			} else if (bucket_cmp(b, heap_peek(&ca->heap))) {
				sectors_to_move -= bucket_heap_top(ca);
				sectors_to_move += GC_SECTORS_USED(b);

				ca->heap.data[0] = b;
				heap_sift(&ca->heap, 0, bucket_cmp);
			}
		}

		while (sectors_to_move > reserve_sectors) {
			heap_pop(&ca->heap, b, bucket_cmp);
			sectors_to_move -= GC_SECTORS_USED(b);
		}

		ca->gc_move_threshold = bucket_heap_top(ca);

		pr_debug("threshold %u", ca->gc_move_threshold);
	}

	mutex_unlock(&c->bucket_lock);

	c->moving_gc_keys.last_scanned = ZERO_KEY;

	closure_init(&c->moving_gc, cl);
	read_moving(&c->moving_gc);

	closure_return(cl);
}
void bch_moving_init_cache_set(struct cache_set *c)
{
	bch_keybuf_init(&c->moving_gc_keys);
}