1 // SPDX-License-Identifier: GPL-2.0
4 #include "btree_update.h"
11 #include "rebalance.h"
13 #include "subvolume.h"
16 #include <linux/sched/signal.h>
/*
 * Map the type of a key to the key type used for its indirect counterpart.
 * Visible mappings: one case returns KEY_TYPE_reflink_v, and
 * KEY_TYPE_inline_data maps to KEY_TYPE_indirect_inline_data.
 * NOTE(review): the switch head, the case label for the first return and any
 * default branch are not visible in this excerpt.
 */
18 static inline unsigned bkey_type_to_indirect(const struct bkey
*k
)
22 return KEY_TYPE_reflink_v
;
23 case KEY_TYPE_inline_data
:
24 return KEY_TYPE_indirect_inline_data
;
30 /* reflink pointers */
/*
 * Validate a reflink pointer key.
 *
 * Raises the reflink_p_front_pad_bad fsck error when the pointer's idx is
 * smaller than its front_pad (both converted from little-endian first).
 * NOTE(review): the return path of this function is not visible here.
 */
32 int bch2_reflink_p_validate(struct bch_fs
*c
, struct bkey_s_c k
,
33 enum bch_validate_flags flags
)
35 struct bkey_s_c_reflink_p p
= bkey_s_c_to_reflink_p(k
);
38 bkey_fsck_err_on(le64_to_cpu(p
.v
->idx
) < le32_to_cpu(p
.v
->front_pad
),
39 c
, reflink_p_front_pad_bad
,
40 "idx < front_pad (%llu < %u)",
41 le64_to_cpu(p
.v
->idx
), le32_to_cpu(p
.v
->front_pad
));
/*
 * Print a reflink pointer into @out: its idx, front_pad and back_pad fields,
 * each converted from little-endian to CPU byte order.
 * NOTE(review): the declaration of the key parameter `k` is on a line that is
 * not visible in this excerpt.
 */
46 void bch2_reflink_p_to_text(struct printbuf
*out
, struct bch_fs
*c
,
49 struct bkey_s_c_reflink_p p
= bkey_s_c_to_reflink_p(k
);
51 prt_printf(out
, "idx %llu front_pad %u back_pad %u",
52 le64_to_cpu(p
.v
->idx
),
53 le32_to_cpu(p
.v
->front_pad
),
54 le32_to_cpu(p
.v
->back_pad
));
/*
 * Merge hook for two reflink pointers: @_l is the key that would be extended,
 * @_r the key that would be absorbed.
 *
 * Visible logic: the two pointers are compared for contiguity in index space
 * (l's idx + l's size against r's idx) and l is resized to the combined size.
 * The note in the body states that merging is disabled for now because the
 * triggers code needs rework.
 * NOTE(review): the return statements are not visible in this excerpt, so
 * which of the visible lines are actually reachable cannot be confirmed here.
 */
57 bool bch2_reflink_p_merge(struct bch_fs
*c
, struct bkey_s _l
, struct bkey_s_c _r
)
59 struct bkey_s_reflink_p l
= bkey_s_to_reflink_p(_l
);
60 struct bkey_s_c_reflink_p r
= bkey_s_c_to_reflink_p(_r
);
63 * Disabled for now, the triggers code needs to be reworked for merging
64 * of reflink pointers to work:
68 if (le64_to_cpu(l
.v
->idx
) + l
.k
->size
!= le64_to_cpu(r
.v
->idx
))
71 bch2_key_resize(l
.k
, l
.k
->size
+ r
.k
->size
);
/*
 * Transactional trigger for one segment of a reflink pointer.
 *
 * Fetches the key at POS(0, *idx) in BTREE_ID_reflink, adjusts its refcount
 * by +1 (or by -1 when BTREE_TRIGGER_overwrite is set), queues the updated
 * key, and advances *idx to the end offset of that key so the caller can
 * continue with the next segment.
 *
 * Two inconsistencies are reported via bch2_trans_inconsistent(): a
 * nonexistent indirect extent, and a refcount that is already zero on an
 * overwrite (underflow).
 * NOTE(review): several lines (local declarations, error branches, the
 * condition guarding the "nonexistent" report, the return) are not visible
 * in this excerpt.
 */
75 static int trans_trigger_reflink_p_segment(struct btree_trans
*trans
,
76 struct bkey_s_c_reflink_p p
, u64
*idx
,
77 enum btree_iter_update_trigger_flags flags
)
79 struct bch_fs
*c
= trans
->c
;
80 struct btree_iter iter
;
/* +1 unless this trigger is for an overwrite, in which case -1 */
83 int add
= !(flags
& BTREE_TRIGGER_overwrite
) ? 1 : -1;
84 struct printbuf buf
= PRINTBUF
;
/* Look up the indirect extent covering *idx in the reflink btree */
87 k
= bch2_bkey_get_mut_noupdate(trans
, &iter
,
88 BTREE_ID_reflink
, POS(0, *idx
),
89 BTREE_ITER_with_updates
);
90 ret
= PTR_ERR_OR_ZERO(k
);
94 refcount
= bkey_refcount(bkey_i_to_s(k
));
96 bch2_bkey_val_to_text(&buf
, c
, p
.s_c
);
97 bch2_trans_inconsistent(trans
,
98 "nonexistent indirect extent at %llu while marking\n %s",
/* Dropping a reference from a refcount that is already zero */
104 if (!*refcount
&& (flags
& BTREE_TRIGGER_overwrite
)) {
105 bch2_bkey_val_to_text(&buf
, c
, p
.s_c
);
106 bch2_trans_inconsistent(trans
,
107 "indirect extent refcount underflow at %llu while marking\n %s",
113 if (flags
& BTREE_TRIGGER_insert
) {
114 struct bch_reflink_p
*v
= (struct bch_reflink_p
*) p
.v
;
/*
 * front_pad becomes the larger of its current value and the distance from
 * the start of the indirect extent to the pointer's idx.
 */
117 pad
= max_t(s64
, le32_to_cpu(v
->front_pad
),
118 le64_to_cpu(v
->idx
) - bkey_start_offset(&k
->k
));
119 BUG_ON(pad
> U32_MAX
);
120 v
->front_pad
= cpu_to_le32(pad
);
/*
 * back_pad becomes the larger of its current value and the distance from
 * the end of the pointed-to range (idx + size) to the end of the indirect
 * extent.
 */
122 pad
= max_t(s64
, le32_to_cpu(v
->back_pad
),
123 k
->k
.p
.offset
- p
.k
->size
- le64_to_cpu(v
->idx
));
124 BUG_ON(pad
> U32_MAX
);
125 v
->back_pad
= cpu_to_le32(pad
);
/* Apply the refcount delta and queue the modified indirect extent */
128 le64_add_cpu(refcount
, add
);
130 bch2_btree_iter_set_pos_to_extent_start(&iter
);
131 ret
= bch2_trans_update(trans
, &iter
, k
, 0);
/* Continue after the end of the indirect extent just processed */
135 *idx
= k
->k
.p
.offset
;
137 bch2_trans_iter_exit(trans
, &iter
);
/*
 * GC / check-repair trigger for one segment of a reflink pointer, operating
 * on the reflink_gc table (c->reflink_gc_table) rather than on the btree.
 *
 * Computes the pointer's [start, end) range in index space and a next_idx of
 * end + back_pad, clamped to the start of the gc table entry at r_idx
 * (r->offset - r->size). Asserts that applying the +1/-1 delta would not make
 * the entry's refcount negative.
 *
 * When the pointer refers to a missing indirect extent (fsck error
 * reflink_p_to_missing_reflink_v), a modified copy of the key is built:
 * front_pad is set to start - next_idx when next_idx <= start, back_pad is
 * set to *idx - end when *idx >= end, and the visible bkey_error_init() lines
 * turn the key into an error key with the same position and size and an empty
 * value. The copy is then inserted into BTREE_ID_extents with
 * BTREE_TRIGGER_norun.
 * NOTE(review): the r_idx parameter declaration, several branch lines
 * (including what separates the two pad cases from bkey_error_init()) and the
 * return value are not visible in this excerpt.
 */
142 static s64
gc_trigger_reflink_p_segment(struct btree_trans
*trans
,
143 struct bkey_s_c_reflink_p p
, u64
*idx
,
144 enum btree_iter_update_trigger_flags flags
,
147 struct bch_fs
*c
= trans
->c
;
148 struct reflink_gc
*r
;
/* +1 unless this trigger is for an overwrite, in which case -1 */
149 int add
= !(flags
& BTREE_TRIGGER_overwrite
) ? 1 : -1;
150 u64 start
= le64_to_cpu(p
.v
->idx
);
151 u64 end
= le64_to_cpu(p
.v
->idx
) + p
.k
->size
;
152 u64 next_idx
= end
+ le32_to_cpu(p
.v
->back_pad
);
154 struct printbuf buf
= PRINTBUF
;
/* r_idx past the end of the gc table */
156 if (r_idx
>= c
->reflink_gc_nr
)
159 r
= genradix_ptr(&c
->reflink_gc_table
, r_idx
);
/* Do not look past the start of this gc table entry */
160 next_idx
= min(next_idx
, r
->offset
- r
->size
);
164 BUG_ON((s64
) r
->refcount
+ add
< 0);
166 if (flags
& BTREE_TRIGGER_gc
)
171 BUG_ON(!(flags
& BTREE_TRIGGER_check_repair
));
173 if (fsck_err(trans
, reflink_p_to_missing_reflink_v
,
174 "pointer to missing indirect extent\n"
176 " missing range %llu-%llu",
177 (bch2_bkey_val_to_text(&buf
, c
, p
.s_c
), buf
.buf
),
179 struct bkey_i
*update
= bch2_bkey_make_mut_noupdate(trans
, p
.s_c
);
180 ret
= PTR_ERR_OR_ZERO(update
);
/* Repair: adjust the padding on the copy, or replace it with an error key */
184 if (next_idx
<= start
) {
185 bkey_i_to_reflink_p(update
)->v
.front_pad
= cpu_to_le32(start
- next_idx
);
186 } else if (*idx
>= end
) {
187 bkey_i_to_reflink_p(update
)->v
.back_pad
= cpu_to_le32(*idx
- end
);
189 bkey_error_init(update
);
190 update
->k
.p
= p
.k
->p
;
191 update
->k
.size
= p
.k
->size
;
192 set_bkey_val_u64s(&update
->k
, 0);
195 ret
= bch2_btree_insert_trans(trans
, BTREE_ID_extents
, update
, BTREE_TRIGGER_norun
);
/*
 * Run the reflink pointer trigger over the whole range the pointer covers,
 * including padding: from idx - front_pad up to idx + size + back_pad.
 *
 * With BTREE_TRIGGER_transactional set, the range is walked by repeated calls
 * to trans_trigger_reflink_p_segment(). With BTREE_TRIGGER_check_repair or
 * BTREE_TRIGGER_gc set, a binary search over c->reflink_gc_table (bounds l
 * and r, midpoint m, comparing ref->offset against idx) picks the starting
 * table index, then the range is walked by gc_trigger_reflink_p_segment()
 * with the index incremented on every call. Both loops stop once idx reaches
 * end or ret becomes nonzero.
 * NOTE(review): the declaration of ret, the binary-search loop head and
 * branch bodies, and the return are not visible in this excerpt.
 */
205 static int __trigger_reflink_p(struct btree_trans
*trans
,
206 enum btree_id btree_id
, unsigned level
, struct bkey_s_c k
,
207 enum btree_iter_update_trigger_flags flags
)
209 struct bch_fs
*c
= trans
->c
;
210 struct bkey_s_c_reflink_p p
= bkey_s_c_to_reflink_p(k
);
213 u64 idx
= le64_to_cpu(p
.v
->idx
) - le32_to_cpu(p
.v
->front_pad
);
214 u64 end
= le64_to_cpu(p
.v
->idx
) + p
.k
->size
+ le32_to_cpu(p
.v
->back_pad
);
216 if (flags
& BTREE_TRIGGER_transactional
) {
217 while (idx
< end
&& !ret
)
218 ret
= trans_trigger_reflink_p_segment(trans
, p
, &idx
, flags
);
221 if (flags
& (BTREE_TRIGGER_check_repair
|BTREE_TRIGGER_gc
)) {
222 size_t l
= 0, r
= c
->reflink_gc_nr
;
/* Midpoint written as l + (r - l) / 2 */
225 size_t m
= l
+ (r
- l
) / 2;
226 struct reflink_gc
*ref
= genradix_ptr(&c
->reflink_gc_table
, m
);
227 if (ref
->offset
<= idx
)
233 while (idx
< end
&& !ret
)
234 ret
= gc_trigger_reflink_p_segment(trans
, p
, &idx
, flags
, l
++);
/*
 * Trigger entry point for reflink pointer keys.
 *
 * On a transactional insert, the new key's front_pad and back_pad are first
 * reset to zero. The work is then dispatched through
 * trigger_run_overwrite_then_insert() with __trigger_reflink_p as the
 * per-key callback.
 * NOTE(review): the declarations of the `old` and `new` parameters are on
 * lines not visible in this excerpt.
 */
240 int bch2_trigger_reflink_p(struct btree_trans
*trans
,
241 enum btree_id btree_id
, unsigned level
,
244 enum btree_iter_update_trigger_flags flags
)
246 if ((flags
& BTREE_TRIGGER_transactional
) &&
247 (flags
& BTREE_TRIGGER_insert
)) {
248 struct bch_reflink_p
*v
= bkey_s_to_reflink_p(new).v
;
250 v
->front_pad
= v
->back_pad
= 0;
253 return trigger_run_overwrite_then_insert(__trigger_reflink_p
, trans
, btree_id
, level
, old
, new, flags
);
256 /* indirect extents */
/*
 * Validate an indirect extent (reflink_v) key by delegating to
 * bch2_bkey_ptrs_validate() and returning its result.
 */
258 int bch2_reflink_v_validate(struct bch_fs
*c
, struct bkey_s_c k
,
259 enum bch_validate_flags flags
)
261 return bch2_bkey_ptrs_validate(c
, k
, flags
);
/*
 * Print an indirect extent into @out: its refcount (converted from
 * little-endian), followed by the output of bch2_bkey_ptrs_to_text() for the
 * same key.
 * NOTE(review): the declaration of the key parameter `k` is on a line that is
 * not visible in this excerpt.
 */
264 void bch2_reflink_v_to_text(struct printbuf
*out
, struct bch_fs
*c
,
267 struct bkey_s_c_reflink_v r
= bkey_s_c_to_reflink_v(k
);
269 prt_printf(out
, "refcount: %llu ", le64_to_cpu(r
.v
->refcount
));
271 bch2_bkey_ptrs_to_text(out
, c
, k
);
275 Currently disabled
, needs to be debugged
:
/*
 * Merge hook for two indirect extents: returns true only when both keys have
 * the same refcount and bch2_extent_merge() also succeeds on them.
 */
277 bool bch2_reflink_v_merge(struct bch_fs
*c
, struct bkey_s _l
, struct bkey_s_c _r
)
279 struct bkey_s_reflink_v l
= bkey_s_to_reflink_v(_l
);
280 struct bkey_s_c_reflink_v r
= bkey_s_c_to_reflink_v(_r
);
282 return l
.v
->refcount
== r
.v
->refcount
&& bch2_extent_merge(c
, _l
, _r
);
/*
 * If @new is being inserted with a refcount of zero, convert it in place into
 * a KEY_TYPE_deleted key with an empty value and clear BTREE_TRIGGER_insert
 * in *flags, so the caller no longer treats it as an insert.
 * NOTE(review): the line carrying this function's storage class and return
 * type is not visible in this excerpt.
 */
287 check_indirect_extent_deleting(struct bkey_s
new,
288 enum btree_iter_update_trigger_flags
*flags
)
290 if ((*flags
& BTREE_TRIGGER_insert
) && !*bkey_refcount(new)) {
291 new.k
->type
= KEY_TYPE_deleted
;
293 set_bkey_val_u64s(new.k
, 0);
294 *flags
&= ~BTREE_TRIGGER_insert
;
/*
 * Trigger entry point for indirect extent (reflink_v) keys.
 *
 * On a transactional insert, check_indirect_extent_deleting() is given the
 * chance to turn a zero-refcount key into a deletion (it may also clear
 * BTREE_TRIGGER_insert in the local flags). The call is then forwarded to
 * bch2_trigger_extent() with the possibly modified flags.
 */
298 int bch2_trigger_reflink_v(struct btree_trans
*trans
,
299 enum btree_id btree_id
, unsigned level
,
300 struct bkey_s_c old
, struct bkey_s
new,
301 enum btree_iter_update_trigger_flags flags
)
303 if ((flags
& BTREE_TRIGGER_transactional
) &&
304 (flags
& BTREE_TRIGGER_insert
))
305 check_indirect_extent_deleting(new, &flags
);
307 return bch2_trigger_extent(trans
, btree_id
, level
, old
, new, flags
);
310 /* indirect inline data */
/*
 * Validation hook for indirect inline data keys.
 * NOTE(review): only the signature is visible in this excerpt; the body, and
 * therefore what is actually checked, cannot be described from here.
 */
312 int bch2_indirect_inline_data_validate(struct bch_fs
*c
, struct bkey_s_c k
,
313 enum bch_validate_flags flags
)
/*
 * Print an indirect inline data key into @out: its refcount, the length of
 * the inline data in bytes, and the leading data bytes through the kernel's
 * %*phN format (hex without separators). The number of bytes printed is
 * capped at 32 by min(datalen, 32U).
 */
318 void bch2_indirect_inline_data_to_text(struct printbuf
*out
,
319 struct bch_fs
*c
, struct bkey_s_c k
)
321 struct bkey_s_c_indirect_inline_data d
= bkey_s_c_to_indirect_inline_data(k
);
322 unsigned datalen
= bkey_inline_data_bytes(k
.k
);
324 prt_printf(out
, "refcount %llu datalen %u: %*phN",
325 le64_to_cpu(d
.v
->refcount
), datalen
,
326 min(datalen
, 32U), d
.v
->data
);
/*
 * Trigger entry point for indirect inline data keys.
 *
 * Calls check_indirect_extent_deleting() so that a key inserted with a zero
 * refcount is turned into a deletion.
 * NOTE(review): the remainder of the body, including the return, is not
 * visible in this excerpt.
 */
329 int bch2_trigger_indirect_inline_data(struct btree_trans
*trans
,
330 enum btree_id btree_id
, unsigned level
,
331 struct bkey_s_c old
, struct bkey_s
new,
332 enum btree_iter_update_trigger_flags flags
)
334 check_indirect_extent_deleting(new, &flags
);
/*
 * Convert the extent @orig into an indirect extent plus a reflink pointer.
 *
 * Steps visible here:
 *  - for inline data, set the BCH_FEATURE_reflink_inline_data feature bit;
 *  - position an iterator at POS_MAX in BTREE_ID_reflink and peek backwards
 *    to find where the new indirect key goes;
 *  - allocate r_v from the transaction, sized for orig's key plus one __le64
 *    (the refcount), give it the indirect key type, orig's size and
 *    bversion, and copy orig's value in right after the refcount;
 *  - queue r_v for insertion in the reflink btree;
 *  - rewrite orig in place as a KEY_TYPE_reflink_p with a zeroed value whose
 *    idx is the start offset of r_v, and queue it through @extent_iter with
 *    BTREE_UPDATE_internal_snapshot_node.
 * NOTE(review): the `orig` parameter declaration, some local declarations,
 * error branches, preprocessor else/endif lines and the return are not
 * visible in this excerpt.
 */
339 static int bch2_make_extent_indirect(struct btree_trans
*trans
,
340 struct btree_iter
*extent_iter
,
343 struct bch_fs
*c
= trans
->c
;
344 struct btree_iter reflink_iter
= { NULL
};
347 struct bkey_i_reflink_p
*r_p
;
351 if (orig
->k
.type
== KEY_TYPE_inline_data
)
352 bch2_check_set_feature(c
, BCH_FEATURE_reflink_inline_data
);
/* Find the current last key in the reflink btree */
354 bch2_trans_iter_init(trans
, &reflink_iter
, BTREE_ID_reflink
, POS_MAX
,
356 k
= bch2_btree_iter_peek_prev(&reflink_iter
);
/* Room for the original key plus the leading refcount field */
361 r_v
= bch2_trans_kmalloc(trans
, sizeof(__le64
) + bkey_bytes(&orig
->k
));
362 ret
= PTR_ERR_OR_ZERO(r_v
);
367 r_v
->k
.type
= bkey_type_to_indirect(&orig
->k
);
368 r_v
->k
.p
= reflink_iter
.pos
;
369 bch2_key_resize(&r_v
->k
, orig
->k
.size
);
370 r_v
->k
.bversion
= orig
->k
.bversion
;
372 set_bkey_val_bytes(&r_v
->k
, sizeof(__le64
) + bkey_val_bytes(&orig
->k
));
374 refcount
= bkey_refcount(bkey_i_to_s(r_v
));
/* The original value is stored immediately after the refcount */
376 memcpy(refcount
+ 1, &orig
->v
, bkey_val_bytes(&orig
->k
));
378 ret
= bch2_trans_update(trans
, &reflink_iter
, r_v
, 0);
383 * orig is in a bkey_buf which statically allocates 5 64s for the val,
384 * so we know it will be big enough:
386 orig
->k
.type
= KEY_TYPE_reflink_p
;
387 r_p
= bkey_i_to_reflink_p(orig
);
388 set_bkey_val_bytes(&r_p
->k
, sizeof(r_p
->v
));
390 /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */
391 #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
392 __underlying_memset(&r_p
->v
, 0, sizeof(r_p
->v
));
394 memset(&r_p
->v
, 0, sizeof(r_p
->v
));
/* Point the rewritten key at the start of the new indirect extent */
397 r_p
->v
.idx
= cpu_to_le64(bkey_start_offset(&r_v
->k
));
399 ret
= bch2_trans_update(trans
, extent_iter
, &r_p
->k_i
,
400 BTREE_UPDATE_internal_snapshot_node
);
402 bch2_trans_iter_exit(trans
, &reflink_iter
);
/*
 * Advance @iter through keys up to @end looking for the next source key to
 * remap. Each key is tested with bkey_extent_is_unwritten() and
 * bkey_extent_is_data().
 *
 * After the loop, if the iterator position is at or past @end it is set to
 * @end. The final return yields an error key when ret is nonzero and a null
 * key otherwise.
 * NOTE(review): the statements taken when the two tests match (presumably
 * returns from inside the loop - confirm) are not visible in this excerpt.
 */
407 static struct bkey_s_c
get_next_src(struct btree_iter
*iter
, struct bpos end
)
412 for_each_btree_key_upto_continue_norestart(*iter
, end
, 0, k
, ret
) {
413 if (bkey_extent_is_unwritten(k
))
416 if (bkey_extent_is_data(k
.k
))
420 if (bkey_ge(iter
->pos
, end
))
421 bch2_btree_iter_set_pos(iter
, end
);
422 return ret
? bkey_s_c_err(ret
) : bkey_s_c_null
;
/*
 * Remap a range of extents from (src_inum, src_offset) onto
 * (dst_inum, dst_offset) by making the destination share the source's data
 * through reflink pointers.
 *
 * Overview of the visible flow:
 *  - take a BCH_WRITE_REF_reflink write reference, or fail with
 *    -BCH_ERR_erofs_no_writes;
 *  - loop while the destination iterator is before dst_end, retrying on
 *    transaction restarts; in each pass look up both subvolume snapshots,
 *    find the next source key, punch the destination where the source
 *    iterator is ahead of the wanted position, convert a non-reflink source
 *    extent into an indirect extent, and write a reflink pointer into the
 *    destination;
 *  - afterwards raise the destination inode's bi_size to new_i_size if it is
 *    smaller, again retrying on transaction restarts.
 *
 * Returns dst_done if it is nonzero, otherwise ret, otherwise ret2 (see the
 * final statement).
 * NOTE(review): the remap_sectors parameter declaration, the loop head,
 * several error branches and call arguments are not visible in this excerpt.
 */
425 s64
bch2_remap_range(struct bch_fs
*c
,
426 subvol_inum dst_inum
, u64 dst_offset
,
427 subvol_inum src_inum
, u64 src_offset
,
429 u64 new_i_size
, s64
*i_sectors_delta
)
431 struct btree_trans
*trans
;
432 struct btree_iter dst_iter
, src_iter
;
433 struct bkey_s_c src_k
;
434 struct bkey_buf new_dst
, new_src
;
435 struct bpos dst_start
= POS(dst_inum
.inum
, dst_offset
);
436 struct bpos src_start
= POS(src_inum
.inum
, src_offset
);
437 struct bpos dst_end
= dst_start
, src_end
= src_start
;
438 struct bch_io_opts opts
;
439 struct bpos src_want
;
441 u32 dst_snapshot
, src_snapshot
;
442 int ret
= 0, ret2
= 0;
/* Refuse to start if a reflink write reference cannot be taken */
444 if (!bch2_write_ref_tryget(c
, BCH_WRITE_REF_reflink
))
445 return -BCH_ERR_erofs_no_writes
;
447 bch2_check_set_feature(c
, BCH_FEATURE_reflink
);
/* Both end positions are the start positions advanced by remap_sectors */
449 dst_end
.offset
+= remap_sectors
;
450 src_end
.offset
+= remap_sectors
;
452 bch2_bkey_buf_init(&new_dst
);
453 bch2_bkey_buf_init(&new_src
);
454 trans
= bch2_trans_get(c
);
456 ret
= bch2_inum_opts_get(trans
, src_inum
, &opts
);
460 bch2_trans_iter_init(trans
, &src_iter
, BTREE_ID_extents
, src_start
,
462 bch2_trans_iter_init(trans
, &dst_iter
, BTREE_ID_extents
, dst_start
,
466 bch2_err_matches(ret
, BCH_ERR_transaction_restart
)) &&
467 bkey_lt(dst_iter
.pos
, dst_end
)) {
468 struct disk_reservation disk_res
= { 0 };
470 bch2_trans_begin(trans
);
472 if (fatal_signal_pending(current
)) {
477 ret
= bch2_subvolume_get_snapshot(trans
, src_inum
.subvol
,
482 bch2_btree_iter_set_snapshot(&src_iter
, src_snapshot
);
484 ret
= bch2_subvolume_get_snapshot(trans
, dst_inum
.subvol
,
489 bch2_btree_iter_set_snapshot(&dst_iter
, dst_snapshot
);
491 if (dst_inum
.inum
< src_inum
.inum
) {
492 /* Avoid some lock cycle transaction restarts */
493 ret
= bch2_btree_iter_traverse(&dst_iter
);
/*
 * The source position wanted next is src_start advanced by however far the
 * destination iterator has moved from dst_start.
 */
498 dst_done
= dst_iter
.pos
.offset
- dst_start
.offset
;
499 src_want
= POS(src_start
.inode
, src_start
.offset
+ dst_done
);
500 bch2_btree_iter_set_pos(&src_iter
, src_want
);
502 src_k
= get_next_src(&src_iter
, src_end
);
503 ret
= bkey_err(src_k
);
/*
 * The source iterator ended up past the wanted position: punch the
 * destination over a range of the same length.
 */
507 if (bkey_lt(src_want
, src_iter
.pos
)) {
508 ret
= bch2_fpunch_at(trans
, &dst_iter
, dst_inum
,
510 dst_iter
.pos
.offset
+
511 src_iter
.pos
.offset
- src_want
.offset
),
/* A source key that is not yet a reflink pointer is made indirect first */
516 if (src_k
.k
->type
!= KEY_TYPE_reflink_p
) {
517 bch2_btree_iter_set_pos_to_extent_start(&src_iter
);
519 bch2_bkey_buf_reassemble(&new_src
, c
, src_k
);
520 src_k
= bkey_i_to_s_c(new_src
.k
);
522 ret
= bch2_make_extent_indirect(trans
, &src_iter
,
527 BUG_ON(src_k
.k
->type
!= KEY_TYPE_reflink_p
);
/*
 * Build the destination reflink pointer in new_dst; its idx is derived from
 * the source pointer's idx (one operand line of the expression is not
 * visible in this excerpt).
 */
530 if (src_k
.k
->type
== KEY_TYPE_reflink_p
) {
531 struct bkey_s_c_reflink_p src_p
=
532 bkey_s_c_to_reflink_p(src_k
);
533 struct bkey_i_reflink_p
*dst_p
=
534 bkey_reflink_p_init(new_dst
.k
);
536 u64 offset
= le64_to_cpu(src_p
.v
->idx
) +
538 bkey_start_offset(src_k
.k
));
540 dst_p
->v
.idx
= cpu_to_le64(offset
);
/*
 * The new key is placed at the destination iterator position and sized to
 * the smaller of what remains of the source key and what remains of the
 * destination range.
 */
545 new_dst
.k
->k
.p
= dst_iter
.pos
;
546 bch2_key_resize(&new_dst
.k
->k
,
547 min(src_k
.k
->p
.offset
- src_want
.offset
,
548 dst_end
.offset
- dst_iter
.pos
.offset
));
550 ret
= bch2_bkey_set_needs_rebalance(c
, new_dst
.k
, &opts
) ?:
551 bch2_extent_update(trans
, dst_inum
, &dst_iter
,
552 new_dst
.k
, &disk_res
,
553 new_i_size
, i_sectors_delta
,
555 bch2_disk_reservation_put(c
, &disk_res
);
557 bch2_trans_iter_exit(trans
, &dst_iter
);
558 bch2_trans_iter_exit(trans
, &src_iter
);
560 BUG_ON(!ret
&& !bkey_eq(dst_iter
.pos
, dst_end
));
561 BUG_ON(bkey_gt(dst_iter
.pos
, dst_end
));
/*
 * Record how far the destination iterator advanced, and cap new_i_size at
 * that position (the shift by 9 presumably converts sectors to bytes -
 * confirm).
 */
563 dst_done
= dst_iter
.pos
.offset
- dst_start
.offset
;
564 new_i_size
= min(dst_iter
.pos
.offset
<< 9, new_i_size
);
/* Raise the destination inode's bi_size if it is below new_i_size */
567 struct bch_inode_unpacked inode_u
;
568 struct btree_iter inode_iter
= { NULL
};
570 bch2_trans_begin(trans
);
572 ret2
= bch2_inode_peek(trans
, &inode_iter
, &inode_u
,
573 dst_inum
, BTREE_ITER_intent
);
576 inode_u
.bi_size
< new_i_size
) {
577 inode_u
.bi_size
= new_i_size
;
578 ret2
= bch2_inode_write(trans
, &inode_iter
, &inode_u
) ?:
579 bch2_trans_commit(trans
, NULL
, NULL
,
580 BCH_TRANS_COMMIT_no_enospc
);
583 bch2_trans_iter_exit(trans
, &inode_iter
);
584 } while (bch2_err_matches(ret2
, BCH_ERR_transaction_restart
));
586 bch2_trans_put(trans
);
587 bch2_bkey_buf_exit(&new_src
, c
);
588 bch2_bkey_buf_exit(&new_dst
, c
);
590 bch2_write_ref_put(c
, BCH_WRITE_REF_reflink
);
/* Progress takes precedence over errors: dst_done, else ret, else ret2 */
592 return dst_done
?: ret
?: ret2
;