1 // SPDX-License-Identifier: GPL-2.0
5 #include "bkey_methods.h"
6 #include "btree_update.h"
12 #include "subvolume.h"
14 #include <linux/dcache.h>
16 static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d
)
18 if (bkey_val_bytes(d
.k
) < offsetof(struct bch_dirent
, d_name
))
21 unsigned bkey_u64s
= bkey_val_u64s(d
.k
);
22 unsigned bkey_bytes
= bkey_u64s
* sizeof(u64
);
23 u64 last_u64
= ((u64
*)d
.v
)[bkey_u64s
- 1];
25 unsigned trailing_nuls
= last_u64
? __builtin_ctzll(last_u64
) / 8 : 64 / 8;
27 unsigned trailing_nuls
= last_u64
? __builtin_clzll(last_u64
) / 8 : 64 / 8;
31 offsetof(struct bch_dirent
, d_name
) -
35 struct qstr
bch2_dirent_get_name(struct bkey_s_c_dirent d
)
37 return (struct qstr
) QSTR_INIT(d
.v
->d_name
, bch2_dirent_name_bytes(d
));
40 static u64
bch2_dirent_hash(const struct bch_hash_info
*info
,
41 const struct qstr
*name
)
43 struct bch_str_hash_ctx ctx
;
45 bch2_str_hash_init(&ctx
, info
);
46 bch2_str_hash_update(&ctx
, info
, name
->name
, name
->len
);
48 /* [0,2) reserved for dots */
49 return max_t(u64
, bch2_str_hash_end(&ctx
, info
), 2);
52 static u64
dirent_hash_key(const struct bch_hash_info
*info
, const void *key
)
54 return bch2_dirent_hash(info
, key
);
57 static u64
dirent_hash_bkey(const struct bch_hash_info
*info
, struct bkey_s_c k
)
59 struct bkey_s_c_dirent d
= bkey_s_c_to_dirent(k
);
60 struct qstr name
= bch2_dirent_get_name(d
);
62 return bch2_dirent_hash(info
, &name
);
65 static bool dirent_cmp_key(struct bkey_s_c _l
, const void *_r
)
67 struct bkey_s_c_dirent l
= bkey_s_c_to_dirent(_l
);
68 const struct qstr l_name
= bch2_dirent_get_name(l
);
69 const struct qstr
*r_name
= _r
;
71 return !qstr_eq(l_name
, *r_name
);
74 static bool dirent_cmp_bkey(struct bkey_s_c _l
, struct bkey_s_c _r
)
76 struct bkey_s_c_dirent l
= bkey_s_c_to_dirent(_l
);
77 struct bkey_s_c_dirent r
= bkey_s_c_to_dirent(_r
);
78 const struct qstr l_name
= bch2_dirent_get_name(l
);
79 const struct qstr r_name
= bch2_dirent_get_name(r
);
81 return !qstr_eq(l_name
, r_name
);
84 static bool dirent_is_visible(subvol_inum inum
, struct bkey_s_c k
)
86 struct bkey_s_c_dirent d
= bkey_s_c_to_dirent(k
);
88 if (d
.v
->d_type
== DT_SUBVOL
)
89 return le32_to_cpu(d
.v
->d_parent_subvol
) == inum
.subvol
;
93 const struct bch_hash_desc bch2_dirent_hash_desc
= {
94 .btree_id
= BTREE_ID_dirents
,
95 .key_type
= KEY_TYPE_dirent
,
96 .hash_key
= dirent_hash_key
,
97 .hash_bkey
= dirent_hash_bkey
,
98 .cmp_key
= dirent_cmp_key
,
99 .cmp_bkey
= dirent_cmp_bkey
,
100 .is_visible
= dirent_is_visible
,
103 int bch2_dirent_validate(struct bch_fs
*c
, struct bkey_s_c k
,
104 enum bch_validate_flags flags
)
106 struct bkey_s_c_dirent d
= bkey_s_c_to_dirent(k
);
107 struct qstr d_name
= bch2_dirent_get_name(d
);
110 bkey_fsck_err_on(!d_name
.len
,
111 c
, dirent_empty_name
,
114 bkey_fsck_err_on(bkey_val_u64s(k
.k
) > dirent_val_u64s(d_name
.len
),
115 c
, dirent_val_too_big
,
116 "value too big (%zu > %u)",
117 bkey_val_u64s(k
.k
), dirent_val_u64s(d_name
.len
));
120 * Check new keys don't exceed the max length
121 * (older keys may be larger.)
123 bkey_fsck_err_on((flags
& BCH_VALIDATE_commit
) && d_name
.len
> BCH_NAME_MAX
,
124 c
, dirent_name_too_long
,
125 "dirent name too big (%u > %u)",
126 d_name
.len
, BCH_NAME_MAX
);
128 bkey_fsck_err_on(d_name
.len
!= strnlen(d_name
.name
, d_name
.len
),
129 c
, dirent_name_embedded_nul
,
130 "dirent has stray data after name's NUL");
132 bkey_fsck_err_on((d_name
.len
== 1 && !memcmp(d_name
.name
, ".", 1)) ||
133 (d_name
.len
== 2 && !memcmp(d_name
.name
, "..", 2)),
134 c
, dirent_name_dot_or_dotdot
,
137 bkey_fsck_err_on(memchr(d_name
.name
, '/', d_name
.len
),
138 c
, dirent_name_has_slash
,
141 bkey_fsck_err_on(d
.v
->d_type
!= DT_SUBVOL
&&
142 le64_to_cpu(d
.v
->d_inum
) == d
.k
->p
.inode
,
144 "dirent points to own directory");
149 void bch2_dirent_to_text(struct printbuf
*out
, struct bch_fs
*c
, struct bkey_s_c k
)
151 struct bkey_s_c_dirent d
= bkey_s_c_to_dirent(k
);
152 struct qstr d_name
= bch2_dirent_get_name(d
);
154 prt_printf(out
, "%.*s -> ", d_name
.len
, d_name
.name
);
156 if (d
.v
->d_type
!= DT_SUBVOL
)
157 prt_printf(out
, "%llu", le64_to_cpu(d
.v
->d_inum
));
159 prt_printf(out
, "%u -> %u",
160 le32_to_cpu(d
.v
->d_parent_subvol
),
161 le32_to_cpu(d
.v
->d_child_subvol
));
163 prt_printf(out
, " type %s", bch2_d_type_str(d
.v
->d_type
));
166 static struct bkey_i_dirent
*dirent_create_key(struct btree_trans
*trans
,
167 subvol_inum dir
, u8 type
,
168 const struct qstr
*name
, u64 dst
)
170 struct bkey_i_dirent
*dirent
;
171 unsigned u64s
= BKEY_U64s
+ dirent_val_u64s(name
->len
);
173 if (name
->len
> BCH_NAME_MAX
)
174 return ERR_PTR(-ENAMETOOLONG
);
176 BUG_ON(u64s
> U8_MAX
);
178 dirent
= bch2_trans_kmalloc(trans
, u64s
* sizeof(u64
));
182 bkey_dirent_init(&dirent
->k_i
);
183 dirent
->k
.u64s
= u64s
;
185 if (type
!= DT_SUBVOL
) {
186 dirent
->v
.d_inum
= cpu_to_le64(dst
);
188 dirent
->v
.d_parent_subvol
= cpu_to_le32(dir
.subvol
);
189 dirent
->v
.d_child_subvol
= cpu_to_le32(dst
);
192 dirent
->v
.d_type
= type
;
194 memcpy(dirent
->v
.d_name
, name
->name
, name
->len
);
195 memset(dirent
->v
.d_name
+ name
->len
, 0,
196 bkey_val_bytes(&dirent
->k
) -
197 offsetof(struct bch_dirent
, d_name
) -
200 EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent
)) != name
->len
);
205 int bch2_dirent_create_snapshot(struct btree_trans
*trans
,
206 u32 dir_subvol
, u64 dir
, u32 snapshot
,
207 const struct bch_hash_info
*hash_info
,
208 u8 type
, const struct qstr
*name
, u64 dst_inum
,
210 enum btree_iter_update_trigger_flags flags
)
212 subvol_inum dir_inum
= { .subvol
= dir_subvol
, .inum
= dir
};
213 struct bkey_i_dirent
*dirent
;
216 dirent
= dirent_create_key(trans
, dir_inum
, type
, name
, dst_inum
);
217 ret
= PTR_ERR_OR_ZERO(dirent
);
221 dirent
->k
.p
.inode
= dir
;
222 dirent
->k
.p
.snapshot
= snapshot
;
224 ret
= bch2_hash_set_in_snapshot(trans
, bch2_dirent_hash_desc
, hash_info
,
225 dir_inum
, snapshot
, &dirent
->k_i
,
226 flags
|BTREE_UPDATE_internal_snapshot_node
);
227 *dir_offset
= dirent
->k
.p
.offset
;
232 int bch2_dirent_create(struct btree_trans
*trans
, subvol_inum dir
,
233 const struct bch_hash_info
*hash_info
,
234 u8 type
, const struct qstr
*name
, u64 dst_inum
,
236 enum btree_iter_update_trigger_flags flags
)
238 struct bkey_i_dirent
*dirent
;
241 dirent
= dirent_create_key(trans
, dir
, type
, name
, dst_inum
);
242 ret
= PTR_ERR_OR_ZERO(dirent
);
246 ret
= bch2_hash_set(trans
, bch2_dirent_hash_desc
, hash_info
,
247 dir
, &dirent
->k_i
, flags
);
248 *dir_offset
= dirent
->k
.p
.offset
;
253 int bch2_dirent_read_target(struct btree_trans
*trans
, subvol_inum dir
,
254 struct bkey_s_c_dirent d
, subvol_inum
*target
)
256 struct bch_subvolume s
;
259 if (d
.v
->d_type
== DT_SUBVOL
&&
260 le32_to_cpu(d
.v
->d_parent_subvol
) != dir
.subvol
)
263 if (likely(d
.v
->d_type
!= DT_SUBVOL
)) {
264 target
->subvol
= dir
.subvol
;
265 target
->inum
= le64_to_cpu(d
.v
->d_inum
);
267 target
->subvol
= le32_to_cpu(d
.v
->d_child_subvol
);
269 ret
= bch2_subvolume_get(trans
, target
->subvol
, true, BTREE_ITER_cached
, &s
);
271 target
->inum
= le64_to_cpu(s
.inode
);
277 int bch2_dirent_rename(struct btree_trans
*trans
,
278 subvol_inum src_dir
, struct bch_hash_info
*src_hash
,
279 subvol_inum dst_dir
, struct bch_hash_info
*dst_hash
,
280 const struct qstr
*src_name
, subvol_inum
*src_inum
, u64
*src_offset
,
281 const struct qstr
*dst_name
, subvol_inum
*dst_inum
, u64
*dst_offset
,
282 enum bch_rename_mode mode
)
284 struct btree_iter src_iter
= { NULL
};
285 struct btree_iter dst_iter
= { NULL
};
286 struct bkey_s_c old_src
, old_dst
= bkey_s_c_null
;
287 struct bkey_i_dirent
*new_src
= NULL
, *new_dst
= NULL
;
288 struct bpos dst_pos
=
289 POS(dst_dir
.inum
, bch2_dirent_hash(dst_hash
, dst_name
));
290 unsigned src_update_flags
= 0;
291 bool delete_src
, delete_dst
;
294 memset(src_inum
, 0, sizeof(*src_inum
));
295 memset(dst_inum
, 0, sizeof(*dst_inum
));
298 old_src
= bch2_hash_lookup(trans
, &src_iter
, bch2_dirent_hash_desc
,
299 src_hash
, src_dir
, src_name
,
301 ret
= bkey_err(old_src
);
305 ret
= bch2_dirent_read_target(trans
, src_dir
,
306 bkey_s_c_to_dirent(old_src
), src_inum
);
311 if (mode
== BCH_RENAME
) {
313 * Note that we're _not_ checking if the target already exists -
314 * we're relying on the VFS to do that check for us for
317 ret
= bch2_hash_hole(trans
, &dst_iter
, bch2_dirent_hash_desc
,
318 dst_hash
, dst_dir
, dst_name
);
322 old_dst
= bch2_hash_lookup(trans
, &dst_iter
, bch2_dirent_hash_desc
,
323 dst_hash
, dst_dir
, dst_name
,
325 ret
= bkey_err(old_dst
);
329 ret
= bch2_dirent_read_target(trans
, dst_dir
,
330 bkey_s_c_to_dirent(old_dst
), dst_inum
);
335 if (mode
!= BCH_RENAME_EXCHANGE
)
336 *src_offset
= dst_iter
.pos
.offset
;
338 /* Create new dst key: */
339 new_dst
= dirent_create_key(trans
, dst_dir
, 0, dst_name
, 0);
340 ret
= PTR_ERR_OR_ZERO(new_dst
);
344 dirent_copy_target(new_dst
, bkey_s_c_to_dirent(old_src
));
345 new_dst
->k
.p
= dst_iter
.pos
;
347 /* Create new src key: */
348 if (mode
== BCH_RENAME_EXCHANGE
) {
349 new_src
= dirent_create_key(trans
, src_dir
, 0, src_name
, 0);
350 ret
= PTR_ERR_OR_ZERO(new_src
);
354 dirent_copy_target(new_src
, bkey_s_c_to_dirent(old_dst
));
355 new_src
->k
.p
= src_iter
.pos
;
357 new_src
= bch2_trans_kmalloc(trans
, sizeof(struct bkey_i
));
358 ret
= PTR_ERR_OR_ZERO(new_src
);
362 bkey_init(&new_src
->k
);
363 new_src
->k
.p
= src_iter
.pos
;
365 if (bkey_le(dst_pos
, src_iter
.pos
) &&
366 bkey_lt(src_iter
.pos
, dst_iter
.pos
)) {
368 * We have a hash collision for the new dst key,
369 * and new_src - the key we're deleting - is between
370 * new_dst's hashed slot and the slot we're going to be
371 * inserting it into - oops. This will break the hash
372 * table if we don't deal with it:
374 if (mode
== BCH_RENAME
) {
376 * If we're not overwriting, we can just insert
377 * new_dst at the src position:
380 new_src
->k
.p
= src_iter
.pos
;
383 /* If we're overwriting, we can't insert new_dst
384 * at a different slot because it has to
385 * overwrite old_dst - just make sure to use a
386 * whiteout when deleting src:
388 new_src
->k
.type
= KEY_TYPE_hash_whiteout
;
391 /* Check if we need a whiteout to delete src: */
392 ret
= bch2_hash_needs_whiteout(trans
, bch2_dirent_hash_desc
,
393 src_hash
, &src_iter
);
398 new_src
->k
.type
= KEY_TYPE_hash_whiteout
;
402 if (new_dst
->v
.d_type
== DT_SUBVOL
)
403 new_dst
->v
.d_parent_subvol
= cpu_to_le32(dst_dir
.subvol
);
405 if ((mode
== BCH_RENAME_EXCHANGE
) &&
406 new_src
->v
.d_type
== DT_SUBVOL
)
407 new_src
->v
.d_parent_subvol
= cpu_to_le32(src_dir
.subvol
);
409 ret
= bch2_trans_update(trans
, &dst_iter
, &new_dst
->k_i
, 0);
414 * If we're deleting a subvolume we need to really delete the dirent,
415 * not just emit a whiteout in the current snapshot - there can only be
416 * single dirent that points to a given subvolume.
418 * IOW, we don't maintain multiple versions in different snapshots of
419 * dirents that point to subvolumes - dirents that point to subvolumes
420 * are only visible in one particular subvolume so it's not necessary,
421 * and it would be particularly confusing for fsck to have to deal with.
423 delete_src
= bkey_s_c_to_dirent(old_src
).v
->d_type
== DT_SUBVOL
&&
424 new_src
->k
.p
.snapshot
!= old_src
.k
->p
.snapshot
;
426 delete_dst
= old_dst
.k
&&
427 bkey_s_c_to_dirent(old_dst
).v
->d_type
== DT_SUBVOL
&&
428 new_dst
->k
.p
.snapshot
!= old_dst
.k
->p
.snapshot
;
430 if (!delete_src
|| !bkey_deleted(&new_src
->k
)) {
431 ret
= bch2_trans_update(trans
, &src_iter
, &new_src
->k_i
, src_update_flags
);
437 bch2_btree_iter_set_snapshot(&src_iter
, old_src
.k
->p
.snapshot
);
438 ret
= bch2_btree_iter_traverse(&src_iter
) ?:
439 bch2_btree_delete_at(trans
, &src_iter
, BTREE_UPDATE_internal_snapshot_node
);
445 bch2_btree_iter_set_snapshot(&dst_iter
, old_dst
.k
->p
.snapshot
);
446 ret
= bch2_btree_iter_traverse(&dst_iter
) ?:
447 bch2_btree_delete_at(trans
, &dst_iter
, BTREE_UPDATE_internal_snapshot_node
);
452 if (mode
== BCH_RENAME_EXCHANGE
)
453 *src_offset
= new_src
->k
.p
.offset
;
454 *dst_offset
= new_dst
->k
.p
.offset
;
456 bch2_trans_iter_exit(trans
, &src_iter
);
457 bch2_trans_iter_exit(trans
, &dst_iter
);
461 int bch2_dirent_lookup_trans(struct btree_trans
*trans
,
462 struct btree_iter
*iter
,
464 const struct bch_hash_info
*hash_info
,
465 const struct qstr
*name
, subvol_inum
*inum
,
468 struct bkey_s_c k
= bch2_hash_lookup(trans
, iter
, bch2_dirent_hash_desc
,
469 hash_info
, dir
, name
, flags
);
470 int ret
= bkey_err(k
);
474 ret
= bch2_dirent_read_target(trans
, dir
, bkey_s_c_to_dirent(k
), inum
);
479 bch2_trans_iter_exit(trans
, iter
);
483 u64
bch2_dirent_lookup(struct bch_fs
*c
, subvol_inum dir
,
484 const struct bch_hash_info
*hash_info
,
485 const struct qstr
*name
, subvol_inum
*inum
)
487 struct btree_trans
*trans
= bch2_trans_get(c
);
488 struct btree_iter iter
= { NULL
};
490 int ret
= lockrestart_do(trans
,
491 bch2_dirent_lookup_trans(trans
, &iter
, dir
, hash_info
, name
, inum
, 0));
492 bch2_trans_iter_exit(trans
, &iter
);
493 bch2_trans_put(trans
);
497 int bch2_empty_dir_snapshot(struct btree_trans
*trans
, u64 dir
, u32 subvol
, u32 snapshot
)
499 struct btree_iter iter
;
503 for_each_btree_key_upto_norestart(trans
, iter
, BTREE_ID_dirents
,
504 SPOS(dir
, 0, snapshot
),
505 POS(dir
, U64_MAX
), 0, k
, ret
)
506 if (k
.k
->type
== KEY_TYPE_dirent
) {
507 struct bkey_s_c_dirent d
= bkey_s_c_to_dirent(k
);
508 if (d
.v
->d_type
== DT_SUBVOL
&& le32_to_cpu(d
.v
->d_parent_subvol
) != subvol
)
510 ret
= -BCH_ERR_ENOTEMPTY_dir_not_empty
;
513 bch2_trans_iter_exit(trans
, &iter
);
518 int bch2_empty_dir_trans(struct btree_trans
*trans
, subvol_inum dir
)
522 return bch2_subvolume_get_snapshot(trans
, dir
.subvol
, &snapshot
) ?:
523 bch2_empty_dir_snapshot(trans
, dir
.inum
, dir
.subvol
, snapshot
);
526 static int bch2_dir_emit(struct dir_context
*ctx
, struct bkey_s_c_dirent d
, subvol_inum target
)
528 struct qstr name
= bch2_dirent_get_name(d
);
530 * Although not required by the kernel code, updating ctx->pos is needed
531 * for the bcachefs FUSE driver. Without this update, the FUSE
532 * implementation will be stuck in an infinite loop when reading
533 * directories (via the bcachefs_fuse_readdir callback).
534 * In kernel space, ctx->pos is updated by the VFS code.
536 ctx
->pos
= d
.k
->p
.offset
;
537 bool ret
= dir_emit(ctx
, name
.name
,
540 vfs_d_type(d
.v
->d_type
));
542 ctx
->pos
= d
.k
->p
.offset
+ 1;
546 int bch2_readdir(struct bch_fs
*c
, subvol_inum inum
, struct dir_context
*ctx
)
549 bch2_bkey_buf_init(&sk
);
551 int ret
= bch2_trans_run(c
,
552 for_each_btree_key_in_subvolume_upto(trans
, iter
, BTREE_ID_dirents
,
553 POS(inum
.inum
, ctx
->pos
),
554 POS(inum
.inum
, U64_MAX
),
555 inum
.subvol
, 0, k
, ({
556 if (k
.k
->type
!= KEY_TYPE_dirent
)
559 /* dir_emit() can fault and block: */
560 bch2_bkey_buf_reassemble(&sk
, c
, k
);
561 struct bkey_s_c_dirent dirent
= bkey_i_to_s_c_dirent(sk
.k
);
564 int ret2
= bch2_dirent_read_target(trans
, inum
, dirent
, &target
);
568 ret2
?: drop_locks_do(trans
, bch2_dir_emit(ctx
, dirent
, target
));
571 bch2_bkey_buf_exit(&sk
, c
);
573 return ret
< 0 ? ret
: 0;