1 // SPDX-License-Identifier: GPL-2.0
4 #include "btree_cache.h"
5 #include "disk_groups.h"
9 #include "sb-members.h"
12 void bch2_dev_missing(struct bch_fs
*c
, unsigned dev
)
14 if (dev
!= BCH_SB_MEMBER_INVALID
)
15 bch2_fs_inconsistent(c
, "pointer to nonexistent device %u", dev
);
18 void bch2_dev_bucket_missing(struct bch_fs
*c
, struct bpos bucket
)
20 bch2_fs_inconsistent(c
, "pointer to nonexistent bucket %llu:%llu", bucket
.inode
, bucket
.offset
);
23 #define x(t, n, ...) [n] = #t,
24 static const char * const bch2_iops_measurements
[] = {
25 BCH_IOPS_MEASUREMENTS()
29 char * const bch2_member_error_strs
[] = {
30 BCH_MEMBER_ERROR_TYPES()
35 /* Code for bch_sb_field_members_v1: */
37 struct bch_member
*bch2_members_v2_get_mut(struct bch_sb
*sb
, int i
)
39 return __bch2_members_v2_get_mut(bch2_sb_field_get(sb
, members_v2
), i
);
42 static struct bch_member
members_v2_get(struct bch_sb_field_members_v2
*mi
, int i
)
44 struct bch_member ret
, *p
= __bch2_members_v2_get_mut(mi
, i
);
45 memset(&ret
, 0, sizeof(ret
));
46 memcpy(&ret
, p
, min_t(size_t, le16_to_cpu(mi
->member_bytes
), sizeof(ret
)));
50 static struct bch_member
*members_v1_get_mut(struct bch_sb_field_members_v1
*mi
, int i
)
52 return (void *) mi
->_members
+ (i
* BCH_MEMBER_V1_BYTES
);
55 static struct bch_member
members_v1_get(struct bch_sb_field_members_v1
*mi
, int i
)
57 struct bch_member ret
, *p
= members_v1_get_mut(mi
, i
);
58 memset(&ret
, 0, sizeof(ret
));
59 memcpy(&ret
, p
, min_t(size_t, BCH_MEMBER_V1_BYTES
, sizeof(ret
)));
63 struct bch_member
bch2_sb_member_get(struct bch_sb
*sb
, int i
)
65 struct bch_sb_field_members_v2
*mi2
= bch2_sb_field_get(sb
, members_v2
);
67 return members_v2_get(mi2
, i
);
68 struct bch_sb_field_members_v1
*mi1
= bch2_sb_field_get(sb
, members_v1
);
69 return members_v1_get(mi1
, i
);
72 static int sb_members_v2_resize_entries(struct bch_fs
*c
)
74 struct bch_sb_field_members_v2
*mi
= bch2_sb_field_get(c
->disk_sb
.sb
, members_v2
);
76 if (le16_to_cpu(mi
->member_bytes
) < sizeof(struct bch_member
)) {
77 unsigned u64s
= DIV_ROUND_UP((sizeof(*mi
) + sizeof(mi
->_members
[0]) *
78 c
->disk_sb
.sb
->nr_devices
), 8);
80 mi
= bch2_sb_field_resize(&c
->disk_sb
, members_v2
, u64s
);
82 return -BCH_ERR_ENOSPC_sb_members_v2
;
84 for (int i
= c
->disk_sb
.sb
->nr_devices
- 1; i
>= 0; --i
) {
85 void *dst
= (void *) mi
->_members
+ (i
* sizeof(struct bch_member
));
86 memmove(dst
, __bch2_members_v2_get_mut(mi
, i
), le16_to_cpu(mi
->member_bytes
));
87 memset(dst
+ le16_to_cpu(mi
->member_bytes
),
88 0, (sizeof(struct bch_member
) - le16_to_cpu(mi
->member_bytes
)));
90 mi
->member_bytes
= cpu_to_le16(sizeof(struct bch_member
));
95 int bch2_sb_members_v2_init(struct bch_fs
*c
)
97 struct bch_sb_field_members_v1
*mi1
;
98 struct bch_sb_field_members_v2
*mi2
;
100 if (!bch2_sb_field_get(c
->disk_sb
.sb
, members_v2
)) {
101 mi2
= bch2_sb_field_resize(&c
->disk_sb
, members_v2
,
102 DIV_ROUND_UP(sizeof(*mi2
) +
103 sizeof(struct bch_member
) * c
->sb
.nr_devices
,
105 mi1
= bch2_sb_field_get(c
->disk_sb
.sb
, members_v1
);
106 memcpy(&mi2
->_members
[0], &mi1
->_members
[0],
107 BCH_MEMBER_V1_BYTES
* c
->sb
.nr_devices
);
108 memset(&mi2
->pad
[0], 0, sizeof(mi2
->pad
));
109 mi2
->member_bytes
= cpu_to_le16(BCH_MEMBER_V1_BYTES
);
112 return sb_members_v2_resize_entries(c
);
115 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle
*disk_sb
)
117 struct bch_sb_field_members_v1
*mi1
;
118 struct bch_sb_field_members_v2
*mi2
;
120 mi1
= bch2_sb_field_resize(disk_sb
, members_v1
,
121 DIV_ROUND_UP(sizeof(*mi1
) + BCH_MEMBER_V1_BYTES
*
122 disk_sb
->sb
->nr_devices
, sizeof(u64
)));
124 return -BCH_ERR_ENOSPC_sb_members
;
126 mi2
= bch2_sb_field_get(disk_sb
->sb
, members_v2
);
128 for (unsigned i
= 0; i
< disk_sb
->sb
->nr_devices
; i
++)
129 memcpy(members_v1_get_mut(mi1
, i
), __bch2_members_v2_get_mut(mi2
, i
), BCH_MEMBER_V1_BYTES
);
134 static int validate_member(struct printbuf
*err
,
139 if (le64_to_cpu(m
.nbuckets
) > BCH_MEMBER_NBUCKETS_MAX
) {
140 prt_printf(err
, "device %u: too many buckets (got %llu, max %u)",
141 i
, le64_to_cpu(m
.nbuckets
), BCH_MEMBER_NBUCKETS_MAX
);
142 return -BCH_ERR_invalid_sb_members
;
145 if (le64_to_cpu(m
.nbuckets
) -
146 le16_to_cpu(m
.first_bucket
) < BCH_MIN_NR_NBUCKETS
) {
147 prt_printf(err
, "device %u: not enough buckets (got %llu, max %u)",
148 i
, le64_to_cpu(m
.nbuckets
), BCH_MIN_NR_NBUCKETS
);
149 return -BCH_ERR_invalid_sb_members
;
152 if (le16_to_cpu(m
.bucket_size
) <
153 le16_to_cpu(sb
->block_size
)) {
154 prt_printf(err
, "device %u: bucket size %u smaller than block size %u",
155 i
, le16_to_cpu(m
.bucket_size
), le16_to_cpu(sb
->block_size
));
156 return -BCH_ERR_invalid_sb_members
;
159 if (le16_to_cpu(m
.bucket_size
) <
160 BCH_SB_BTREE_NODE_SIZE(sb
)) {
161 prt_printf(err
, "device %u: bucket size %u smaller than btree node size %llu",
162 i
, le16_to_cpu(m
.bucket_size
), BCH_SB_BTREE_NODE_SIZE(sb
));
163 return -BCH_ERR_invalid_sb_members
;
166 if (m
.btree_bitmap_shift
>= BCH_MI_BTREE_BITMAP_SHIFT_MAX
) {
167 prt_printf(err
, "device %u: invalid btree_bitmap_shift %u", i
, m
.btree_bitmap_shift
);
168 return -BCH_ERR_invalid_sb_members
;
174 static void member_to_text(struct printbuf
*out
,
176 struct bch_sb_field_disk_groups
*gi
,
180 unsigned data_have
= bch2_sb_dev_has_data(sb
, i
);
181 u64 bucket_size
= le16_to_cpu(m
.bucket_size
);
182 u64 device_size
= le64_to_cpu(m
.nbuckets
) * bucket_size
;
184 if (!bch2_member_alive(&m
))
187 prt_printf(out
, "Device:\t%u\n", i
);
189 printbuf_indent_add(out
, 2);
191 prt_printf(out
, "Label:\t");
192 if (BCH_MEMBER_GROUP(&m
)) {
193 unsigned idx
= BCH_MEMBER_GROUP(&m
) - 1;
195 if (idx
< disk_groups_nr(gi
))
196 prt_printf(out
, "%s (%u)",
197 gi
->entries
[idx
].label
, idx
);
199 prt_printf(out
, "(bad disk labels section)");
201 prt_printf(out
, "(none)");
205 prt_printf(out
, "UUID:\t");
206 pr_uuid(out
, m
.uuid
.b
);
209 prt_printf(out
, "Size:\t");
210 prt_units_u64(out
, device_size
<< 9);
213 for (unsigned i
= 0; i
< BCH_MEMBER_ERROR_NR
; i
++)
214 prt_printf(out
, "%s errors:\t%llu\n", bch2_member_error_strs
[i
], le64_to_cpu(m
.errors
[i
]));
216 for (unsigned i
= 0; i
< BCH_IOPS_NR
; i
++)
217 prt_printf(out
, "%s iops:\t%u\n", bch2_iops_measurements
[i
], le32_to_cpu(m
.iops
[i
]));
219 prt_printf(out
, "Bucket size:\t");
220 prt_units_u64(out
, bucket_size
<< 9);
223 prt_printf(out
, "First bucket:\t%u\n", le16_to_cpu(m
.first_bucket
));
224 prt_printf(out
, "Buckets:\t%llu\n", le64_to_cpu(m
.nbuckets
));
226 prt_printf(out
, "Last mount:\t");
228 bch2_prt_datetime(out
, le64_to_cpu(m
.last_mount
));
230 prt_printf(out
, "(never)");
233 prt_printf(out
, "Last superblock write:\t%llu\n", le64_to_cpu(m
.seq
));
235 prt_printf(out
, "State:\t%s\n",
236 BCH_MEMBER_STATE(&m
) < BCH_MEMBER_STATE_NR
237 ? bch2_member_states
[BCH_MEMBER_STATE(&m
)]
240 prt_printf(out
, "Data allowed:\t");
241 if (BCH_MEMBER_DATA_ALLOWED(&m
))
242 prt_bitflags(out
, __bch2_data_types
, BCH_MEMBER_DATA_ALLOWED(&m
));
244 prt_printf(out
, "(none)");
247 prt_printf(out
, "Has data:\t");
249 prt_bitflags(out
, __bch2_data_types
, data_have
);
251 prt_printf(out
, "(none)");
254 prt_printf(out
, "Btree allocated bitmap blocksize:\t");
255 if (m
.btree_bitmap_shift
< 64)
256 prt_units_u64(out
, 1ULL << m
.btree_bitmap_shift
);
258 prt_printf(out
, "(invalid shift %u)", m
.btree_bitmap_shift
);
261 prt_printf(out
, "Btree allocated bitmap:\t");
262 bch2_prt_u64_base2_nbits(out
, le64_to_cpu(m
.btree_allocated_bitmap
), 64);
265 prt_printf(out
, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m
) ? BCH_MEMBER_DURABILITY(&m
) - 1 : 1);
267 prt_printf(out
, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m
));
268 prt_printf(out
, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m
));
270 printbuf_indent_sub(out
, 2);
273 static int bch2_sb_members_v1_validate(struct bch_sb
*sb
, struct bch_sb_field
*f
,
274 enum bch_validate_flags flags
, struct printbuf
*err
)
276 struct bch_sb_field_members_v1
*mi
= field_to_type(f
, members_v1
);
279 if ((void *) members_v1_get_mut(mi
, sb
->nr_devices
) > vstruct_end(&mi
->field
)) {
280 prt_printf(err
, "too many devices for section size");
281 return -BCH_ERR_invalid_sb_members
;
284 for (i
= 0; i
< sb
->nr_devices
; i
++) {
285 struct bch_member m
= members_v1_get(mi
, i
);
287 int ret
= validate_member(err
, m
, sb
, i
);
295 static void bch2_sb_members_v1_to_text(struct printbuf
*out
, struct bch_sb
*sb
,
296 struct bch_sb_field
*f
)
298 struct bch_sb_field_members_v1
*mi
= field_to_type(f
, members_v1
);
299 struct bch_sb_field_disk_groups
*gi
= bch2_sb_field_get(sb
, disk_groups
);
302 for (i
= 0; i
< sb
->nr_devices
; i
++)
303 member_to_text(out
, members_v1_get(mi
, i
), gi
, sb
, i
);
306 const struct bch_sb_field_ops bch_sb_field_ops_members_v1
= {
307 .validate
= bch2_sb_members_v1_validate
,
308 .to_text
= bch2_sb_members_v1_to_text
,
311 static void bch2_sb_members_v2_to_text(struct printbuf
*out
, struct bch_sb
*sb
,
312 struct bch_sb_field
*f
)
314 struct bch_sb_field_members_v2
*mi
= field_to_type(f
, members_v2
);
315 struct bch_sb_field_disk_groups
*gi
= bch2_sb_field_get(sb
, disk_groups
);
318 for (i
= 0; i
< sb
->nr_devices
; i
++)
319 member_to_text(out
, members_v2_get(mi
, i
), gi
, sb
, i
);
322 static int bch2_sb_members_v2_validate(struct bch_sb
*sb
, struct bch_sb_field
*f
,
323 enum bch_validate_flags flags
, struct printbuf
*err
)
325 struct bch_sb_field_members_v2
*mi
= field_to_type(f
, members_v2
);
326 size_t mi_bytes
= (void *) __bch2_members_v2_get_mut(mi
, sb
->nr_devices
) -
329 if (mi_bytes
> vstruct_bytes(&mi
->field
)) {
330 prt_printf(err
, "section too small (%zu > %zu)",
331 mi_bytes
, vstruct_bytes(&mi
->field
));
332 return -BCH_ERR_invalid_sb_members
;
335 for (unsigned i
= 0; i
< sb
->nr_devices
; i
++) {
336 int ret
= validate_member(err
, members_v2_get(mi
, i
), sb
, i
);
344 const struct bch_sb_field_ops bch_sb_field_ops_members_v2
= {
345 .validate
= bch2_sb_members_v2_validate
,
346 .to_text
= bch2_sb_members_v2_to_text
,
349 void bch2_sb_members_from_cpu(struct bch_fs
*c
)
351 struct bch_sb_field_members_v2
*mi
= bch2_sb_field_get(c
->disk_sb
.sb
, members_v2
);
354 for_each_member_device_rcu(c
, ca
, NULL
) {
355 struct bch_member
*m
= __bch2_members_v2_get_mut(mi
, ca
->dev_idx
);
357 for (unsigned e
= 0; e
< BCH_MEMBER_ERROR_NR
; e
++)
358 m
->errors
[e
] = cpu_to_le64(atomic64_read(&ca
->errors
[e
]));
363 void bch2_dev_io_errors_to_text(struct printbuf
*out
, struct bch_dev
*ca
)
365 struct bch_fs
*c
= ca
->fs
;
368 mutex_lock(&ca
->fs
->sb_lock
);
369 m
= bch2_sb_member_get(c
->disk_sb
.sb
, ca
->dev_idx
);
370 mutex_unlock(&ca
->fs
->sb_lock
);
372 printbuf_tabstop_push(out
, 12);
374 prt_str(out
, "IO errors since filesystem creation");
377 printbuf_indent_add(out
, 2);
378 for (unsigned i
= 0; i
< BCH_MEMBER_ERROR_NR
; i
++)
379 prt_printf(out
, "%s:\t%llu\n", bch2_member_error_strs
[i
], atomic64_read(&ca
->errors
[i
]));
380 printbuf_indent_sub(out
, 2);
382 prt_str(out
, "IO errors since ");
383 bch2_pr_time_units(out
, (ktime_get_real_seconds() - le64_to_cpu(m
.errors_reset_time
)) * NSEC_PER_SEC
);
384 prt_str(out
, " ago");
387 printbuf_indent_add(out
, 2);
388 for (unsigned i
= 0; i
< BCH_MEMBER_ERROR_NR
; i
++)
389 prt_printf(out
, "%s:\t%llu\n", bch2_member_error_strs
[i
],
390 atomic64_read(&ca
->errors
[i
]) - le64_to_cpu(m
.errors_at_reset
[i
]));
391 printbuf_indent_sub(out
, 2);
394 void bch2_dev_errors_reset(struct bch_dev
*ca
)
396 struct bch_fs
*c
= ca
->fs
;
397 struct bch_member
*m
;
399 mutex_lock(&c
->sb_lock
);
400 m
= bch2_members_v2_get_mut(c
->disk_sb
.sb
, ca
->dev_idx
);
401 for (unsigned i
= 0; i
< ARRAY_SIZE(m
->errors_at_reset
); i
++)
402 m
->errors_at_reset
[i
] = cpu_to_le64(atomic64_read(&ca
->errors
[i
]));
403 m
->errors_reset_time
= cpu_to_le64(ktime_get_real_seconds());
406 mutex_unlock(&c
->sb_lock
);
/*
 * Per member "range has btree nodes" bitmap:
 *
 * This is so that if we ever have to run the btree node scan to repair we don't
 * have to scan full devices:
 */
416 bool bch2_dev_btree_bitmap_marked(struct bch_fs
*c
, struct bkey_s_c k
)
420 bkey_for_each_ptr(bch2_bkey_ptrs_c(k
), ptr
) {
421 struct bch_dev
*ca
= bch2_dev_rcu(c
, ptr
->dev
);
425 if (!bch2_dev_btree_bitmap_marked_sectors(ca
, ptr
->offset
, btree_sectors(c
))) {
434 static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2
*mi
, unsigned dev
,
435 u64 start
, unsigned sectors
)
437 struct bch_member
*m
= __bch2_members_v2_get_mut(mi
, dev
);
438 u64 bitmap
= le64_to_cpu(m
->btree_allocated_bitmap
);
440 u64 end
= start
+ sectors
;
442 int resize
= ilog2(roundup_pow_of_two(end
)) - (m
->btree_bitmap_shift
+ 6);
446 for (unsigned i
= 0; i
< 64; i
++)
447 if (bitmap
& BIT_ULL(i
))
448 new_bitmap
|= BIT_ULL(i
>> resize
);
450 m
->btree_bitmap_shift
+= resize
;
453 BUG_ON(m
->btree_bitmap_shift
>= BCH_MI_BTREE_BITMAP_SHIFT_MAX
);
454 BUG_ON(end
> 64ULL << m
->btree_bitmap_shift
);
456 for (unsigned bit
= start
>> m
->btree_bitmap_shift
;
457 (u64
) bit
<< m
->btree_bitmap_shift
< end
;
459 bitmap
|= BIT_ULL(bit
);
461 m
->btree_allocated_bitmap
= cpu_to_le64(bitmap
);
464 void bch2_dev_btree_bitmap_mark(struct bch_fs
*c
, struct bkey_s_c k
)
466 lockdep_assert_held(&c
->sb_lock
);
468 struct bch_sb_field_members_v2
*mi
= bch2_sb_field_get(c
->disk_sb
.sb
, members_v2
);
469 bkey_for_each_ptr(bch2_bkey_ptrs_c(k
), ptr
) {
470 if (!bch2_member_exists(c
->disk_sb
.sb
, ptr
->dev
))
473 __bch2_dev_btree_bitmap_mark(mi
, ptr
->dev
, ptr
->offset
, btree_sectors(c
));
477 unsigned bch2_sb_nr_devices(const struct bch_sb
*sb
)
481 for (unsigned i
= 0; i
< sb
->nr_devices
; i
++)
482 nr
+= bch2_member_exists((struct bch_sb
*) sb
, i
);
486 int bch2_sb_member_alloc(struct bch_fs
*c
)
488 unsigned dev_idx
= c
->sb
.nr_devices
;
489 struct bch_sb_field_members_v2
*mi
;
493 u64 best_last_mount
= 0;
495 if (dev_idx
< BCH_SB_MEMBERS_MAX
)
498 for (dev_idx
= 0; dev_idx
< BCH_SB_MEMBERS_MAX
; dev_idx
++) {
499 /* eventually BCH_SB_MEMBERS_MAX will be raised */
500 if (dev_idx
== BCH_SB_MEMBER_INVALID
)
503 struct bch_member m
= bch2_sb_member_get(c
->disk_sb
.sb
, dev_idx
);
504 if (bch2_member_alive(&m
))
507 u64 last_mount
= le64_to_cpu(m
.last_mount
);
508 if (best
< 0 || last_mount
< best_last_mount
) {
510 best_last_mount
= last_mount
;
518 return -BCH_ERR_ENOSPC_sb_members
;
520 nr_devices
= max_t(unsigned, dev_idx
+ 1, c
->sb
.nr_devices
);
522 mi
= bch2_sb_field_get(c
->disk_sb
.sb
, members_v2
);
523 u64s
= DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2
) +
524 le16_to_cpu(mi
->member_bytes
) * nr_devices
, sizeof(u64
));
526 mi
= bch2_sb_field_resize(&c
->disk_sb
, members_v2
, u64s
);
528 return -BCH_ERR_ENOSPC_sb_members
;
530 c
->disk_sb
.sb
->nr_devices
= nr_devices
;