1 // SPDX-License-Identifier: GPL-2.0
3 #include "btree_iter.h"
6 #include "recovery_passes.h"
8 #include "thread_with_file.h"
/*
 * Ratelimit threshold for fsck error messages: __bch2_fsck_err() compares the
 * per-message counter s->nr against this value when c->opts.ratelimit_errors
 * is set and FSCK_NO_RATELIMIT is not.
 */
10 #define FSCK_ERR_RATELIMIT_NR 10
/*
 * bch2_inconsistent_error() - flag the filesystem as having hit an error and
 * react according to the configured c->opts.errors policy.
 *
 * BCH_FS_error is set in c->flags unconditionally, before the policy switch.
 *
 * NOTE(review): this listing is missing lines from the body (braces, the
 * return statements, and possibly further case labels), so the value returned
 * for each policy cannot be confirmed from here.
 */
12 bool bch2_inconsistent_error(struct bch_fs
*c
)
14 set_bit(BCH_FS_error
, &c
->flags
);
16 switch (c
->opts
.errors
) {
17 case BCH_ON_ERROR_continue
:
19 case BCH_ON_ERROR_fix_safe
:
/* The journal sequence is logged only when the emergency-RO call returns nonzero. */
21 if (bch2_fs_emergency_read_only(c
))
22 bch_err(c
, "inconsistency detected - emergency read only at journal seq %llu",
23 journal_cur_seq(&c
->journal
));
25 case BCH_ON_ERROR_panic
:
/* Panic policy: panic() with a message built by bch2_fmt(). */
26 panic(bch2_fmt(c
, "panic after error"));
/*
 * bch2_topology_error() - report a btree topology error.
 *
 * Sets BCH_FS_topology_error in c->flags. When BCH_FS_fsck_running is not
 * set, the error is escalated through bch2_inconsistent_error() and
 * -BCH_ERR_btree_need_topology_repair is returned. The other visible return
 * requests the check_topology recovery pass and, via the GNU ?: shorthand,
 * yields that call's result if it is nonzero, otherwise
 * -BCH_ERR_btree_node_read_validate_error.
 *
 * NOTE(review): the line joining the two returns (presumably an else) is not
 * present in this listing.
 */
33 int bch2_topology_error(struct bch_fs
*c
)
35 set_bit(BCH_FS_topology_error
, &c
->flags
);
36 if (!test_bit(BCH_FS_fsck_running
, &c
->flags
)) {
37 bch2_inconsistent_error(c
);
38 return -BCH_ERR_btree_need_topology_repair
;
40 return bch2_run_explicit_recovery_pass(c
, BCH_RECOVERY_PASS_check_topology
) ?:
41 -BCH_ERR_btree_node_read_validate_error
;
/*
 * bch2_fatal_error() - call bch2_fs_emergency_read_only() and, when it
 * returns nonzero, log a fatal-error message for the filesystem.
 */
45 void bch2_fatal_error(struct bch_fs
*c
)
47 if (bch2_fs_emergency_read_only(c
))
48 bch_err(c
, "fatal error - emergency read only");
/*
 * bch2_io_error_work() - work item handler reacting to too many IO errors on
 * a device.
 *
 * @work: the io_error_work member embedded in a struct bch_dev; the owning
 *        device is recovered with container_of().
 *
 * Under c->state_lock (held for write) the handler asks
 * bch2_dev_state_allowed() whether the device may go to BCH_MEMBER_STATE_ro
 * with BCH_FORCE_IF_DEGRADED. The visible conditional expression then selects
 * between __bch2_dev_set_state() on the device and
 * bch2_fs_emergency_read_only() on the whole filesystem, and the log message
 * names whichever of the two was set RO.
 *
 * NOTE(review): the declaration of dev, the head of the if statement and the
 * head of the logging call are not present in this listing.
 */
51 void bch2_io_error_work(struct work_struct
*work
)
53 struct bch_dev
*ca
= container_of(work
, struct bch_dev
, io_error_work
);
54 struct bch_fs
*c
= ca
->fs
;
/* Device state changes below are made with the state lock held for write. */
57 down_write(&c
->state_lock
);
58 dev
= bch2_dev_state_allowed(c
, ca
, BCH_MEMBER_STATE_ro
,
59 BCH_FORCE_IF_DEGRADED
);
61 ? __bch2_dev_set_state(c
, ca
, BCH_MEMBER_STATE_ro
,
62 BCH_FORCE_IF_DEGRADED
)
63 : bch2_fs_emergency_read_only(c
))
65 "too many IO errors, setting %s RO",
66 dev
? "device" : "filesystem");
67 up_write(&c
->state_lock
);
/*
 * bch2_io_error() - account one IO error of the given type against a device.
 *
 * @ca:   device the error occurred on
 * @type: index into the per-device ca->errors[] array of atomic counters
 *
 * Only the counter is bumped; the queue_work() call that would schedule
 * io_error_work is commented out in the code below.
 */
70 void bch2_io_error(struct bch_dev
*ca
, enum bch_member_error_type type
)
72 atomic64_inc(&ca
->errors
[type
]);
73 //queue_work(system_long_wq, &ca->io_error_work);
/*
 * parse_yn_response() - convert a line of user input into an enum ask_yn.
 *
 * Both bch2_fsck_ask_yn() variants in this file keep prompting while this
 * function returns a negative value, so a negative result means the input
 * was not understood.
 *
 * NOTE(review): the body of this function is not present in this listing.
 */
83 static enum ask_yn
parse_yn_response(char *buf
)
/*
 * bch2_fsck_ask_yn() - prompt for a yes/no answer over the filesystem's
 * redirected stdio (c->stdio) and parse the reply.
 *
 * @c:     filesystem whose stdio redirect is used for the prompt and reply
 * @trans: btree transaction of the caller; may be NULL (see the ternary on
 *         unlock_long_at). It is unlocked before prompting.
 *
 * The prompt is repeated until parse_yn_response() returns a non-negative
 * value. When a transaction is supplied, a deadline two seconds (HZ * 2
 * jiffies) ahead is computed and used to bound the readline timeout;
 * bch2_trans_unlock_long() is also called on the transaction.
 *
 * NOTE(review): this listing is missing lines (the early-exit bodies, the
 * loop head, the declaration of t and ret, and the error handling around the
 * readline result), so the exact conditions guarding each call below cannot
 * be confirmed from here.
 */
102 static enum ask_yn
bch2_fsck_ask_yn(struct bch_fs
*c
, struct btree_trans
*trans
)
104 struct stdio_redirect
*stdio
= c
->stdio
;
/* A stdio filter owned by a different task than the current one is special-cased. */
106 if (c
->stdio_filter
&& c
->stdio_filter
!= current
)
113 bch2_trans_unlock(trans
);
/* Deadline used below to bound the wait; zero when there is no transaction. */
115 unsigned long unlock_long_at
= trans
? jiffies
+ HZ
* 2 : 0;
116 darray_char line
= {};
121 bch2_print(c
, " (y,n, or Y,N for all errors of this type) ");
124 ? max_t(long, unlock_long_at
- jiffies
, 0)
125 : MAX_SCHEDULE_TIMEOUT
;
127 int r
= bch2_stdio_redirect_readline_timeout(stdio
, &line
, t
);
129 bch2_trans_unlock_long(trans
);
/* Overwrite the last character read with a NUL so the line can be parsed as a C string. */
139 darray_last(line
) = '\0';
140 } while ((ret
= parse_yn_response(line
.data
)) < 0);
147 #include "tools-util.h"
/*
 * bch2_fsck_ask_yn() - variant that prompts on stdout and reads the reply
 * from stdin with getline().
 *
 * The prompt is repeated until parse_yn_response() returns a non-negative
 * value; a getline() failure terminates through die().
 *
 * NOTE(review): presumably selected by a preprocessor conditional that is not
 * present in this listing, since a function of the same name is defined
 * earlier in the file. The declarations of buf, buflen and ret and the loop
 * head are also missing here.
 */
149 static enum ask_yn
bch2_fsck_ask_yn(struct bch_fs
*c
, struct btree_trans
*trans
)
156 fputs(" (y,n, or Y,N for all errors of this type) ", stdout
);
159 if (getline(&buf
, &buflen
, stdin
) < 0)
160 die("error reading from standard input");
161 } while ((ret
= parse_yn_response(buf
)) < 0);
/*
 * fsck_err_get() - look up or create the per-message fsck error state for a
 * format string.
 *
 * @c:   filesystem owning the c->fsck_error_msgs list
 * @fmt: format string identifying the error message
 *
 * The visible code walks c->fsck_error_msgs, moves a matching entry to the
 * head of the list, and otherwise allocates a zeroed entry with GFP_NOFS and
 * adds it at the head. An allocation failure is logged once, guarded by
 * c->fsck_alloc_msgs_err.
 *
 * NOTE(review): the match condition inside the loop, all return statements
 * and the initialisation of the new entry beyond its list head are not
 * present in this listing. The caller in __bch2_fsck_err() takes
 * c->fsck_error_msgs_lock around this call.
 */
169 static struct fsck_err_state
*fsck_err_get(struct bch_fs
*c
, const char *fmt
)
171 struct fsck_err_state
*s
;
173 if (!test_bit(BCH_FS_fsck_running
, &c
->flags
))
176 list_for_each_entry(s
, &c
->fsck_error_msgs
, list
)
179 * move it to the head of the list: repeated fsck errors
182 list_move(&s
->list
, &c
->fsck_error_msgs
);
/* No existing entry was returned: allocate a fresh, zeroed one. */
186 s
= kzalloc(sizeof(*s
), GFP_NOFS
);
/* Report the allocation failure only the first time it happens. */
188 if (!c
->fsck_alloc_msgs_err
)
189 bch_err(c
, "kmalloc err, cannot ratelimit fsck errs");
190 c
->fsck_alloc_msgs_err
= true;
194 INIT_LIST_HEAD(&s
->list
);
196 list_add(&s
->list
, &c
->fsck_error_msgs
);
/*
 * prt_actioning() - print the action word of a prompt in its -ing form.
 *
 * @out:    printbuf to append to
 * @action: prompt word; must end in a question mark (enforced by BUG_ON)
 *
 * NOTE(review): action[len - 1] is read right after strlen(), so an empty
 * string would index before the start of the buffer; callers are assumed to
 * always pass a non-empty string - confirm.
 * NOTE(review): the lines that adjust len and append the suffix are not
 * present in this listing.
 */
200 /* s/fix?/fixing/ s/recreate?/recreating/ */
201 static void prt_actioning(struct printbuf
*out
, const char *action
)
203 unsigned len
= strlen(action
);
205 BUG_ON(action
[len
- 1] != '?');
208 if (action
[len
- 1] == 'e')
211 prt_bytes(out
, action
, len
);
/*
 * Extra fsck flags per error id, indexed by BCH_FSCK_ERR_* and filled in by
 * expanding an x-macro list with the x() definition below. __bch2_fsck_err()
 * ORs the entry for its err argument into the caller-supplied flags.
 *
 * NOTE(review): the x-macro list invocation and the closing of the
 * initialiser are not present in this listing.
 */
215 static const u8 fsck_flags_extra
[] = {
216 #define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
/*
 * __bch2_fsck_err() - format, report and decide how to handle one fsck error.
 *
 * @c:     filesystem the error belongs to
 * @trans: btree transaction of the caller; passed to bch2_fsck_ask_yn() and
 *         relocked with bch2_trans_relock() after prompting
 * @flags: FSCK_* bits; the entry of fsck_flags_extra[] for @err is ORed in
 * @err:   error id, counted through bch2_sb_error_count()
 * @fmt:   printf-style format of the message, followed by its arguments
 *
 * Return values visible in this listing are -BCH_ERR_fsck_fix,
 * -BCH_ERR_fsck_ignore (the initial value), -BCH_ERR_fsck_errors_not_fixed
 * and the result of bch2_trans_relock().
 *
 * Visible decision order once the message has been formatted:
 *   1. fixable + AUTOFIX with the errors option set to continue or fix_safe:
 *      fix.
 *   2. fsck not running: shut down unless the errors option is continue and
 *      the error is fixable or ignorable; otherwise fix if fixable, else
 *      continue.
 *   3. fix_errors == FSCK_FIX_exit: exit without fixing.
 *   4. fixable: follow the remembered per-message answer or the fix_errors
 *      option, prompting the user when that is FSCK_FIX_ask.
 *   5. otherwise annotate the message (run fsck / repair unimplemented).
 * An ignore result is upgraded to errors_not_fixed when fix_errors is
 * FSCK_FIX_exit or the error is not ignorable.
 *
 * NOTE(review): many lines of this function are not present in this listing
 * (va_start/va_end, NULL checks, else branches, labels, cleanup), so the
 * comments below describe only what the remaining lines show.
 * NOTE(review): out->buf[out->pos - 1] assumes the formatted message is not
 * empty - confirm.
 */
221 int __bch2_fsck_err(struct bch_fs
*c
,
222 struct btree_trans
*trans
,
223 enum bch_fsck_flags flags
,
224 enum bch_sb_error_id err
,
225 const char *fmt
, ...)
227 struct fsck_err_state
*s
= NULL
;
229 bool print
= true, suppressing
= false, inconsistent
= false;
230 struct printbuf buf
= PRINTBUF
, *out
= &buf
;
231 int ret
= -BCH_ERR_fsck_ignore
;
232 const char *action_orig
= "fix?", *action
= action_orig
;
/* Merge in the statically configured flags for this error id, if in range. */
236 if (!WARN_ON(err
>= ARRAY_SIZE(fsck_flags_extra
)))
237 flags
|= fsck_flags_extra
[err
];
243 * Ugly: if there's a transaction in the current task it has to be
244 * passed in to unlock if we prompt for user input.
246 * But, plumbing a transaction and transaction restarts into
247 * bkey_validate() is problematic.
250 * - make all bkey errors AUTOFIX, they're simple anyways (we just
252 * - and we don't need to warn if we're not prompting
254 WARN_ON((flags
& FSCK_CAN_FIX
) &&
255 !(flags
& FSCK_AUTOFIX
) &&
257 bch2_current_has_btree_trans(c
));
/* Fixable errors marked silent in the superblock are fixed without any report. */
259 if ((flags
& FSCK_CAN_FIX
) &&
260 test_bit(err
, c
->sb
.errors_silent
))
261 return -BCH_ERR_fsck_fix
;
263 bch2_sb_error_count(c
, err
);
266 prt_vprintf(out
, fmt
, args
);
269 /* Custom fix/continue/recreate/etc.? */
270 if (out
->buf
[out
->pos
- 1] == '?') {
271 const char *p
= strrchr(out
->buf
, ',');
273 out
->pos
= p
- out
->buf
;
274 action
= kstrdup(p
+ 2, GFP_KERNEL
);
/* The per-message state list is accessed under fsck_error_msgs_lock. */
282 mutex_lock(&c
->fsck_error_msgs_lock
);
283 s
= fsck_err_get(c
, fmt
);
286 * We may be called multiple times for the same error on
287 * transaction restart - this memoizes instead of asking the user
288 * multiple times for the same error:
290 if (s
->last_msg
&& !strcmp(buf
.buf
, s
->last_msg
)) {
292 mutex_unlock(&c
->fsck_error_msgs_lock
);
297 s
->last_msg
= kstrdup(buf
.buf
, GFP_KERNEL
);
299 mutex_unlock(&c
->fsck_error_msgs_lock
);
/* Ratelimiting applies once the per-message count reaches FSCK_ERR_RATELIMIT_NR. */
304 if (c
->opts
.ratelimit_errors
&&
305 !(flags
& FSCK_NO_RATELIMIT
) &&
306 s
->nr
>= FSCK_ERR_RATELIMIT_NR
) {
307 if (s
->nr
== FSCK_ERR_RATELIMIT_NR
)
316 #ifdef BCACHEFS_LOG_PREFIX
317 if (!strncmp(fmt
, "bcachefs:", 9))
318 prt_printf(out
, bch2_log_msg(c
, ""));
/* Start of the decision chain summarised in the header comment. */
321 if ((flags
& FSCK_CAN_FIX
) &&
322 (flags
& FSCK_AUTOFIX
) &&
323 (c
->opts
.errors
== BCH_ON_ERROR_continue
||
324 c
->opts
.errors
== BCH_ON_ERROR_fix_safe
)) {
326 prt_actioning(out
, action
);
327 ret
= -BCH_ERR_fsck_fix
;
328 } else if (!test_bit(BCH_FS_fsck_running
, &c
->flags
)) {
329 if (c
->opts
.errors
!= BCH_ON_ERROR_continue
||
330 !(flags
& (FSCK_CAN_FIX
|FSCK_CAN_IGNORE
))) {
331 prt_str(out
, ", shutting down");
333 ret
= -BCH_ERR_fsck_errors_not_fixed
;
334 } else if (flags
& FSCK_CAN_FIX
) {
336 prt_actioning(out
, action
);
337 ret
= -BCH_ERR_fsck_fix
;
339 prt_str(out
, ", continuing");
340 ret
= -BCH_ERR_fsck_ignore
;
342 } else if (c
->opts
.fix_errors
== FSCK_FIX_exit
) {
343 prt_str(out
, ", exiting");
344 ret
= -BCH_ERR_fsck_errors_not_fixed
;
345 } else if (flags
& FSCK_CAN_FIX
) {
346 int fix
= s
&& s
->fix
348 : c
->opts
.fix_errors
;
350 if (fix
== FSCK_FIX_ask
) {
352 prt_str(out
, action
);
/* The message goes to the redirected stdio when there is one, else to the kernel log. */
354 if (bch2_fs_stdio_redirect(c
))
355 bch2_print(c
, "%s", out
->buf
);
357 bch2_print_string_as_lines(KERN_ERR
, out
->buf
);
360 int ask
= bch2_fsck_ask_yn(c
, trans
);
363 ret
= bch2_trans_relock(trans
);
365 mutex_unlock(&c
->fsck_error_msgs_lock
);
/* Answers at or above YN_ALLNO are stored in s->fix for later errors of this type. */
370 if (ask
>= YN_ALLNO
&& s
)
371 s
->fix
= ask
== YN_ALLNO
377 : -BCH_ERR_fsck_ignore
;
378 } else if (fix
== FSCK_FIX_yes
||
379 (c
->opts
.nochanges
&&
380 !(flags
& FSCK_CAN_IGNORE
))) {
382 prt_actioning(out
, action
);
383 ret
= -BCH_ERR_fsck_fix
;
385 prt_str(out
, ", not ");
386 prt_actioning(out
, action
);
388 } else if (flags
& FSCK_NEED_FSCK
) {
389 prt_str(out
, " (run fsck to correct)");
391 prt_str(out
, " (repair unimplemented)");
/* An ignore result is not acceptable in exit mode or for non-ignorable errors. */
394 if (ret
== -BCH_ERR_fsck_ignore
&&
395 (c
->opts
.fix_errors
== FSCK_FIX_exit
||
396 !(flags
& FSCK_CAN_IGNORE
)))
397 ret
= -BCH_ERR_fsck_errors_not_fixed
;
400 test_bit(BCH_FS_fsck_running
, &c
->flags
) &&
401 (ret
!= -BCH_ERR_fsck_fix
&&
402 ret
!= -BCH_ERR_fsck_ignore
);
408 if (bch2_fs_stdio_redirect(c
))
409 bch2_print(c
, "%s\n", out
->buf
);
411 bch2_print_string_as_lines(KERN_ERR
, out
->buf
);
415 bch_err(c
, "Unable to continue, halting");
416 else if (suppressing
)
417 bch_err(c
, "Ratelimiting new instances of previous error");
422 mutex_unlock(&c
->fsck_error_msgs_lock
);
425 bch2_inconsistent_error(c
);
/* Record the outcome in the filesystem flags. */
427 if (ret
== -BCH_ERR_fsck_fix
) {
428 set_bit(BCH_FS_errors_fixed
, &c
->flags
);
430 set_bit(BCH_FS_errors_not_fixed
, &c
->flags
);
431 set_bit(BCH_FS_error
, &c
->flags
);
/* action differs from action_orig only when it was replaced by the kstrdup() copy above. */
434 if (action
!= action_orig
)
/*
 * __bch2_bkey_fsck_err() - report an invalid bkey through __bch2_fsck_err().
 *
 * @c:              filesystem
 * @validate_flags: BCH_VALIDATE_* bits describing the validation context
 * @err:            error id forwarded to __bch2_fsck_err()
 * @fmt:            printf-style description of what is wrong with the key
 *
 * With BCH_VALIDATE_silent nothing is reported and -BCH_ERR_fsck_delete_bkey
 * is returned directly. Otherwise the error is marked fixable and autofix
 * unless validation runs in a write or commit context, and the message is
 * built from the key text followed by the formatted description and a
 * delete prompt. The call to __bch2_fsck_err() passes a NULL transaction.
 *
 * NOTE(review): the parameter declaring k, the va_list handling and the
 * cleanup and return after the call are not present in this listing.
 * NOTE(review): the prompt suffix here is introduced by a colon, while the
 * custom-action parsing in __bch2_fsck_err() searches for the last comma in
 * the message - confirm the two agree.
 */
440 int __bch2_bkey_fsck_err(struct bch_fs
*c
,
442 enum bch_validate_flags validate_flags
,
443 enum bch_sb_error_id err
,
444 const char *fmt
, ...)
446 if (validate_flags
& BCH_VALIDATE_silent
)
447 return -BCH_ERR_fsck_delete_bkey
;
449 unsigned fsck_flags
= 0;
/* Keys validated outside the write and commit paths may be fixed automatically. */
450 if (!(validate_flags
& (BCH_VALIDATE_write
|BCH_VALIDATE_commit
)))
451 fsck_flags
|= FSCK_AUTOFIX
|FSCK_CAN_FIX
;
453 struct printbuf buf
= PRINTBUF
;
456 prt_str(&buf
, "invalid bkey ");
457 bch2_bkey_val_to_text(&buf
, c
, k
);
458 prt_str(&buf
, "\n ");
460 prt_vprintf(&buf
, fmt
, args
);
462 prt_str(&buf
, ": delete?");
/* The prebuilt text is passed as a %s argument so it is not reinterpreted as a format. */
464 int ret
= __bch2_fsck_err(c
, NULL
, fsck_flags
, err
, "%s", buf
.buf
);
/*
 * bch2_flush_fsck_errs() - walk the per-message fsck error list under
 * c->fsck_error_msgs_lock and, for every entry that was ratelimited and has a
 * saved message, log how many times it was seen together with that message.
 *
 * NOTE(review): the loop uses the _safe iterator, which suggests entries are
 * removed and freed while walking, but those lines are not present in this
 * listing - confirm.
 */
469 void bch2_flush_fsck_errs(struct bch_fs
*c
)
471 struct fsck_err_state
*s
, *n
;
473 mutex_lock(&c
->fsck_error_msgs_lock
);
475 list_for_each_entry_safe(s
, n
, &c
->fsck_error_msgs
, list
) {
476 if (s
->ratelimited
&& s
->last_msg
)
477 bch_err(c
, "Saw %llu errors like:\n %s", s
->nr
, s
->last_msg
);
484 mutex_unlock(&c
->fsck_error_msgs_lock
);