1 // SPDX-License-Identifier: GPL-2.0
6 #include "journal_seq_blacklist.h"
10 * journal_seq_blacklist machinery:
12 * To guarantee order of btree updates after a crash, we need to detect when a
13 * btree node entry (bset) is newer than the newest journal entry that was
14 * successfully written, and ignore it - effectively ignoring any btree updates
15 * that didn't make it into the journal.
17 * If we didn't do this, we might have two btree nodes, a and b, both with
18 * updates that weren't written to the journal yet: if b was updated after a,
19 * but b was flushed and not a - oops; on recovery we'll find that the updates
20 * to b happened, but not the updates to a that happened before it.
22 * Ignoring bsets that are newer than the newest journal entry is always safe,
23 * because everything they contain will also have been journalled - and must
24 * still be present in the journal on disk until a journal entry has been
25 * written _after_ that bset was written.
27 * To accomplish this, bsets record the newest journal sequence number they
28 * contain updates for; then, on startup, the btree code queries the journal
29 * code to ask "Is this sequence number newer than the newest journal entry? If
32 * When this happens, we must blacklist that journal sequence number: the
33 * journal must not write any entries with that sequence number, and it must
34 * record that it was blacklisted so that a) on recovery we don't think we have
35 * missing journal entries and b) so that the btree code continues to ignore
36 * that bset, until that btree node is rewritten.
39 static unsigned sb_blacklist_u64s(unsigned nr
)
41 struct bch_sb_field_journal_seq_blacklist
*bl
;
43 return (sizeof(*bl
) + sizeof(bl
->start
[0]) * nr
) / sizeof(u64
);
46 int bch2_journal_seq_blacklist_add(struct bch_fs
*c
, u64 start
, u64 end
)
48 struct bch_sb_field_journal_seq_blacklist
*bl
;
52 mutex_lock(&c
->sb_lock
);
53 bl
= bch2_sb_field_get(c
->disk_sb
.sb
, journal_seq_blacklist
);
54 nr
= blacklist_nr_entries(bl
);
57 struct journal_seq_blacklist_entry
*e
=
60 if (end
< le64_to_cpu(e
->start
))
63 if (start
> le64_to_cpu(e
->end
)) {
69 * Entry is contiguous or overlapping with new entry: merge it
70 * with new entry, and delete:
73 start
= min(start
, le64_to_cpu(e
->start
));
74 end
= max(end
, le64_to_cpu(e
->end
));
75 array_remove_item(bl
->start
, nr
, i
);
78 bl
= bch2_sb_field_resize(&c
->disk_sb
, journal_seq_blacklist
,
79 sb_blacklist_u64s(nr
+ 1));
81 ret
= -BCH_ERR_ENOSPC_sb_journal_seq_blacklist
;
85 array_insert_item(bl
->start
, nr
, i
, ((struct journal_seq_blacklist_entry
) {
86 .start
= cpu_to_le64(start
),
87 .end
= cpu_to_le64(end
),
89 c
->disk_sb
.sb
->features
[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3
);
91 ret
= bch2_write_super(c
);
93 mutex_unlock(&c
->sb_lock
);
95 return ret
?: bch2_blacklist_table_initialize(c
);
98 static int journal_seq_blacklist_table_cmp(const void *_l
, const void *_r
)
100 const struct journal_seq_blacklist_table_entry
*l
= _l
;
101 const struct journal_seq_blacklist_table_entry
*r
= _r
;
103 return cmp_int(l
->start
, r
->start
);
106 bool bch2_journal_seq_is_blacklisted(struct bch_fs
*c
, u64 seq
,
109 struct journal_seq_blacklist_table
*t
= c
->journal_seq_blacklist_table
;
110 struct journal_seq_blacklist_table_entry search
= { .start
= seq
};
116 idx
= eytzinger0_find_le(t
->entries
, t
->nr
,
117 sizeof(t
->entries
[0]),
118 journal_seq_blacklist_table_cmp
,
123 BUG_ON(t
->entries
[idx
].start
> seq
);
125 if (seq
>= t
->entries
[idx
].end
)
129 t
->entries
[idx
].dirty
= true;
133 int bch2_blacklist_table_initialize(struct bch_fs
*c
)
135 struct bch_sb_field_journal_seq_blacklist
*bl
=
136 bch2_sb_field_get(c
->disk_sb
.sb
, journal_seq_blacklist
);
137 struct journal_seq_blacklist_table
*t
;
138 unsigned i
, nr
= blacklist_nr_entries(bl
);
143 t
= kzalloc(struct_size(t
, entries
, nr
), GFP_KERNEL
);
145 return -BCH_ERR_ENOMEM_blacklist_table_init
;
149 for (i
= 0; i
< nr
; i
++) {
150 t
->entries
[i
].start
= le64_to_cpu(bl
->start
[i
].start
);
151 t
->entries
[i
].end
= le64_to_cpu(bl
->start
[i
].end
);
154 eytzinger0_sort(t
->entries
,
156 sizeof(t
->entries
[0]),
157 journal_seq_blacklist_table_cmp
,
160 kfree(c
->journal_seq_blacklist_table
);
161 c
->journal_seq_blacklist_table
= t
;
165 static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb
*sb
, struct bch_sb_field
*f
,
166 enum bch_validate_flags flags
, struct printbuf
*err
)
168 struct bch_sb_field_journal_seq_blacklist
*bl
=
169 field_to_type(f
, journal_seq_blacklist
);
170 unsigned i
, nr
= blacklist_nr_entries(bl
);
172 for (i
= 0; i
< nr
; i
++) {
173 struct journal_seq_blacklist_entry
*e
= bl
->start
+ i
;
175 if (le64_to_cpu(e
->start
) >=
176 le64_to_cpu(e
->end
)) {
177 prt_printf(err
, "entry %u start >= end (%llu >= %llu)",
178 i
, le64_to_cpu(e
->start
), le64_to_cpu(e
->end
));
179 return -BCH_ERR_invalid_sb_journal_seq_blacklist
;
183 le64_to_cpu(e
[0].end
) >
184 le64_to_cpu(e
[1].start
)) {
185 prt_printf(err
, "entry %u out of order with next entry (%llu > %llu)",
186 i
+ 1, le64_to_cpu(e
[0].end
), le64_to_cpu(e
[1].start
));
187 return -BCH_ERR_invalid_sb_journal_seq_blacklist
;
194 static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf
*out
,
196 struct bch_sb_field
*f
)
198 struct bch_sb_field_journal_seq_blacklist
*bl
=
199 field_to_type(f
, journal_seq_blacklist
);
200 struct journal_seq_blacklist_entry
*i
;
201 unsigned nr
= blacklist_nr_entries(bl
);
203 for (i
= bl
->start
; i
< bl
->start
+ nr
; i
++) {
205 prt_printf(out
, " ");
207 prt_printf(out
, "%llu-%llu",
208 le64_to_cpu(i
->start
),
209 le64_to_cpu(i
->end
));
214 const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist
= {
215 .validate
= bch2_sb_journal_seq_blacklist_validate
,
216 .to_text
= bch2_sb_journal_seq_blacklist_to_text
219 bool bch2_blacklist_entries_gc(struct bch_fs
*c
)
221 struct journal_seq_blacklist_entry
*src
, *dst
;
223 struct bch_sb_field_journal_seq_blacklist
*bl
=
224 bch2_sb_field_get(c
->disk_sb
.sb
, journal_seq_blacklist
);
228 unsigned nr
= blacklist_nr_entries(bl
);
231 struct journal_seq_blacklist_table
*t
= c
->journal_seq_blacklist_table
;
235 for (src
= bl
->start
, i
= t
->nr
== 0 ? 0 : eytzinger0_first(t
->nr
);
236 src
< bl
->start
+ nr
;
237 src
++, i
= eytzinger0_next(i
, nr
)) {
238 BUG_ON(t
->entries
[i
].start
!= le64_to_cpu(src
->start
));
239 BUG_ON(t
->entries
[i
].end
!= le64_to_cpu(src
->end
));
241 if (t
->entries
[i
].dirty
|| t
->entries
[i
].end
>= c
->journal
.oldest_seq_found_ondisk
)
245 unsigned new_nr
= dst
- bl
->start
;
249 bch_verbose(c
, "nr blacklist entries was %u, now %u", nr
, new_nr
);
251 bl
= bch2_sb_field_resize(&c
->disk_sb
, journal_seq_blacklist
,
252 new_nr
? sb_blacklist_u64s(new_nr
) : 0);
253 BUG_ON(new_nr
&& !bl
);