2 * Copyright (C) STRATO AG 2011. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
20 * This module can be used to catch cases when the btrfs kernel
21 * code executes write requests to the disk that bring the file
22 * system in an inconsistent state. In such a state, a power-loss
23 * or kernel panic event would cause that the data on disk is
24 * lost or at least damaged.
26 * Code is added that examines all block write requests during
27 * runtime (including writes of the super block). Three rules
28 * are verified and an error is printed on violation of the
30 * 1. It is not allowed to write a disk block which is
31 * currently referenced by the super block (either directly
33 * 2. When a super block is written, it is verified that all
34 * referenced (directly or indirectly) blocks fulfill the
35 * following requirements:
36 * 2a. All referenced blocks have either been present when
37 * the file system was mounted, (i.e., they have been
38 * referenced by the super block) or they have been
39 * written since then and the write completion callback
40 * was called and no write error was indicated and a
41 * FLUSH request to the device where these blocks are
42 * located was received and completed.
43 * 2b. All referenced blocks need to have a generation
44 * number which is equal to the parent's number.
46 * One issue that was found using this module was that the log
47 * tree on disk became temporarily corrupted because disk blocks
48 * that had been in use for the log tree had been freed and
49 * reused too early, while being referenced by the written super
52 * The search term in the kernel log that can be used to filter
53 * on the existence of detected integrity issues is
56 * The integrity check is enabled via mount options. These
57 * mount options are only supported if the integrity check
58 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
60 * Example #1, apply integrity checks to all metadata:
61 * mount /dev/sdb1 /mnt -o check_int
63 * Example #2, apply integrity checks to all metadata and
65 * mount /dev/sdb1 /mnt -o check_int_data
67 * Example #3, apply integrity checks to all metadata and dump
68 * the tree that the super block references to kernel messages
69 * each time after a super block was written:
70 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
72 * If the integrity check tool is included and activated in
73 * the mount options, plenty of kernel memory is used, and
74 * plenty of additional CPU cycles are spent. Enabling this
75 * functionality is not intended for normal use. In most
76 * cases, unless you are a btrfs developer who needs to verify
77 * the integrity of (super)-block write requests, do not
78 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
79 * include and compile the integrity check tool.
82 #include <linux/sched.h>
83 #include <linux/slab.h>
84 #include <linux/buffer_head.h>
85 #include <linux/mutex.h>
86 #include <linux/crc32c.h>
87 #include <linux/genhd.h>
88 #include <linux/blkdev.h>
91 #include "transaction.h"
92 #include "extent_io.h"
94 #include "print-tree.h"
96 #include "check-integrity.h"
97 #include "rcu-string.h"
99 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
100 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
101 #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
102 #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
103 #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
104 #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
105 #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
106 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
107 * excluding " [...]" */
108 #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
111 * The definition of the bitmask fields for the print_mask.
112 * They are specified with the mount option check_int_print_mask.
114 #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001
115 #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002
116 #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004
117 #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008
118 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010
119 #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020
120 #define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040
121 #define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080
122 #define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100
123 #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200
124 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
125 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
126 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
128 struct btrfsic_dev_state
;
129 struct btrfsic_state
;
131 struct btrfsic_block
{
132 u32 magic_num
; /* only used for debug purposes */
133 unsigned int is_metadata
:1; /* if it is meta-data, not data-data */
134 unsigned int is_superblock
:1; /* if it is one of the superblocks */
135 unsigned int is_iodone
:1; /* if is done by lower subsystem */
136 unsigned int iodone_w_error
:1; /* error was indicated to endio */
137 unsigned int never_written
:1; /* block was added because it was
138 * referenced, not because it was
140 unsigned int mirror_num
; /* large enough to hold
141 * BTRFS_SUPER_MIRROR_MAX */
142 struct btrfsic_dev_state
*dev_state
;
143 u64 dev_bytenr
; /* key, physical byte num on disk */
144 u64 logical_bytenr
; /* logical byte num on disk */
146 struct btrfs_disk_key disk_key
; /* extra info to print in case of
147 * issues, will not always be correct */
148 struct list_head collision_resolving_node
; /* list node */
149 struct list_head all_blocks_node
; /* list node */
151 /* the following two lists contain block_link items */
152 struct list_head ref_to_list
; /* list */
153 struct list_head ref_from_list
; /* list */
154 struct btrfsic_block
*next_in_same_bio
;
155 void *orig_bio_bh_private
;
159 } orig_bio_bh_end_io
;
160 int submit_bio_bh_rw
;
161 u64 flush_gen
; /* only valid if !never_written */
165 * Elements of this type are allocated dynamically and required because
166 * each block object can refer to and can be ref from multiple blocks.
167 * The key to lookup them in the hashtable is the dev_bytenr of
168 * the block ref to plus the one from the block referred from.
169 * The fact that they are searchable via a hashtable and that a
170 * ref_cnt is maintained is not required for the btrfs integrity
171 * check algorithm itself, it is only used to make the output more
172 * beautiful in case that an error is detected (an error is defined
173 * as a write operation to a block while that block is still referenced).
175 struct btrfsic_block_link
{
176 u32 magic_num
; /* only used for debug purposes */
178 struct list_head node_ref_to
; /* list node */
179 struct list_head node_ref_from
; /* list node */
180 struct list_head collision_resolving_node
; /* list node */
181 struct btrfsic_block
*block_ref_to
;
182 struct btrfsic_block
*block_ref_from
;
183 u64 parent_generation
;
186 struct btrfsic_dev_state
{
187 u32 magic_num
; /* only used for debug purposes */
188 struct block_device
*bdev
;
189 struct btrfsic_state
*state
;
190 struct list_head collision_resolving_node
; /* list node */
191 struct btrfsic_block dummy_block_for_bio_bh_flush
;
193 char name
[BDEVNAME_SIZE
];
196 struct btrfsic_block_hashtable
{
197 struct list_head table
[BTRFSIC_BLOCK_HASHTABLE_SIZE
];
200 struct btrfsic_block_link_hashtable
{
201 struct list_head table
[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE
];
204 struct btrfsic_dev_state_hashtable
{
205 struct list_head table
[BTRFSIC_DEV2STATE_HASHTABLE_SIZE
];
208 struct btrfsic_block_data_ctx
{
209 u64 start
; /* virtual bytenr */
210 u64 dev_bytenr
; /* physical bytenr on device */
212 struct btrfsic_dev_state
*dev
;
218 /* This structure is used to implement recursion without occupying
219 * any stack space, refer to btrfsic_process_metablock() */
220 struct btrfsic_stack_frame
{
228 struct btrfsic_block
*block
;
229 struct btrfsic_block_data_ctx
*block_ctx
;
230 struct btrfsic_block
*next_block
;
231 struct btrfsic_block_data_ctx next_block_ctx
;
232 struct btrfs_header
*hdr
;
233 struct btrfsic_stack_frame
*prev
;
236 /* Some state per mounted filesystem */
237 struct btrfsic_state
{
239 int include_extent_data
;
241 struct list_head all_blocks_list
;
242 struct btrfsic_block_hashtable block_hashtable
;
243 struct btrfsic_block_link_hashtable block_link_hashtable
;
244 struct btrfs_root
*root
;
245 u64 max_superblock_generation
;
246 struct btrfsic_block
*latest_superblock
;
251 static void btrfsic_block_init(struct btrfsic_block
*b
);
252 static struct btrfsic_block
*btrfsic_block_alloc(void);
253 static void btrfsic_block_free(struct btrfsic_block
*b
);
254 static void btrfsic_block_link_init(struct btrfsic_block_link
*n
);
255 static struct btrfsic_block_link
*btrfsic_block_link_alloc(void);
256 static void btrfsic_block_link_free(struct btrfsic_block_link
*n
);
257 static void btrfsic_dev_state_init(struct btrfsic_dev_state
*ds
);
258 static struct btrfsic_dev_state
*btrfsic_dev_state_alloc(void);
259 static void btrfsic_dev_state_free(struct btrfsic_dev_state
*ds
);
260 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable
*h
);
261 static void btrfsic_block_hashtable_add(struct btrfsic_block
*b
,
262 struct btrfsic_block_hashtable
*h
);
263 static void btrfsic_block_hashtable_remove(struct btrfsic_block
*b
);
264 static struct btrfsic_block
*btrfsic_block_hashtable_lookup(
265 struct block_device
*bdev
,
267 struct btrfsic_block_hashtable
*h
);
268 static void btrfsic_block_link_hashtable_init(
269 struct btrfsic_block_link_hashtable
*h
);
270 static void btrfsic_block_link_hashtable_add(
271 struct btrfsic_block_link
*l
,
272 struct btrfsic_block_link_hashtable
*h
);
273 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link
*l
);
274 static struct btrfsic_block_link
*btrfsic_block_link_hashtable_lookup(
275 struct block_device
*bdev_ref_to
,
276 u64 dev_bytenr_ref_to
,
277 struct block_device
*bdev_ref_from
,
278 u64 dev_bytenr_ref_from
,
279 struct btrfsic_block_link_hashtable
*h
);
280 static void btrfsic_dev_state_hashtable_init(
281 struct btrfsic_dev_state_hashtable
*h
);
282 static void btrfsic_dev_state_hashtable_add(
283 struct btrfsic_dev_state
*ds
,
284 struct btrfsic_dev_state_hashtable
*h
);
285 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state
*ds
);
286 static struct btrfsic_dev_state
*btrfsic_dev_state_hashtable_lookup(
287 struct block_device
*bdev
,
288 struct btrfsic_dev_state_hashtable
*h
);
289 static struct btrfsic_stack_frame
*btrfsic_stack_frame_alloc(void);
290 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame
*sf
);
291 static int btrfsic_process_superblock(struct btrfsic_state
*state
,
292 struct btrfs_fs_devices
*fs_devices
);
293 static int btrfsic_process_metablock(struct btrfsic_state
*state
,
294 struct btrfsic_block
*block
,
295 struct btrfsic_block_data_ctx
*block_ctx
,
296 int limit_nesting
, int force_iodone_flag
);
297 static void btrfsic_read_from_block_data(
298 struct btrfsic_block_data_ctx
*block_ctx
,
299 void *dst
, u32 offset
, size_t len
);
300 static int btrfsic_create_link_to_next_block(
301 struct btrfsic_state
*state
,
302 struct btrfsic_block
*block
,
303 struct btrfsic_block_data_ctx
304 *block_ctx
, u64 next_bytenr
,
306 struct btrfsic_block_data_ctx
*next_block_ctx
,
307 struct btrfsic_block
**next_blockp
,
308 int force_iodone_flag
,
309 int *num_copiesp
, int *mirror_nump
,
310 struct btrfs_disk_key
*disk_key
,
311 u64 parent_generation
);
312 static int btrfsic_handle_extent_data(struct btrfsic_state
*state
,
313 struct btrfsic_block
*block
,
314 struct btrfsic_block_data_ctx
*block_ctx
,
315 u32 item_offset
, int force_iodone_flag
);
316 static int btrfsic_map_block(struct btrfsic_state
*state
, u64 bytenr
, u32 len
,
317 struct btrfsic_block_data_ctx
*block_ctx_out
,
319 static int btrfsic_map_superblock(struct btrfsic_state
*state
, u64 bytenr
,
320 u32 len
, struct block_device
*bdev
,
321 struct btrfsic_block_data_ctx
*block_ctx_out
);
322 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx
*block_ctx
);
323 static int btrfsic_read_block(struct btrfsic_state
*state
,
324 struct btrfsic_block_data_ctx
*block_ctx
);
325 static void btrfsic_dump_database(struct btrfsic_state
*state
);
326 static void btrfsic_complete_bio_end_io(struct bio
*bio
, int err
);
327 static int btrfsic_test_for_metadata(struct btrfsic_state
*state
,
328 char **datav
, unsigned int num_pages
);
329 static void btrfsic_process_written_block(struct btrfsic_dev_state
*dev_state
,
330 u64 dev_bytenr
, char **mapped_datav
,
331 unsigned int num_pages
,
332 struct bio
*bio
, int *bio_is_patched
,
333 struct buffer_head
*bh
,
334 int submit_bio_bh_rw
);
335 static int btrfsic_process_written_superblock(
336 struct btrfsic_state
*state
,
337 struct btrfsic_block
*const block
,
338 struct btrfs_super_block
*const super_hdr
);
339 static void btrfsic_bio_end_io(struct bio
*bp
, int bio_error_status
);
340 static void btrfsic_bh_end_io(struct buffer_head
*bh
, int uptodate
);
341 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state
*state
,
342 const struct btrfsic_block
*block
,
343 int recursion_level
);
344 static int btrfsic_check_all_ref_blocks(struct btrfsic_state
*state
,
345 struct btrfsic_block
*const block
,
346 int recursion_level
);
347 static void btrfsic_print_add_link(const struct btrfsic_state
*state
,
348 const struct btrfsic_block_link
*l
);
349 static void btrfsic_print_rem_link(const struct btrfsic_state
*state
,
350 const struct btrfsic_block_link
*l
);
351 static char btrfsic_get_block_type(const struct btrfsic_state
*state
,
352 const struct btrfsic_block
*block
);
353 static void btrfsic_dump_tree(const struct btrfsic_state
*state
);
354 static void btrfsic_dump_tree_sub(const struct btrfsic_state
*state
,
355 const struct btrfsic_block
*block
,
357 static struct btrfsic_block_link
*btrfsic_block_link_lookup_or_add(
358 struct btrfsic_state
*state
,
359 struct btrfsic_block_data_ctx
*next_block_ctx
,
360 struct btrfsic_block
*next_block
,
361 struct btrfsic_block
*from_block
,
362 u64 parent_generation
);
363 static struct btrfsic_block
*btrfsic_block_lookup_or_add(
364 struct btrfsic_state
*state
,
365 struct btrfsic_block_data_ctx
*block_ctx
,
366 const char *additional_string
,
372 static int btrfsic_process_superblock_dev_mirror(
373 struct btrfsic_state
*state
,
374 struct btrfsic_dev_state
*dev_state
,
375 struct btrfs_device
*device
,
376 int superblock_mirror_num
,
377 struct btrfsic_dev_state
**selected_dev_state
,
378 struct btrfs_super_block
*selected_super
);
379 static struct btrfsic_dev_state
*btrfsic_dev_state_lookup(
380 struct block_device
*bdev
);
381 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state
*state
,
383 struct btrfsic_dev_state
*dev_state
,
386 static struct mutex btrfsic_mutex
;
387 static int btrfsic_is_initialized
;
388 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable
;
/*
 * Put a btrfsic_block into its default state.
 *
 * The visible statements stamp the debug magic number, mark the
 * generation as unknown, zero the logical byte number and the disk key,
 * clear the is_superblock/iodone_w_error/never_written flags, reset the
 * bio/bh bookkeeping pointers and initialise all four embedded list
 * heads as empty lists.
 *
 * NOTE(review): some statements of this function are not visible in this
 * copy of the file; the list above covers only what is shown here.
 */
391 static void btrfsic_block_init(struct btrfsic_block
*b
)
393 b
->magic_num
= BTRFSIC_BLOCK_MAGIC_NUMBER
;
396 b
->logical_bytenr
= 0;
/* no generation is known for the block yet */
397 b
->generation
= BTRFSIC_GENERATION_UNKNOWN
;
398 b
->disk_key
.objectid
= 0;
399 b
->disk_key
.type
= 0;
400 b
->disk_key
.offset
= 0;
402 b
->is_superblock
= 0;
404 b
->iodone_w_error
= 0;
405 b
->never_written
= 0;
407 b
->next_in_same_bio
= NULL
;
408 b
->orig_bio_bh_private
= NULL
;
409 b
->orig_bio_bh_end_io
.bio
= NULL
;
/* every list node / list head starts out empty (self-linked) */
410 INIT_LIST_HEAD(&b
->collision_resolving_node
);
411 INIT_LIST_HEAD(&b
->all_blocks_node
);
412 INIT_LIST_HEAD(&b
->ref_to_list
);
413 INIT_LIST_HEAD(&b
->ref_from_list
);
414 b
->submit_bio_bh_rw
= 0;
418 static struct btrfsic_block
*btrfsic_block_alloc(void)
420 struct btrfsic_block
*b
;
422 b
= kzalloc(sizeof(*b
), GFP_NOFS
);
424 btrfsic_block_init(b
);
429 static void btrfsic_block_free(struct btrfsic_block
*b
)
431 BUG_ON(!(NULL
== b
|| BTRFSIC_BLOCK_MAGIC_NUMBER
== b
->magic_num
));
/*
 * Put a btrfsic_block_link into its default state: stamp the debug magic
 * number, initialise the three embedded list nodes as empty lists and
 * clear both block pointers.
 *
 * NOTE(review): one statement of this function is not visible in this
 * copy of the file; the description covers only what is shown here.
 */
435 static void btrfsic_block_link_init(struct btrfsic_block_link
*l
)
437 l
->magic_num
= BTRFSIC_BLOCK_LINK_MAGIC_NUMBER
;
439 INIT_LIST_HEAD(&l
->node_ref_to
);
440 INIT_LIST_HEAD(&l
->node_ref_from
);
441 INIT_LIST_HEAD(&l
->collision_resolving_node
);
/* the link does not connect any blocks yet */
442 l
->block_ref_to
= NULL
;
443 l
->block_ref_from
= NULL
;
446 static struct btrfsic_block_link
*btrfsic_block_link_alloc(void)
448 struct btrfsic_block_link
*l
;
450 l
= kzalloc(sizeof(*l
), GFP_NOFS
);
452 btrfsic_block_link_init(l
);
457 static void btrfsic_block_link_free(struct btrfsic_block_link
*l
)
459 BUG_ON(!(NULL
== l
|| BTRFSIC_BLOCK_LINK_MAGIC_NUMBER
== l
->magic_num
));
/*
 * Put a btrfsic_dev_state into its default state: stamp the debug magic
 * number, initialise the hash-collision list node, reset last_flush_gen
 * and set up the embedded dummy block.
 *
 * NOTE(review): some statements of this function are not visible in this
 * copy of the file; the description covers only what is shown here.
 */
463 static void btrfsic_dev_state_init(struct btrfsic_dev_state
*ds
)
465 ds
->magic_num
= BTRFSIC_DEV2STATE_MAGIC_NUMBER
;
469 INIT_LIST_HEAD(&ds
->collision_resolving_node
);
470 ds
->last_flush_gen
= 0;
/*
 * The embedded dummy block gets the normal block defaults, is then
 * marked as I/O-done and pointed back at its owning device state.
 */
471 btrfsic_block_init(&ds
->dummy_block_for_bio_bh_flush
);
472 ds
->dummy_block_for_bio_bh_flush
.is_iodone
= 1;
473 ds
->dummy_block_for_bio_bh_flush
.dev_state
= ds
;
476 static struct btrfsic_dev_state
*btrfsic_dev_state_alloc(void)
478 struct btrfsic_dev_state
*ds
;
480 ds
= kzalloc(sizeof(*ds
), GFP_NOFS
);
482 btrfsic_dev_state_init(ds
);
487 static void btrfsic_dev_state_free(struct btrfsic_dev_state
*ds
)
489 BUG_ON(!(NULL
== ds
||
490 BTRFSIC_DEV2STATE_MAGIC_NUMBER
== ds
->magic_num
));
494 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable
*h
)
498 for (i
= 0; i
< BTRFSIC_BLOCK_HASHTABLE_SIZE
; i
++)
499 INIT_LIST_HEAD(h
->table
+ i
);
502 static void btrfsic_block_hashtable_add(struct btrfsic_block
*b
,
503 struct btrfsic_block_hashtable
*h
)
505 const unsigned int hashval
=
506 (((unsigned int)(b
->dev_bytenr
>> 16)) ^
507 ((unsigned int)((uintptr_t)b
->dev_state
->bdev
))) &
508 (BTRFSIC_BLOCK_HASHTABLE_SIZE
- 1);
510 list_add(&b
->collision_resolving_node
, h
->table
+ hashval
);
513 static void btrfsic_block_hashtable_remove(struct btrfsic_block
*b
)
515 list_del(&b
->collision_resolving_node
);
518 static struct btrfsic_block
*btrfsic_block_hashtable_lookup(
519 struct block_device
*bdev
,
521 struct btrfsic_block_hashtable
*h
)
523 const unsigned int hashval
=
524 (((unsigned int)(dev_bytenr
>> 16)) ^
525 ((unsigned int)((uintptr_t)bdev
))) &
526 (BTRFSIC_BLOCK_HASHTABLE_SIZE
- 1);
527 struct list_head
*elem
;
529 list_for_each(elem
, h
->table
+ hashval
) {
530 struct btrfsic_block
*const b
=
531 list_entry(elem
, struct btrfsic_block
,
532 collision_resolving_node
);
534 if (b
->dev_state
->bdev
== bdev
&& b
->dev_bytenr
== dev_bytenr
)
541 static void btrfsic_block_link_hashtable_init(
542 struct btrfsic_block_link_hashtable
*h
)
546 for (i
= 0; i
< BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE
; i
++)
547 INIT_LIST_HEAD(h
->table
+ i
);
550 static void btrfsic_block_link_hashtable_add(
551 struct btrfsic_block_link
*l
,
552 struct btrfsic_block_link_hashtable
*h
)
554 const unsigned int hashval
=
555 (((unsigned int)(l
->block_ref_to
->dev_bytenr
>> 16)) ^
556 ((unsigned int)(l
->block_ref_from
->dev_bytenr
>> 16)) ^
557 ((unsigned int)((uintptr_t)l
->block_ref_to
->dev_state
->bdev
)) ^
558 ((unsigned int)((uintptr_t)l
->block_ref_from
->dev_state
->bdev
)))
559 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE
- 1);
561 BUG_ON(NULL
== l
->block_ref_to
);
562 BUG_ON(NULL
== l
->block_ref_from
);
563 list_add(&l
->collision_resolving_node
, h
->table
+ hashval
);
566 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link
*l
)
568 list_del(&l
->collision_resolving_node
);
571 static struct btrfsic_block_link
*btrfsic_block_link_hashtable_lookup(
572 struct block_device
*bdev_ref_to
,
573 u64 dev_bytenr_ref_to
,
574 struct block_device
*bdev_ref_from
,
575 u64 dev_bytenr_ref_from
,
576 struct btrfsic_block_link_hashtable
*h
)
578 const unsigned int hashval
=
579 (((unsigned int)(dev_bytenr_ref_to
>> 16)) ^
580 ((unsigned int)(dev_bytenr_ref_from
>> 16)) ^
581 ((unsigned int)((uintptr_t)bdev_ref_to
)) ^
582 ((unsigned int)((uintptr_t)bdev_ref_from
))) &
583 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE
- 1);
584 struct list_head
*elem
;
586 list_for_each(elem
, h
->table
+ hashval
) {
587 struct btrfsic_block_link
*const l
=
588 list_entry(elem
, struct btrfsic_block_link
,
589 collision_resolving_node
);
591 BUG_ON(NULL
== l
->block_ref_to
);
592 BUG_ON(NULL
== l
->block_ref_from
);
593 if (l
->block_ref_to
->dev_state
->bdev
== bdev_ref_to
&&
594 l
->block_ref_to
->dev_bytenr
== dev_bytenr_ref_to
&&
595 l
->block_ref_from
->dev_state
->bdev
== bdev_ref_from
&&
596 l
->block_ref_from
->dev_bytenr
== dev_bytenr_ref_from
)
603 static void btrfsic_dev_state_hashtable_init(
604 struct btrfsic_dev_state_hashtable
*h
)
608 for (i
= 0; i
< BTRFSIC_DEV2STATE_HASHTABLE_SIZE
; i
++)
609 INIT_LIST_HEAD(h
->table
+ i
);
612 static void btrfsic_dev_state_hashtable_add(
613 struct btrfsic_dev_state
*ds
,
614 struct btrfsic_dev_state_hashtable
*h
)
616 const unsigned int hashval
=
617 (((unsigned int)((uintptr_t)ds
->bdev
)) &
618 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE
- 1));
620 list_add(&ds
->collision_resolving_node
, h
->table
+ hashval
);
623 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state
*ds
)
625 list_del(&ds
->collision_resolving_node
);
628 static struct btrfsic_dev_state
*btrfsic_dev_state_hashtable_lookup(
629 struct block_device
*bdev
,
630 struct btrfsic_dev_state_hashtable
*h
)
632 const unsigned int hashval
=
633 (((unsigned int)((uintptr_t)bdev
)) &
634 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE
- 1));
635 struct list_head
*elem
;
637 list_for_each(elem
, h
->table
+ hashval
) {
638 struct btrfsic_dev_state
*const ds
=
639 list_entry(elem
, struct btrfsic_dev_state
,
640 collision_resolving_node
);
642 if (ds
->bdev
== bdev
)
649 static int btrfsic_process_superblock(struct btrfsic_state
*state
,
650 struct btrfs_fs_devices
*fs_devices
)
653 struct btrfs_super_block
*selected_super
;
654 struct list_head
*dev_head
= &fs_devices
->devices
;
655 struct btrfs_device
*device
;
656 struct btrfsic_dev_state
*selected_dev_state
= NULL
;
659 BUG_ON(NULL
== state
);
660 selected_super
= kzalloc(sizeof(*selected_super
), GFP_NOFS
);
661 if (NULL
== selected_super
) {
662 printk(KERN_INFO
"btrfsic: error, kmalloc failed!\n");
666 list_for_each_entry(device
, dev_head
, dev_list
) {
668 struct btrfsic_dev_state
*dev_state
;
670 if (!device
->bdev
|| !device
->name
)
673 dev_state
= btrfsic_dev_state_lookup(device
->bdev
);
674 BUG_ON(NULL
== dev_state
);
675 for (i
= 0; i
< BTRFS_SUPER_MIRROR_MAX
; i
++) {
676 ret
= btrfsic_process_superblock_dev_mirror(
677 state
, dev_state
, device
, i
,
678 &selected_dev_state
, selected_super
);
679 if (0 != ret
&& 0 == i
) {
680 kfree(selected_super
);
686 if (NULL
== state
->latest_superblock
) {
687 printk(KERN_INFO
"btrfsic: no superblock found!\n");
688 kfree(selected_super
);
692 state
->csum_size
= btrfs_super_csum_size(selected_super
);
694 for (pass
= 0; pass
< 3; pass
++) {
701 next_bytenr
= btrfs_super_root(selected_super
);
702 if (state
->print_mask
&
703 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
704 printk(KERN_INFO
"root@%llu\n", next_bytenr
);
707 next_bytenr
= btrfs_super_chunk_root(selected_super
);
708 if (state
->print_mask
&
709 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
710 printk(KERN_INFO
"chunk@%llu\n", next_bytenr
);
713 next_bytenr
= btrfs_super_log_root(selected_super
);
714 if (0 == next_bytenr
)
716 if (state
->print_mask
&
717 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
718 printk(KERN_INFO
"log@%llu\n", next_bytenr
);
723 btrfs_num_copies(state
->root
->fs_info
,
724 next_bytenr
, state
->metablock_size
);
725 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
726 printk(KERN_INFO
"num_copies(log_bytenr=%llu) = %d\n",
727 next_bytenr
, num_copies
);
729 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
730 struct btrfsic_block
*next_block
;
731 struct btrfsic_block_data_ctx tmp_next_block_ctx
;
732 struct btrfsic_block_link
*l
;
734 ret
= btrfsic_map_block(state
, next_bytenr
,
735 state
->metablock_size
,
739 printk(KERN_INFO
"btrfsic:"
740 " btrfsic_map_block(root @%llu,"
741 " mirror %d) failed!\n",
742 next_bytenr
, mirror_num
);
743 kfree(selected_super
);
747 next_block
= btrfsic_block_hashtable_lookup(
748 tmp_next_block_ctx
.dev
->bdev
,
749 tmp_next_block_ctx
.dev_bytenr
,
750 &state
->block_hashtable
);
751 BUG_ON(NULL
== next_block
);
753 l
= btrfsic_block_link_hashtable_lookup(
754 tmp_next_block_ctx
.dev
->bdev
,
755 tmp_next_block_ctx
.dev_bytenr
,
756 state
->latest_superblock
->dev_state
->
758 state
->latest_superblock
->dev_bytenr
,
759 &state
->block_link_hashtable
);
762 ret
= btrfsic_read_block(state
, &tmp_next_block_ctx
);
763 if (ret
< (int)PAGE_CACHE_SIZE
) {
765 "btrfsic: read @logical %llu failed!\n",
766 tmp_next_block_ctx
.start
);
767 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
768 kfree(selected_super
);
772 ret
= btrfsic_process_metablock(state
,
775 BTRFS_MAX_LEVEL
+ 3, 1);
776 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
780 kfree(selected_super
);
784 static int btrfsic_process_superblock_dev_mirror(
785 struct btrfsic_state
*state
,
786 struct btrfsic_dev_state
*dev_state
,
787 struct btrfs_device
*device
,
788 int superblock_mirror_num
,
789 struct btrfsic_dev_state
**selected_dev_state
,
790 struct btrfs_super_block
*selected_super
)
792 struct btrfs_super_block
*super_tmp
;
794 struct buffer_head
*bh
;
795 struct btrfsic_block
*superblock_tmp
;
797 struct block_device
*const superblock_bdev
= device
->bdev
;
799 /* super block bytenr is always the unmapped device bytenr */
800 dev_bytenr
= btrfs_sb_offset(superblock_mirror_num
);
801 if (dev_bytenr
+ BTRFS_SUPER_INFO_SIZE
> device
->total_bytes
)
803 bh
= __bread(superblock_bdev
, dev_bytenr
/ 4096,
804 BTRFS_SUPER_INFO_SIZE
);
807 super_tmp
= (struct btrfs_super_block
*)
808 (bh
->b_data
+ (dev_bytenr
& 4095));
810 if (btrfs_super_bytenr(super_tmp
) != dev_bytenr
||
811 btrfs_super_magic(super_tmp
) != BTRFS_MAGIC
||
812 memcmp(device
->uuid
, super_tmp
->dev_item
.uuid
, BTRFS_UUID_SIZE
) ||
813 btrfs_super_nodesize(super_tmp
) != state
->metablock_size
||
814 btrfs_super_leafsize(super_tmp
) != state
->metablock_size
||
815 btrfs_super_sectorsize(super_tmp
) != state
->datablock_size
) {
821 btrfsic_block_hashtable_lookup(superblock_bdev
,
823 &state
->block_hashtable
);
824 if (NULL
== superblock_tmp
) {
825 superblock_tmp
= btrfsic_block_alloc();
826 if (NULL
== superblock_tmp
) {
827 printk(KERN_INFO
"btrfsic: error, kmalloc failed!\n");
831 /* for superblock, only the dev_bytenr makes sense */
832 superblock_tmp
->dev_bytenr
= dev_bytenr
;
833 superblock_tmp
->dev_state
= dev_state
;
834 superblock_tmp
->logical_bytenr
= dev_bytenr
;
835 superblock_tmp
->generation
= btrfs_super_generation(super_tmp
);
836 superblock_tmp
->is_metadata
= 1;
837 superblock_tmp
->is_superblock
= 1;
838 superblock_tmp
->is_iodone
= 1;
839 superblock_tmp
->never_written
= 0;
840 superblock_tmp
->mirror_num
= 1 + superblock_mirror_num
;
841 if (state
->print_mask
& BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE
)
842 printk_in_rcu(KERN_INFO
"New initial S-block (bdev %p, %s)"
843 " @%llu (%s/%llu/%d)\n",
845 rcu_str_deref(device
->name
), dev_bytenr
,
846 dev_state
->name
, dev_bytenr
,
847 superblock_mirror_num
);
848 list_add(&superblock_tmp
->all_blocks_node
,
849 &state
->all_blocks_list
);
850 btrfsic_block_hashtable_add(superblock_tmp
,
851 &state
->block_hashtable
);
854 /* select the one with the highest generation field */
855 if (btrfs_super_generation(super_tmp
) >
856 state
->max_superblock_generation
||
857 0 == state
->max_superblock_generation
) {
858 memcpy(selected_super
, super_tmp
, sizeof(*selected_super
));
859 *selected_dev_state
= dev_state
;
860 state
->max_superblock_generation
=
861 btrfs_super_generation(super_tmp
);
862 state
->latest_superblock
= superblock_tmp
;
865 for (pass
= 0; pass
< 3; pass
++) {
869 const char *additional_string
= NULL
;
870 struct btrfs_disk_key tmp_disk_key
;
872 tmp_disk_key
.type
= BTRFS_ROOT_ITEM_KEY
;
873 tmp_disk_key
.offset
= 0;
876 btrfs_set_disk_key_objectid(&tmp_disk_key
,
877 BTRFS_ROOT_TREE_OBJECTID
);
878 additional_string
= "initial root ";
879 next_bytenr
= btrfs_super_root(super_tmp
);
882 btrfs_set_disk_key_objectid(&tmp_disk_key
,
883 BTRFS_CHUNK_TREE_OBJECTID
);
884 additional_string
= "initial chunk ";
885 next_bytenr
= btrfs_super_chunk_root(super_tmp
);
888 btrfs_set_disk_key_objectid(&tmp_disk_key
,
889 BTRFS_TREE_LOG_OBJECTID
);
890 additional_string
= "initial log ";
891 next_bytenr
= btrfs_super_log_root(super_tmp
);
892 if (0 == next_bytenr
)
898 btrfs_num_copies(state
->root
->fs_info
,
899 next_bytenr
, state
->metablock_size
);
900 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
901 printk(KERN_INFO
"num_copies(log_bytenr=%llu) = %d\n",
902 next_bytenr
, num_copies
);
903 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
904 struct btrfsic_block
*next_block
;
905 struct btrfsic_block_data_ctx tmp_next_block_ctx
;
906 struct btrfsic_block_link
*l
;
908 if (btrfsic_map_block(state
, next_bytenr
,
909 state
->metablock_size
,
912 printk(KERN_INFO
"btrfsic: btrfsic_map_block("
913 "bytenr @%llu, mirror %d) failed!\n",
914 next_bytenr
, mirror_num
);
919 next_block
= btrfsic_block_lookup_or_add(
920 state
, &tmp_next_block_ctx
,
921 additional_string
, 1, 1, 0,
923 if (NULL
== next_block
) {
924 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
929 next_block
->disk_key
= tmp_disk_key
;
930 next_block
->generation
= BTRFSIC_GENERATION_UNKNOWN
;
931 l
= btrfsic_block_link_lookup_or_add(
932 state
, &tmp_next_block_ctx
,
933 next_block
, superblock_tmp
,
934 BTRFSIC_GENERATION_UNKNOWN
);
935 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
942 if (state
->print_mask
& BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES
)
943 btrfsic_dump_tree_sub(state
, superblock_tmp
, 0);
949 static struct btrfsic_stack_frame
*btrfsic_stack_frame_alloc(void)
951 struct btrfsic_stack_frame
*sf
;
953 sf
= kzalloc(sizeof(*sf
), GFP_NOFS
);
955 printk(KERN_INFO
"btrfsic: alloc memory failed!\n");
957 sf
->magic
= BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER
;
961 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame
*sf
)
963 BUG_ON(!(NULL
== sf
||
964 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER
== sf
->magic
));
968 static int btrfsic_process_metablock(
969 struct btrfsic_state
*state
,
970 struct btrfsic_block
*const first_block
,
971 struct btrfsic_block_data_ctx
*const first_block_ctx
,
972 int first_limit_nesting
, int force_iodone_flag
)
974 struct btrfsic_stack_frame initial_stack_frame
= { 0 };
975 struct btrfsic_stack_frame
*sf
;
976 struct btrfsic_stack_frame
*next_stack
;
977 struct btrfs_header
*const first_hdr
=
978 (struct btrfs_header
*)first_block_ctx
->datav
[0];
981 sf
= &initial_stack_frame
;
984 sf
->limit_nesting
= first_limit_nesting
;
985 sf
->block
= first_block
;
986 sf
->block_ctx
= first_block_ctx
;
987 sf
->next_block
= NULL
;
991 continue_with_new_stack_frame
:
992 sf
->block
->generation
= le64_to_cpu(sf
->hdr
->generation
);
993 if (0 == sf
->hdr
->level
) {
994 struct btrfs_leaf
*const leafhdr
=
995 (struct btrfs_leaf
*)sf
->hdr
;
998 sf
->nr
= btrfs_stack_header_nritems(&leafhdr
->header
);
1000 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1002 "leaf %llu items %d generation %llu"
1004 sf
->block_ctx
->start
, sf
->nr
,
1005 btrfs_stack_header_generation(
1007 btrfs_stack_header_owner(
1011 continue_with_current_leaf_stack_frame
:
1012 if (0 == sf
->num_copies
|| sf
->mirror_num
> sf
->num_copies
) {
1017 if (sf
->i
< sf
->nr
) {
1018 struct btrfs_item disk_item
;
1019 u32 disk_item_offset
=
1020 (uintptr_t)(leafhdr
->items
+ sf
->i
) -
1022 struct btrfs_disk_key
*disk_key
;
1027 if (disk_item_offset
+ sizeof(struct btrfs_item
) >
1028 sf
->block_ctx
->len
) {
1029 leaf_item_out_of_bounce_error
:
1031 "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1032 sf
->block_ctx
->start
,
1033 sf
->block_ctx
->dev
->name
);
1034 goto one_stack_frame_backwards
;
1036 btrfsic_read_from_block_data(sf
->block_ctx
,
1039 sizeof(struct btrfs_item
));
1040 item_offset
= btrfs_stack_item_offset(&disk_item
);
1041 item_size
= btrfs_stack_item_offset(&disk_item
);
1042 disk_key
= &disk_item
.key
;
1043 type
= btrfs_disk_key_type(disk_key
);
1045 if (BTRFS_ROOT_ITEM_KEY
== type
) {
1046 struct btrfs_root_item root_item
;
1047 u32 root_item_offset
;
1050 root_item_offset
= item_offset
+
1051 offsetof(struct btrfs_leaf
, items
);
1052 if (root_item_offset
+ item_size
>
1054 goto leaf_item_out_of_bounce_error
;
1055 btrfsic_read_from_block_data(
1056 sf
->block_ctx
, &root_item
,
1059 next_bytenr
= btrfs_root_bytenr(&root_item
);
1062 btrfsic_create_link_to_next_block(
1068 &sf
->next_block_ctx
,
1074 btrfs_root_generation(
1077 goto one_stack_frame_backwards
;
1079 if (NULL
!= sf
->next_block
) {
1080 struct btrfs_header
*const next_hdr
=
1081 (struct btrfs_header
*)
1082 sf
->next_block_ctx
.datav
[0];
1085 btrfsic_stack_frame_alloc();
1086 if (NULL
== next_stack
) {
1087 btrfsic_release_block_ctx(
1090 goto one_stack_frame_backwards
;
1094 next_stack
->block
= sf
->next_block
;
1095 next_stack
->block_ctx
=
1096 &sf
->next_block_ctx
;
1097 next_stack
->next_block
= NULL
;
1098 next_stack
->hdr
= next_hdr
;
1099 next_stack
->limit_nesting
=
1100 sf
->limit_nesting
- 1;
1101 next_stack
->prev
= sf
;
1103 goto continue_with_new_stack_frame
;
1105 } else if (BTRFS_EXTENT_DATA_KEY
== type
&&
1106 state
->include_extent_data
) {
1107 sf
->error
= btrfsic_handle_extent_data(
1114 goto one_stack_frame_backwards
;
1117 goto continue_with_current_leaf_stack_frame
;
1120 struct btrfs_node
*const nodehdr
= (struct btrfs_node
*)sf
->hdr
;
1123 sf
->nr
= btrfs_stack_header_nritems(&nodehdr
->header
);
1125 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1126 printk(KERN_INFO
"node %llu level %d items %d"
1127 " generation %llu owner %llu\n",
1128 sf
->block_ctx
->start
,
1129 nodehdr
->header
.level
, sf
->nr
,
1130 btrfs_stack_header_generation(
1132 btrfs_stack_header_owner(
1136 continue_with_current_node_stack_frame
:
1137 if (0 == sf
->num_copies
|| sf
->mirror_num
> sf
->num_copies
) {
1142 if (sf
->i
< sf
->nr
) {
1143 struct btrfs_key_ptr key_ptr
;
1147 key_ptr_offset
= (uintptr_t)(nodehdr
->ptrs
+ sf
->i
) -
1149 if (key_ptr_offset
+ sizeof(struct btrfs_key_ptr
) >
1150 sf
->block_ctx
->len
) {
1152 "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1153 sf
->block_ctx
->start
,
1154 sf
->block_ctx
->dev
->name
);
1155 goto one_stack_frame_backwards
;
1157 btrfsic_read_from_block_data(
1158 sf
->block_ctx
, &key_ptr
, key_ptr_offset
,
1159 sizeof(struct btrfs_key_ptr
));
1160 next_bytenr
= btrfs_stack_key_blockptr(&key_ptr
);
1162 sf
->error
= btrfsic_create_link_to_next_block(
1168 &sf
->next_block_ctx
,
1174 btrfs_stack_key_generation(&key_ptr
));
1176 goto one_stack_frame_backwards
;
1178 if (NULL
!= sf
->next_block
) {
1179 struct btrfs_header
*const next_hdr
=
1180 (struct btrfs_header
*)
1181 sf
->next_block_ctx
.datav
[0];
1183 next_stack
= btrfsic_stack_frame_alloc();
1184 if (NULL
== next_stack
)
1185 goto one_stack_frame_backwards
;
1188 next_stack
->block
= sf
->next_block
;
1189 next_stack
->block_ctx
= &sf
->next_block_ctx
;
1190 next_stack
->next_block
= NULL
;
1191 next_stack
->hdr
= next_hdr
;
1192 next_stack
->limit_nesting
=
1193 sf
->limit_nesting
- 1;
1194 next_stack
->prev
= sf
;
1196 goto continue_with_new_stack_frame
;
1199 goto continue_with_current_node_stack_frame
;
1203 one_stack_frame_backwards
:
1204 if (NULL
!= sf
->prev
) {
1205 struct btrfsic_stack_frame
*const prev
= sf
->prev
;
1207 /* the one for the initial block is freed in the caller */
1208 btrfsic_release_block_ctx(sf
->block_ctx
);
1211 prev
->error
= sf
->error
;
1212 btrfsic_stack_frame_free(sf
);
1214 goto one_stack_frame_backwards
;
1217 btrfsic_stack_frame_free(sf
);
1219 goto continue_with_new_stack_frame
;
1221 BUG_ON(&initial_stack_frame
!= sf
);
1227 static void btrfsic_read_from_block_data(
1228 struct btrfsic_block_data_ctx
*block_ctx
,
1229 void *dstv
, u32 offset
, size_t len
)
1232 size_t offset_in_page
;
1234 char *dst
= (char *)dstv
;
1235 size_t start_offset
= block_ctx
->start
& ((u64
)PAGE_CACHE_SIZE
- 1);
1236 unsigned long i
= (start_offset
+ offset
) >> PAGE_CACHE_SHIFT
;
1238 WARN_ON(offset
+ len
> block_ctx
->len
);
1239 offset_in_page
= (start_offset
+ offset
) & (PAGE_CACHE_SIZE
- 1);
1242 cur
= min(len
, ((size_t)PAGE_CACHE_SIZE
- offset_in_page
));
1243 BUG_ON(i
>= (block_ctx
->len
+ PAGE_CACHE_SIZE
- 1) >>
1245 kaddr
= block_ctx
->datav
[i
];
1246 memcpy(dst
, kaddr
+ offset_in_page
, cur
);
1255 static int btrfsic_create_link_to_next_block(
1256 struct btrfsic_state
*state
,
1257 struct btrfsic_block
*block
,
1258 struct btrfsic_block_data_ctx
*block_ctx
,
1261 struct btrfsic_block_data_ctx
*next_block_ctx
,
1262 struct btrfsic_block
**next_blockp
,
1263 int force_iodone_flag
,
1264 int *num_copiesp
, int *mirror_nump
,
1265 struct btrfs_disk_key
*disk_key
,
1266 u64 parent_generation
)
1268 struct btrfsic_block
*next_block
= NULL
;
1270 struct btrfsic_block_link
*l
;
1271 int did_alloc_block_link
;
1272 int block_was_created
;
1274 *next_blockp
= NULL
;
1275 if (0 == *num_copiesp
) {
1277 btrfs_num_copies(state
->root
->fs_info
,
1278 next_bytenr
, state
->metablock_size
);
1279 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
1280 printk(KERN_INFO
"num_copies(log_bytenr=%llu) = %d\n",
1281 next_bytenr
, *num_copiesp
);
1285 if (*mirror_nump
> *num_copiesp
)
1288 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1290 "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1292 ret
= btrfsic_map_block(state
, next_bytenr
,
1293 state
->metablock_size
,
1294 next_block_ctx
, *mirror_nump
);
1297 "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1298 next_bytenr
, *mirror_nump
);
1299 btrfsic_release_block_ctx(next_block_ctx
);
1300 *next_blockp
= NULL
;
1304 next_block
= btrfsic_block_lookup_or_add(state
,
1305 next_block_ctx
, "referenced ",
1306 1, force_iodone_flag
,
1309 &block_was_created
);
1310 if (NULL
== next_block
) {
1311 btrfsic_release_block_ctx(next_block_ctx
);
1312 *next_blockp
= NULL
;
1315 if (block_was_created
) {
1317 next_block
->generation
= BTRFSIC_GENERATION_UNKNOWN
;
1319 if (next_block
->logical_bytenr
!= next_bytenr
&&
1320 !(!next_block
->is_metadata
&&
1321 0 == next_block
->logical_bytenr
)) {
1323 "Referenced block @%llu (%s/%llu/%d)"
1324 " found in hash table, %c,"
1325 " bytenr mismatch (!= stored %llu).\n",
1326 next_bytenr
, next_block_ctx
->dev
->name
,
1327 next_block_ctx
->dev_bytenr
, *mirror_nump
,
1328 btrfsic_get_block_type(state
, next_block
),
1329 next_block
->logical_bytenr
);
1330 } else if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1332 "Referenced block @%llu (%s/%llu/%d)"
1333 " found in hash table, %c.\n",
1334 next_bytenr
, next_block_ctx
->dev
->name
,
1335 next_block_ctx
->dev_bytenr
, *mirror_nump
,
1336 btrfsic_get_block_type(state
, next_block
));
1337 next_block
->logical_bytenr
= next_bytenr
;
1339 next_block
->mirror_num
= *mirror_nump
;
1340 l
= btrfsic_block_link_hashtable_lookup(
1341 next_block_ctx
->dev
->bdev
,
1342 next_block_ctx
->dev_bytenr
,
1343 block_ctx
->dev
->bdev
,
1344 block_ctx
->dev_bytenr
,
1345 &state
->block_link_hashtable
);
1348 next_block
->disk_key
= *disk_key
;
1350 l
= btrfsic_block_link_alloc();
1352 printk(KERN_INFO
"btrfsic: error, kmalloc failed!\n");
1353 btrfsic_release_block_ctx(next_block_ctx
);
1354 *next_blockp
= NULL
;
1358 did_alloc_block_link
= 1;
1359 l
->block_ref_to
= next_block
;
1360 l
->block_ref_from
= block
;
1362 l
->parent_generation
= parent_generation
;
1364 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1365 btrfsic_print_add_link(state
, l
);
1367 list_add(&l
->node_ref_to
, &block
->ref_to_list
);
1368 list_add(&l
->node_ref_from
, &next_block
->ref_from_list
);
1370 btrfsic_block_link_hashtable_add(l
,
1371 &state
->block_link_hashtable
);
1373 did_alloc_block_link
= 0;
1374 if (0 == limit_nesting
) {
1376 l
->parent_generation
= parent_generation
;
1377 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1378 btrfsic_print_add_link(state
, l
);
1382 if (limit_nesting
> 0 && did_alloc_block_link
) {
1383 ret
= btrfsic_read_block(state
, next_block_ctx
);
1384 if (ret
< (int)next_block_ctx
->len
) {
1386 "btrfsic: read block @logical %llu failed!\n",
1388 btrfsic_release_block_ctx(next_block_ctx
);
1389 *next_blockp
= NULL
;
1393 *next_blockp
= next_block
;
1395 *next_blockp
= NULL
;
1402 static int btrfsic_handle_extent_data(
1403 struct btrfsic_state
*state
,
1404 struct btrfsic_block
*block
,
1405 struct btrfsic_block_data_ctx
*block_ctx
,
1406 u32 item_offset
, int force_iodone_flag
)
1409 struct btrfs_file_extent_item file_extent_item
;
1410 u64 file_extent_item_offset
;
1414 struct btrfsic_block_link
*l
;
1416 file_extent_item_offset
= offsetof(struct btrfs_leaf
, items
) +
1418 if (file_extent_item_offset
+
1419 offsetof(struct btrfs_file_extent_item
, disk_num_bytes
) >
1422 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1423 block_ctx
->start
, block_ctx
->dev
->name
);
1427 btrfsic_read_from_block_data(block_ctx
, &file_extent_item
,
1428 file_extent_item_offset
,
1429 offsetof(struct btrfs_file_extent_item
, disk_num_bytes
));
1430 if (BTRFS_FILE_EXTENT_REG
!= file_extent_item
.type
||
1431 btrfs_stack_file_extent_disk_bytenr(&file_extent_item
) == 0) {
1432 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERY_VERBOSE
)
1433 printk(KERN_INFO
"extent_data: type %u, disk_bytenr = %llu\n",
1434 file_extent_item
.type
,
1435 btrfs_stack_file_extent_disk_bytenr(
1436 &file_extent_item
));
1440 if (file_extent_item_offset
+ sizeof(struct btrfs_file_extent_item
) >
1443 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1444 block_ctx
->start
, block_ctx
->dev
->name
);
1447 btrfsic_read_from_block_data(block_ctx
, &file_extent_item
,
1448 file_extent_item_offset
,
1449 sizeof(struct btrfs_file_extent_item
));
1450 next_bytenr
= btrfs_stack_file_extent_disk_bytenr(&file_extent_item
) +
1451 btrfs_stack_file_extent_offset(&file_extent_item
);
1452 generation
= btrfs_stack_file_extent_generation(&file_extent_item
);
1453 num_bytes
= btrfs_stack_file_extent_num_bytes(&file_extent_item
);
1454 generation
= btrfs_stack_file_extent_generation(&file_extent_item
);
1456 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERY_VERBOSE
)
1457 printk(KERN_INFO
"extent_data: type %u, disk_bytenr = %llu,"
1458 " offset = %llu, num_bytes = %llu\n",
1459 file_extent_item
.type
,
1460 btrfs_stack_file_extent_disk_bytenr(&file_extent_item
),
1461 btrfs_stack_file_extent_offset(&file_extent_item
),
1463 while (num_bytes
> 0) {
1468 if (num_bytes
> state
->datablock_size
)
1469 chunk_len
= state
->datablock_size
;
1471 chunk_len
= num_bytes
;
1474 btrfs_num_copies(state
->root
->fs_info
,
1475 next_bytenr
, state
->datablock_size
);
1476 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
1477 printk(KERN_INFO
"num_copies(log_bytenr=%llu) = %d\n",
1478 next_bytenr
, num_copies
);
1479 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
1480 struct btrfsic_block_data_ctx next_block_ctx
;
1481 struct btrfsic_block
*next_block
;
1482 int block_was_created
;
1484 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1485 printk(KERN_INFO
"btrfsic_handle_extent_data("
1486 "mirror_num=%d)\n", mirror_num
);
1487 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERY_VERBOSE
)
1489 "\tdisk_bytenr = %llu, num_bytes %u\n",
1490 next_bytenr
, chunk_len
);
1491 ret
= btrfsic_map_block(state
, next_bytenr
,
1492 chunk_len
, &next_block_ctx
,
1496 "btrfsic: btrfsic_map_block(@%llu,"
1497 " mirror=%d) failed!\n",
1498 next_bytenr
, mirror_num
);
1502 next_block
= btrfsic_block_lookup_or_add(
1510 &block_was_created
);
1511 if (NULL
== next_block
) {
1513 "btrfsic: error, kmalloc failed!\n");
1514 btrfsic_release_block_ctx(&next_block_ctx
);
1517 if (!block_was_created
) {
1518 if (next_block
->logical_bytenr
!= next_bytenr
&&
1519 !(!next_block
->is_metadata
&&
1520 0 == next_block
->logical_bytenr
)) {
1523 " @%llu (%s/%llu/%d)"
1524 " found in hash table, D,"
1526 " (!= stored %llu).\n",
1528 next_block_ctx
.dev
->name
,
1529 next_block_ctx
.dev_bytenr
,
1531 next_block
->logical_bytenr
);
1533 next_block
->logical_bytenr
= next_bytenr
;
1534 next_block
->mirror_num
= mirror_num
;
1537 l
= btrfsic_block_link_lookup_or_add(state
,
1541 btrfsic_release_block_ctx(&next_block_ctx
);
1546 next_bytenr
+= chunk_len
;
1547 num_bytes
-= chunk_len
;
1553 static int btrfsic_map_block(struct btrfsic_state
*state
, u64 bytenr
, u32 len
,
1554 struct btrfsic_block_data_ctx
*block_ctx_out
,
1559 struct btrfs_bio
*multi
= NULL
;
1560 struct btrfs_device
*device
;
1563 ret
= btrfs_map_block(state
->root
->fs_info
, READ
,
1564 bytenr
, &length
, &multi
, mirror_num
);
1567 block_ctx_out
->start
= 0;
1568 block_ctx_out
->dev_bytenr
= 0;
1569 block_ctx_out
->len
= 0;
1570 block_ctx_out
->dev
= NULL
;
1571 block_ctx_out
->datav
= NULL
;
1572 block_ctx_out
->pagev
= NULL
;
1573 block_ctx_out
->mem_to_free
= NULL
;
1578 device
= multi
->stripes
[0].dev
;
1579 block_ctx_out
->dev
= btrfsic_dev_state_lookup(device
->bdev
);
1580 block_ctx_out
->dev_bytenr
= multi
->stripes
[0].physical
;
1581 block_ctx_out
->start
= bytenr
;
1582 block_ctx_out
->len
= len
;
1583 block_ctx_out
->datav
= NULL
;
1584 block_ctx_out
->pagev
= NULL
;
1585 block_ctx_out
->mem_to_free
= NULL
;
1588 if (NULL
== block_ctx_out
->dev
) {
1590 printk(KERN_INFO
"btrfsic: error, cannot lookup dev (#1)!\n");
1596 static int btrfsic_map_superblock(struct btrfsic_state
*state
, u64 bytenr
,
1597 u32 len
, struct block_device
*bdev
,
1598 struct btrfsic_block_data_ctx
*block_ctx_out
)
1600 block_ctx_out
->dev
= btrfsic_dev_state_lookup(bdev
);
1601 block_ctx_out
->dev_bytenr
= bytenr
;
1602 block_ctx_out
->start
= bytenr
;
1603 block_ctx_out
->len
= len
;
1604 block_ctx_out
->datav
= NULL
;
1605 block_ctx_out
->pagev
= NULL
;
1606 block_ctx_out
->mem_to_free
= NULL
;
1607 if (NULL
!= block_ctx_out
->dev
) {
1610 printk(KERN_INFO
"btrfsic: error, cannot lookup dev (#2)!\n");
1615 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx
*block_ctx
)
1617 if (block_ctx
->mem_to_free
) {
1618 unsigned int num_pages
;
1620 BUG_ON(!block_ctx
->datav
);
1621 BUG_ON(!block_ctx
->pagev
);
1622 num_pages
= (block_ctx
->len
+ (u64
)PAGE_CACHE_SIZE
- 1) >>
1624 while (num_pages
> 0) {
1626 if (block_ctx
->datav
[num_pages
]) {
1627 kunmap(block_ctx
->pagev
[num_pages
]);
1628 block_ctx
->datav
[num_pages
] = NULL
;
1630 if (block_ctx
->pagev
[num_pages
]) {
1631 __free_page(block_ctx
->pagev
[num_pages
]);
1632 block_ctx
->pagev
[num_pages
] = NULL
;
1636 kfree(block_ctx
->mem_to_free
);
1637 block_ctx
->mem_to_free
= NULL
;
1638 block_ctx
->pagev
= NULL
;
1639 block_ctx
->datav
= NULL
;
1643 static int btrfsic_read_block(struct btrfsic_state
*state
,
1644 struct btrfsic_block_data_ctx
*block_ctx
)
1646 unsigned int num_pages
;
1651 BUG_ON(block_ctx
->datav
);
1652 BUG_ON(block_ctx
->pagev
);
1653 BUG_ON(block_ctx
->mem_to_free
);
1654 if (block_ctx
->dev_bytenr
& ((u64
)PAGE_CACHE_SIZE
- 1)) {
1656 "btrfsic: read_block() with unaligned bytenr %llu\n",
1657 block_ctx
->dev_bytenr
);
1661 num_pages
= (block_ctx
->len
+ (u64
)PAGE_CACHE_SIZE
- 1) >>
1663 block_ctx
->mem_to_free
= kzalloc((sizeof(*block_ctx
->datav
) +
1664 sizeof(*block_ctx
->pagev
)) *
1665 num_pages
, GFP_NOFS
);
1666 if (!block_ctx
->mem_to_free
)
1668 block_ctx
->datav
= block_ctx
->mem_to_free
;
1669 block_ctx
->pagev
= (struct page
**)(block_ctx
->datav
+ num_pages
);
1670 for (i
= 0; i
< num_pages
; i
++) {
1671 block_ctx
->pagev
[i
] = alloc_page(GFP_NOFS
);
1672 if (!block_ctx
->pagev
[i
])
1676 dev_bytenr
= block_ctx
->dev_bytenr
;
1677 for (i
= 0; i
< num_pages
;) {
1680 DECLARE_COMPLETION_ONSTACK(complete
);
1682 bio
= btrfs_io_bio_alloc(GFP_NOFS
, num_pages
- i
);
1685 "btrfsic: bio_alloc() for %u pages failed!\n",
1689 bio
->bi_bdev
= block_ctx
->dev
->bdev
;
1690 bio
->bi_sector
= dev_bytenr
>> 9;
1691 bio
->bi_end_io
= btrfsic_complete_bio_end_io
;
1692 bio
->bi_private
= &complete
;
1694 for (j
= i
; j
< num_pages
; j
++) {
1695 ret
= bio_add_page(bio
, block_ctx
->pagev
[j
],
1696 PAGE_CACHE_SIZE
, 0);
1697 if (PAGE_CACHE_SIZE
!= ret
)
1702 "btrfsic: error, failed to add a single page!\n");
1705 submit_bio(READ
, bio
);
1707 /* this will also unplug the queue */
1708 wait_for_completion(&complete
);
1710 if (!test_bit(BIO_UPTODATE
, &bio
->bi_flags
)) {
1712 "btrfsic: read error at logical %llu dev %s!\n",
1713 block_ctx
->start
, block_ctx
->dev
->name
);
1718 dev_bytenr
+= (j
- i
) * PAGE_CACHE_SIZE
;
1721 for (i
= 0; i
< num_pages
; i
++) {
1722 block_ctx
->datav
[i
] = kmap(block_ctx
->pagev
[i
]);
1723 if (!block_ctx
->datav
[i
]) {
1724 printk(KERN_INFO
"btrfsic: kmap() failed (dev %s)!\n",
1725 block_ctx
->dev
->name
);
1730 return block_ctx
->len
;
1733 static void btrfsic_complete_bio_end_io(struct bio
*bio
, int err
)
1735 complete((struct completion
*)bio
->bi_private
);
1738 static void btrfsic_dump_database(struct btrfsic_state
*state
)
1740 struct list_head
*elem_all
;
1742 BUG_ON(NULL
== state
);
1744 printk(KERN_INFO
"all_blocks_list:\n");
1745 list_for_each(elem_all
, &state
->all_blocks_list
) {
1746 const struct btrfsic_block
*const b_all
=
1747 list_entry(elem_all
, struct btrfsic_block
,
1749 struct list_head
*elem_ref_to
;
1750 struct list_head
*elem_ref_from
;
1752 printk(KERN_INFO
"%c-block @%llu (%s/%llu/%d)\n",
1753 btrfsic_get_block_type(state
, b_all
),
1754 b_all
->logical_bytenr
, b_all
->dev_state
->name
,
1755 b_all
->dev_bytenr
, b_all
->mirror_num
);
1757 list_for_each(elem_ref_to
, &b_all
->ref_to_list
) {
1758 const struct btrfsic_block_link
*const l
=
1759 list_entry(elem_ref_to
,
1760 struct btrfsic_block_link
,
1763 printk(KERN_INFO
" %c @%llu (%s/%llu/%d)"
1765 " %c @%llu (%s/%llu/%d)\n",
1766 btrfsic_get_block_type(state
, b_all
),
1767 b_all
->logical_bytenr
, b_all
->dev_state
->name
,
1768 b_all
->dev_bytenr
, b_all
->mirror_num
,
1770 btrfsic_get_block_type(state
, l
->block_ref_to
),
1771 l
->block_ref_to
->logical_bytenr
,
1772 l
->block_ref_to
->dev_state
->name
,
1773 l
->block_ref_to
->dev_bytenr
,
1774 l
->block_ref_to
->mirror_num
);
1777 list_for_each(elem_ref_from
, &b_all
->ref_from_list
) {
1778 const struct btrfsic_block_link
*const l
=
1779 list_entry(elem_ref_from
,
1780 struct btrfsic_block_link
,
1783 printk(KERN_INFO
" %c @%llu (%s/%llu/%d)"
1785 " %c @%llu (%s/%llu/%d)\n",
1786 btrfsic_get_block_type(state
, b_all
),
1787 b_all
->logical_bytenr
, b_all
->dev_state
->name
,
1788 b_all
->dev_bytenr
, b_all
->mirror_num
,
1790 btrfsic_get_block_type(state
, l
->block_ref_from
),
1791 l
->block_ref_from
->logical_bytenr
,
1792 l
->block_ref_from
->dev_state
->name
,
1793 l
->block_ref_from
->dev_bytenr
,
1794 l
->block_ref_from
->mirror_num
);
1797 printk(KERN_INFO
"\n");
1802 * Test whether the disk block contains a tree block (leaf or node)
1803 * (note that this test fails for the super block)
1805 static int btrfsic_test_for_metadata(struct btrfsic_state
*state
,
1806 char **datav
, unsigned int num_pages
)
1808 struct btrfs_header
*h
;
1809 u8 csum
[BTRFS_CSUM_SIZE
];
1813 if (num_pages
* PAGE_CACHE_SIZE
< state
->metablock_size
)
1814 return 1; /* not metadata */
1815 num_pages
= state
->metablock_size
>> PAGE_CACHE_SHIFT
;
1816 h
= (struct btrfs_header
*)datav
[0];
1818 if (memcmp(h
->fsid
, state
->root
->fs_info
->fsid
, BTRFS_UUID_SIZE
))
1821 for (i
= 0; i
< num_pages
; i
++) {
1822 u8
*data
= i
? datav
[i
] : (datav
[i
] + BTRFS_CSUM_SIZE
);
1823 size_t sublen
= i
? PAGE_CACHE_SIZE
:
1824 (PAGE_CACHE_SIZE
- BTRFS_CSUM_SIZE
);
1826 crc
= crc32c(crc
, data
, sublen
);
1828 btrfs_csum_final(crc
, csum
);
1829 if (memcmp(csum
, h
->csum
, state
->csum_size
))
1832 return 0; /* is metadata */
1835 static void btrfsic_process_written_block(struct btrfsic_dev_state
*dev_state
,
1836 u64 dev_bytenr
, char **mapped_datav
,
1837 unsigned int num_pages
,
1838 struct bio
*bio
, int *bio_is_patched
,
1839 struct buffer_head
*bh
,
1840 int submit_bio_bh_rw
)
1843 struct btrfsic_block
*block
;
1844 struct btrfsic_block_data_ctx block_ctx
;
1846 struct btrfsic_state
*state
= dev_state
->state
;
1847 struct block_device
*bdev
= dev_state
->bdev
;
1848 unsigned int processed_len
;
1850 if (NULL
!= bio_is_patched
)
1851 *bio_is_patched
= 0;
1858 is_metadata
= (0 == btrfsic_test_for_metadata(state
, mapped_datav
,
1861 block
= btrfsic_block_hashtable_lookup(bdev
, dev_bytenr
,
1862 &state
->block_hashtable
);
1863 if (NULL
!= block
) {
1865 struct list_head
*elem_ref_to
;
1866 struct list_head
*tmp_ref_to
;
1868 if (block
->is_superblock
) {
1869 bytenr
= btrfs_super_bytenr((struct btrfs_super_block
*)
1871 if (num_pages
* PAGE_CACHE_SIZE
<
1872 BTRFS_SUPER_INFO_SIZE
) {
1874 "btrfsic: cannot work with too short bios!\n");
1878 BUG_ON(BTRFS_SUPER_INFO_SIZE
& (PAGE_CACHE_SIZE
- 1));
1879 processed_len
= BTRFS_SUPER_INFO_SIZE
;
1880 if (state
->print_mask
&
1881 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE
) {
1883 "[before new superblock is written]:\n");
1884 btrfsic_dump_tree_sub(state
, block
, 0);
1888 if (!block
->is_superblock
) {
1889 if (num_pages
* PAGE_CACHE_SIZE
<
1890 state
->metablock_size
) {
1892 "btrfsic: cannot work with too short bios!\n");
1895 processed_len
= state
->metablock_size
;
1896 bytenr
= btrfs_stack_header_bytenr(
1897 (struct btrfs_header
*)
1899 btrfsic_cmp_log_and_dev_bytenr(state
, bytenr
,
1903 if (block
->logical_bytenr
!= bytenr
) {
1905 "Written block @%llu (%s/%llu/%d)"
1906 " found in hash table, %c,"
1908 " (!= stored %llu).\n",
1909 bytenr
, dev_state
->name
, dev_bytenr
,
1911 btrfsic_get_block_type(state
, block
),
1912 block
->logical_bytenr
);
1913 block
->logical_bytenr
= bytenr
;
1914 } else if (state
->print_mask
&
1915 BTRFSIC_PRINT_MASK_VERBOSE
)
1917 "Written block @%llu (%s/%llu/%d)"
1918 " found in hash table, %c.\n",
1919 bytenr
, dev_state
->name
, dev_bytenr
,
1921 btrfsic_get_block_type(state
, block
));
1923 if (num_pages
* PAGE_CACHE_SIZE
<
1924 state
->datablock_size
) {
1926 "btrfsic: cannot work with too short bios!\n");
1929 processed_len
= state
->datablock_size
;
1930 bytenr
= block
->logical_bytenr
;
1931 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1933 "Written block @%llu (%s/%llu/%d)"
1934 " found in hash table, %c.\n",
1935 bytenr
, dev_state
->name
, dev_bytenr
,
1937 btrfsic_get_block_type(state
, block
));
1940 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1942 "ref_to_list: %cE, ref_from_list: %cE\n",
1943 list_empty(&block
->ref_to_list
) ? ' ' : '!',
1944 list_empty(&block
->ref_from_list
) ? ' ' : '!');
1945 if (btrfsic_is_block_ref_by_superblock(state
, block
, 0)) {
1946 printk(KERN_INFO
"btrfs: attempt to overwrite %c-block"
1947 " @%llu (%s/%llu/%d), old(gen=%llu,"
1948 " objectid=%llu, type=%d, offset=%llu),"
1950 " which is referenced by most recent superblock"
1951 " (superblockgen=%llu)!\n",
1952 btrfsic_get_block_type(state
, block
), bytenr
,
1953 dev_state
->name
, dev_bytenr
, block
->mirror_num
,
1955 btrfs_disk_key_objectid(&block
->disk_key
),
1956 block
->disk_key
.type
,
1957 btrfs_disk_key_offset(&block
->disk_key
),
1958 btrfs_stack_header_generation(
1959 (struct btrfs_header
*) mapped_datav
[0]),
1960 state
->max_superblock_generation
);
1961 btrfsic_dump_tree(state
);
1964 if (!block
->is_iodone
&& !block
->never_written
) {
1965 printk(KERN_INFO
"btrfs: attempt to overwrite %c-block"
1966 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1967 " which is not yet iodone!\n",
1968 btrfsic_get_block_type(state
, block
), bytenr
,
1969 dev_state
->name
, dev_bytenr
, block
->mirror_num
,
1971 btrfs_stack_header_generation(
1972 (struct btrfs_header
*)
1974 /* it would not be safe to go on */
1975 btrfsic_dump_tree(state
);
1980 * Clear all references of this block. Do not free
1981 * the block itself even if is not referenced anymore
1982 * because it still carries valueable information
1983 * like whether it was ever written and IO completed.
1985 list_for_each_safe(elem_ref_to
, tmp_ref_to
,
1986 &block
->ref_to_list
) {
1987 struct btrfsic_block_link
*const l
=
1988 list_entry(elem_ref_to
,
1989 struct btrfsic_block_link
,
1992 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1993 btrfsic_print_rem_link(state
, l
);
1995 if (0 == l
->ref_cnt
) {
1996 list_del(&l
->node_ref_to
);
1997 list_del(&l
->node_ref_from
);
1998 btrfsic_block_link_hashtable_remove(l
);
1999 btrfsic_block_link_free(l
);
2003 if (block
->is_superblock
)
2004 ret
= btrfsic_map_superblock(state
, bytenr
,
2008 ret
= btrfsic_map_block(state
, bytenr
, processed_len
,
2012 "btrfsic: btrfsic_map_block(root @%llu)"
2013 " failed!\n", bytenr
);
2016 block_ctx
.datav
= mapped_datav
;
2017 /* the following is required in case of writes to mirrors,
2018 * use the same that was used for the lookup */
2019 block_ctx
.dev
= dev_state
;
2020 block_ctx
.dev_bytenr
= dev_bytenr
;
2022 if (is_metadata
|| state
->include_extent_data
) {
2023 block
->never_written
= 0;
2024 block
->iodone_w_error
= 0;
2026 block
->is_iodone
= 0;
2027 BUG_ON(NULL
== bio_is_patched
);
2028 if (!*bio_is_patched
) {
2029 block
->orig_bio_bh_private
=
2031 block
->orig_bio_bh_end_io
.bio
=
2033 block
->next_in_same_bio
= NULL
;
2034 bio
->bi_private
= block
;
2035 bio
->bi_end_io
= btrfsic_bio_end_io
;
2036 *bio_is_patched
= 1;
2038 struct btrfsic_block
*chained_block
=
2039 (struct btrfsic_block
*)
2042 BUG_ON(NULL
== chained_block
);
2043 block
->orig_bio_bh_private
=
2044 chained_block
->orig_bio_bh_private
;
2045 block
->orig_bio_bh_end_io
.bio
=
2046 chained_block
->orig_bio_bh_end_io
.
2048 block
->next_in_same_bio
= chained_block
;
2049 bio
->bi_private
= block
;
2051 } else if (NULL
!= bh
) {
2052 block
->is_iodone
= 0;
2053 block
->orig_bio_bh_private
= bh
->b_private
;
2054 block
->orig_bio_bh_end_io
.bh
= bh
->b_end_io
;
2055 block
->next_in_same_bio
= NULL
;
2056 bh
->b_private
= block
;
2057 bh
->b_end_io
= btrfsic_bh_end_io
;
2059 block
->is_iodone
= 1;
2060 block
->orig_bio_bh_private
= NULL
;
2061 block
->orig_bio_bh_end_io
.bio
= NULL
;
2062 block
->next_in_same_bio
= NULL
;
2066 block
->flush_gen
= dev_state
->last_flush_gen
+ 1;
2067 block
->submit_bio_bh_rw
= submit_bio_bh_rw
;
2069 block
->logical_bytenr
= bytenr
;
2070 block
->is_metadata
= 1;
2071 if (block
->is_superblock
) {
2072 BUG_ON(PAGE_CACHE_SIZE
!=
2073 BTRFS_SUPER_INFO_SIZE
);
2074 ret
= btrfsic_process_written_superblock(
2077 (struct btrfs_super_block
*)
2079 if (state
->print_mask
&
2080 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE
) {
2082 "[after new superblock is written]:\n");
2083 btrfsic_dump_tree_sub(state
, block
, 0);
2086 block
->mirror_num
= 0; /* unknown */
2087 ret
= btrfsic_process_metablock(
2095 "btrfsic: btrfsic_process_metablock"
2096 "(root @%llu) failed!\n",
2099 block
->is_metadata
= 0;
2100 block
->mirror_num
= 0; /* unknown */
2101 block
->generation
= BTRFSIC_GENERATION_UNKNOWN
;
2102 if (!state
->include_extent_data
2103 && list_empty(&block
->ref_from_list
)) {
2105 * disk block is overwritten with extent
2106 * data (not meta data) and we are configured
2107 * to not include extent data: take the
2108 * chance and free the block's memory
2110 btrfsic_block_hashtable_remove(block
);
2111 list_del(&block
->all_blocks_node
);
2112 btrfsic_block_free(block
);
2115 btrfsic_release_block_ctx(&block_ctx
);
2117 /* block has not been found in hash table */
2121 processed_len
= state
->datablock_size
;
2122 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2123 printk(KERN_INFO
"Written block (%s/%llu/?)"
2124 " !found in hash table, D.\n",
2125 dev_state
->name
, dev_bytenr
);
2126 if (!state
->include_extent_data
) {
2127 /* ignore that written D block */
2131 /* this is getting ugly for the
2132 * include_extent_data case... */
2133 bytenr
= 0; /* unknown */
2134 block_ctx
.start
= bytenr
;
2135 block_ctx
.len
= processed_len
;
2136 block_ctx
.mem_to_free
= NULL
;
2137 block_ctx
.pagev
= NULL
;
2139 processed_len
= state
->metablock_size
;
2140 bytenr
= btrfs_stack_header_bytenr(
2141 (struct btrfs_header
*)
2143 btrfsic_cmp_log_and_dev_bytenr(state
, bytenr
, dev_state
,
2145 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2147 "Written block @%llu (%s/%llu/?)"
2148 " !found in hash table, M.\n",
2149 bytenr
, dev_state
->name
, dev_bytenr
);
2151 ret
= btrfsic_map_block(state
, bytenr
, processed_len
,
2155 "btrfsic: btrfsic_map_block(root @%llu)"
2161 block_ctx
.datav
= mapped_datav
;
2162 /* the following is required in case of writes to mirrors,
2163 * use the same that was used for the lookup */
2164 block_ctx
.dev
= dev_state
;
2165 block_ctx
.dev_bytenr
= dev_bytenr
;
2167 block
= btrfsic_block_alloc();
2168 if (NULL
== block
) {
2169 printk(KERN_INFO
"btrfsic: error, kmalloc failed!\n");
2170 btrfsic_release_block_ctx(&block_ctx
);
2173 block
->dev_state
= dev_state
;
2174 block
->dev_bytenr
= dev_bytenr
;
2175 block
->logical_bytenr
= bytenr
;
2176 block
->is_metadata
= is_metadata
;
2177 block
->never_written
= 0;
2178 block
->iodone_w_error
= 0;
2179 block
->mirror_num
= 0; /* unknown */
2180 block
->flush_gen
= dev_state
->last_flush_gen
+ 1;
2181 block
->submit_bio_bh_rw
= submit_bio_bh_rw
;
2183 block
->is_iodone
= 0;
2184 BUG_ON(NULL
== bio_is_patched
);
2185 if (!*bio_is_patched
) {
2186 block
->orig_bio_bh_private
= bio
->bi_private
;
2187 block
->orig_bio_bh_end_io
.bio
= bio
->bi_end_io
;
2188 block
->next_in_same_bio
= NULL
;
2189 bio
->bi_private
= block
;
2190 bio
->bi_end_io
= btrfsic_bio_end_io
;
2191 *bio_is_patched
= 1;
2193 struct btrfsic_block
*chained_block
=
2194 (struct btrfsic_block
*)
2197 BUG_ON(NULL
== chained_block
);
2198 block
->orig_bio_bh_private
=
2199 chained_block
->orig_bio_bh_private
;
2200 block
->orig_bio_bh_end_io
.bio
=
2201 chained_block
->orig_bio_bh_end_io
.bio
;
2202 block
->next_in_same_bio
= chained_block
;
2203 bio
->bi_private
= block
;
2205 } else if (NULL
!= bh
) {
2206 block
->is_iodone
= 0;
2207 block
->orig_bio_bh_private
= bh
->b_private
;
2208 block
->orig_bio_bh_end_io
.bh
= bh
->b_end_io
;
2209 block
->next_in_same_bio
= NULL
;
2210 bh
->b_private
= block
;
2211 bh
->b_end_io
= btrfsic_bh_end_io
;
2213 block
->is_iodone
= 1;
2214 block
->orig_bio_bh_private
= NULL
;
2215 block
->orig_bio_bh_end_io
.bio
= NULL
;
2216 block
->next_in_same_bio
= NULL
;
2218 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2220 "New written %c-block @%llu (%s/%llu/%d)\n",
2221 is_metadata
? 'M' : 'D',
2222 block
->logical_bytenr
, block
->dev_state
->name
,
2223 block
->dev_bytenr
, block
->mirror_num
);
2224 list_add(&block
->all_blocks_node
, &state
->all_blocks_list
);
2225 btrfsic_block_hashtable_add(block
, &state
->block_hashtable
);
2228 ret
= btrfsic_process_metablock(state
, block
,
2232 "btrfsic: process_metablock(root @%llu)"
2236 btrfsic_release_block_ctx(&block_ctx
);
2240 BUG_ON(!processed_len
);
2241 dev_bytenr
+= processed_len
;
2242 mapped_datav
+= processed_len
>> PAGE_CACHE_SHIFT
;
2243 num_pages
-= processed_len
>> PAGE_CACHE_SHIFT
;
2247 static void btrfsic_bio_end_io(struct bio
*bp
, int bio_error_status
)
2249 struct btrfsic_block
*block
= (struct btrfsic_block
*)bp
->bi_private
;
2252 /* mutex is not held! This is not save if IO is not yet completed
2255 if (bio_error_status
)
2258 BUG_ON(NULL
== block
);
2259 bp
->bi_private
= block
->orig_bio_bh_private
;
2260 bp
->bi_end_io
= block
->orig_bio_bh_end_io
.bio
;
2263 struct btrfsic_block
*next_block
;
2264 struct btrfsic_dev_state
*const dev_state
= block
->dev_state
;
2266 if ((dev_state
->state
->print_mask
&
2267 BTRFSIC_PRINT_MASK_END_IO_BIO_BH
))
2269 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2271 btrfsic_get_block_type(dev_state
->state
, block
),
2272 block
->logical_bytenr
, dev_state
->name
,
2273 block
->dev_bytenr
, block
->mirror_num
);
2274 next_block
= block
->next_in_same_bio
;
2275 block
->iodone_w_error
= iodone_w_error
;
2276 if (block
->submit_bio_bh_rw
& REQ_FLUSH
) {
2277 dev_state
->last_flush_gen
++;
2278 if ((dev_state
->state
->print_mask
&
2279 BTRFSIC_PRINT_MASK_END_IO_BIO_BH
))
2281 "bio_end_io() new %s flush_gen=%llu\n",
2283 dev_state
->last_flush_gen
);
2285 if (block
->submit_bio_bh_rw
& REQ_FUA
)
2286 block
->flush_gen
= 0; /* FUA completed means block is
2288 block
->is_iodone
= 1; /* for FLUSH, this releases the block */
2290 } while (NULL
!= block
);
2292 bp
->bi_end_io(bp
, bio_error_status
);
2295 static void btrfsic_bh_end_io(struct buffer_head
*bh
, int uptodate
)
2297 struct btrfsic_block
*block
= (struct btrfsic_block
*)bh
->b_private
;
2298 int iodone_w_error
= !uptodate
;
2299 struct btrfsic_dev_state
*dev_state
;
2301 BUG_ON(NULL
== block
);
2302 dev_state
= block
->dev_state
;
2303 if ((dev_state
->state
->print_mask
& BTRFSIC_PRINT_MASK_END_IO_BIO_BH
))
2305 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2307 btrfsic_get_block_type(dev_state
->state
, block
),
2308 block
->logical_bytenr
, block
->dev_state
->name
,
2309 block
->dev_bytenr
, block
->mirror_num
);
2311 block
->iodone_w_error
= iodone_w_error
;
2312 if (block
->submit_bio_bh_rw
& REQ_FLUSH
) {
2313 dev_state
->last_flush_gen
++;
2314 if ((dev_state
->state
->print_mask
&
2315 BTRFSIC_PRINT_MASK_END_IO_BIO_BH
))
2317 "bh_end_io() new %s flush_gen=%llu\n",
2318 dev_state
->name
, dev_state
->last_flush_gen
);
2320 if (block
->submit_bio_bh_rw
& REQ_FUA
)
2321 block
->flush_gen
= 0; /* FUA completed means block is on disk */
2323 bh
->b_private
= block
->orig_bio_bh_private
;
2324 bh
->b_end_io
= block
->orig_bio_bh_end_io
.bh
;
2325 block
->is_iodone
= 1; /* for FLUSH, this releases the block */
2326 bh
->b_end_io(bh
, uptodate
);
2329 static int btrfsic_process_written_superblock(
2330 struct btrfsic_state
*state
,
2331 struct btrfsic_block
*const superblock
,
2332 struct btrfs_super_block
*const super_hdr
)
2336 superblock
->generation
= btrfs_super_generation(super_hdr
);
2337 if (!(superblock
->generation
> state
->max_superblock_generation
||
2338 0 == state
->max_superblock_generation
)) {
2339 if (state
->print_mask
& BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE
)
2341 "btrfsic: superblock @%llu (%s/%llu/%d)"
2342 " with old gen %llu <= %llu\n",
2343 superblock
->logical_bytenr
,
2344 superblock
->dev_state
->name
,
2345 superblock
->dev_bytenr
, superblock
->mirror_num
,
2346 btrfs_super_generation(super_hdr
),
2347 state
->max_superblock_generation
);
2349 if (state
->print_mask
& BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE
)
2351 "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2352 " with new gen %llu > %llu\n",
2353 superblock
->logical_bytenr
,
2354 superblock
->dev_state
->name
,
2355 superblock
->dev_bytenr
, superblock
->mirror_num
,
2356 btrfs_super_generation(super_hdr
),
2357 state
->max_superblock_generation
);
2359 state
->max_superblock_generation
=
2360 btrfs_super_generation(super_hdr
);
2361 state
->latest_superblock
= superblock
;
2364 for (pass
= 0; pass
< 3; pass
++) {
2367 struct btrfsic_block
*next_block
;
2368 struct btrfsic_block_data_ctx tmp_next_block_ctx
;
2369 struct btrfsic_block_link
*l
;
2372 const char *additional_string
= NULL
;
2373 struct btrfs_disk_key tmp_disk_key
= {0};
2375 btrfs_set_disk_key_objectid(&tmp_disk_key
,
2376 BTRFS_ROOT_ITEM_KEY
);
2377 btrfs_set_disk_key_objectid(&tmp_disk_key
, 0);
2381 btrfs_set_disk_key_objectid(&tmp_disk_key
,
2382 BTRFS_ROOT_TREE_OBJECTID
);
2383 additional_string
= "root ";
2384 next_bytenr
= btrfs_super_root(super_hdr
);
2385 if (state
->print_mask
&
2386 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
2387 printk(KERN_INFO
"root@%llu\n", next_bytenr
);
2390 btrfs_set_disk_key_objectid(&tmp_disk_key
,
2391 BTRFS_CHUNK_TREE_OBJECTID
);
2392 additional_string
= "chunk ";
2393 next_bytenr
= btrfs_super_chunk_root(super_hdr
);
2394 if (state
->print_mask
&
2395 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
2396 printk(KERN_INFO
"chunk@%llu\n", next_bytenr
);
2399 btrfs_set_disk_key_objectid(&tmp_disk_key
,
2400 BTRFS_TREE_LOG_OBJECTID
);
2401 additional_string
= "log ";
2402 next_bytenr
= btrfs_super_log_root(super_hdr
);
2403 if (0 == next_bytenr
)
2405 if (state
->print_mask
&
2406 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION
)
2407 printk(KERN_INFO
"log@%llu\n", next_bytenr
);
2412 btrfs_num_copies(state
->root
->fs_info
,
2413 next_bytenr
, BTRFS_SUPER_INFO_SIZE
);
2414 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
2415 printk(KERN_INFO
"num_copies(log_bytenr=%llu) = %d\n",
2416 next_bytenr
, num_copies
);
2417 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
2420 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2422 "btrfsic_process_written_superblock("
2423 "mirror_num=%d)\n", mirror_num
);
2424 ret
= btrfsic_map_block(state
, next_bytenr
,
2425 BTRFS_SUPER_INFO_SIZE
,
2426 &tmp_next_block_ctx
,
2430 "btrfsic: btrfsic_map_block(@%llu,"
2431 " mirror=%d) failed!\n",
2432 next_bytenr
, mirror_num
);
2436 next_block
= btrfsic_block_lookup_or_add(
2438 &tmp_next_block_ctx
,
2443 if (NULL
== next_block
) {
2445 "btrfsic: error, kmalloc failed!\n");
2446 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
2450 next_block
->disk_key
= tmp_disk_key
;
2452 next_block
->generation
=
2453 BTRFSIC_GENERATION_UNKNOWN
;
2454 l
= btrfsic_block_link_lookup_or_add(
2456 &tmp_next_block_ctx
,
2459 BTRFSIC_GENERATION_UNKNOWN
);
2460 btrfsic_release_block_ctx(&tmp_next_block_ctx
);
2466 if (-1 == btrfsic_check_all_ref_blocks(state
, superblock
, 0)) {
2468 btrfsic_dump_tree(state
);
2474 static int btrfsic_check_all_ref_blocks(struct btrfsic_state
*state
,
2475 struct btrfsic_block
*const block
,
2476 int recursion_level
)
2478 struct list_head
*elem_ref_to
;
2481 if (recursion_level
>= 3 + BTRFS_MAX_LEVEL
) {
2483 * Note that this situation can happen and does not
2484 * indicate an error in regular cases. It happens
2485 * when disk blocks are freed and later reused.
2486 * The check-integrity module is not aware of any
2487 * block free operations, it just recognizes block
2488 * write operations. Therefore it keeps the linkage
2489 * information for a block until a block is
2490 * rewritten. This can temporarily cause incorrect
2491 * and even circular linkage informations. This
2492 * causes no harm unless such blocks are referenced
2493 * by the most recent super block.
2495 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2497 "btrfsic: abort cyclic linkage (case 1).\n");
2503 * This algorithm is recursive because the amount of used stack
2504 * space is very small and the max recursion depth is limited.
2506 list_for_each(elem_ref_to
, &block
->ref_to_list
) {
2507 const struct btrfsic_block_link
*const l
=
2508 list_entry(elem_ref_to
, struct btrfsic_block_link
,
2511 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2513 "rl=%d, %c @%llu (%s/%llu/%d)"
2514 " %u* refers to %c @%llu (%s/%llu/%d)\n",
2516 btrfsic_get_block_type(state
, block
),
2517 block
->logical_bytenr
, block
->dev_state
->name
,
2518 block
->dev_bytenr
, block
->mirror_num
,
2520 btrfsic_get_block_type(state
, l
->block_ref_to
),
2521 l
->block_ref_to
->logical_bytenr
,
2522 l
->block_ref_to
->dev_state
->name
,
2523 l
->block_ref_to
->dev_bytenr
,
2524 l
->block_ref_to
->mirror_num
);
2525 if (l
->block_ref_to
->never_written
) {
2526 printk(KERN_INFO
"btrfs: attempt to write superblock"
2527 " which references block %c @%llu (%s/%llu/%d)"
2528 " which is never written!\n",
2529 btrfsic_get_block_type(state
, l
->block_ref_to
),
2530 l
->block_ref_to
->logical_bytenr
,
2531 l
->block_ref_to
->dev_state
->name
,
2532 l
->block_ref_to
->dev_bytenr
,
2533 l
->block_ref_to
->mirror_num
);
2535 } else if (!l
->block_ref_to
->is_iodone
) {
2536 printk(KERN_INFO
"btrfs: attempt to write superblock"
2537 " which references block %c @%llu (%s/%llu/%d)"
2538 " which is not yet iodone!\n",
2539 btrfsic_get_block_type(state
, l
->block_ref_to
),
2540 l
->block_ref_to
->logical_bytenr
,
2541 l
->block_ref_to
->dev_state
->name
,
2542 l
->block_ref_to
->dev_bytenr
,
2543 l
->block_ref_to
->mirror_num
);
2545 } else if (l
->block_ref_to
->iodone_w_error
) {
2546 printk(KERN_INFO
"btrfs: attempt to write superblock"
2547 " which references block %c @%llu (%s/%llu/%d)"
2548 " which has write error!\n",
2549 btrfsic_get_block_type(state
, l
->block_ref_to
),
2550 l
->block_ref_to
->logical_bytenr
,
2551 l
->block_ref_to
->dev_state
->name
,
2552 l
->block_ref_to
->dev_bytenr
,
2553 l
->block_ref_to
->mirror_num
);
2555 } else if (l
->parent_generation
!=
2556 l
->block_ref_to
->generation
&&
2557 BTRFSIC_GENERATION_UNKNOWN
!=
2558 l
->parent_generation
&&
2559 BTRFSIC_GENERATION_UNKNOWN
!=
2560 l
->block_ref_to
->generation
) {
2561 printk(KERN_INFO
"btrfs: attempt to write superblock"
2562 " which references block %c @%llu (%s/%llu/%d)"
2563 " with generation %llu !="
2564 " parent generation %llu!\n",
2565 btrfsic_get_block_type(state
, l
->block_ref_to
),
2566 l
->block_ref_to
->logical_bytenr
,
2567 l
->block_ref_to
->dev_state
->name
,
2568 l
->block_ref_to
->dev_bytenr
,
2569 l
->block_ref_to
->mirror_num
,
2570 l
->block_ref_to
->generation
,
2571 l
->parent_generation
);
2573 } else if (l
->block_ref_to
->flush_gen
>
2574 l
->block_ref_to
->dev_state
->last_flush_gen
) {
2575 printk(KERN_INFO
"btrfs: attempt to write superblock"
2576 " which references block %c @%llu (%s/%llu/%d)"
2577 " which is not flushed out of disk's write cache"
2578 " (block flush_gen=%llu,"
2579 " dev->flush_gen=%llu)!\n",
2580 btrfsic_get_block_type(state
, l
->block_ref_to
),
2581 l
->block_ref_to
->logical_bytenr
,
2582 l
->block_ref_to
->dev_state
->name
,
2583 l
->block_ref_to
->dev_bytenr
,
2584 l
->block_ref_to
->mirror_num
, block
->flush_gen
,
2585 l
->block_ref_to
->dev_state
->last_flush_gen
);
2587 } else if (-1 == btrfsic_check_all_ref_blocks(state
,
2598 static int btrfsic_is_block_ref_by_superblock(
2599 const struct btrfsic_state
*state
,
2600 const struct btrfsic_block
*block
,
2601 int recursion_level
)
2603 struct list_head
*elem_ref_from
;
2605 if (recursion_level
>= 3 + BTRFS_MAX_LEVEL
) {
2606 /* refer to comment at "abort cyclic linkage (case 1)" */
2607 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2609 "btrfsic: abort cyclic linkage (case 2).\n");
2615 * This algorithm is recursive because the amount of used stack space
2616 * is very small and the max recursion depth is limited.
2618 list_for_each(elem_ref_from
, &block
->ref_from_list
) {
2619 const struct btrfsic_block_link
*const l
=
2620 list_entry(elem_ref_from
, struct btrfsic_block_link
,
2623 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2625 "rl=%d, %c @%llu (%s/%llu/%d)"
2626 " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2628 btrfsic_get_block_type(state
, block
),
2629 block
->logical_bytenr
, block
->dev_state
->name
,
2630 block
->dev_bytenr
, block
->mirror_num
,
2632 btrfsic_get_block_type(state
, l
->block_ref_from
),
2633 l
->block_ref_from
->logical_bytenr
,
2634 l
->block_ref_from
->dev_state
->name
,
2635 l
->block_ref_from
->dev_bytenr
,
2636 l
->block_ref_from
->mirror_num
);
2637 if (l
->block_ref_from
->is_superblock
&&
2638 state
->latest_superblock
->dev_bytenr
==
2639 l
->block_ref_from
->dev_bytenr
&&
2640 state
->latest_superblock
->dev_state
->bdev
==
2641 l
->block_ref_from
->dev_state
->bdev
)
2643 else if (btrfsic_is_block_ref_by_superblock(state
,
2653 static void btrfsic_print_add_link(const struct btrfsic_state
*state
,
2654 const struct btrfsic_block_link
*l
)
2657 "Add %u* link from %c @%llu (%s/%llu/%d)"
2658 " to %c @%llu (%s/%llu/%d).\n",
2660 btrfsic_get_block_type(state
, l
->block_ref_from
),
2661 l
->block_ref_from
->logical_bytenr
,
2662 l
->block_ref_from
->dev_state
->name
,
2663 l
->block_ref_from
->dev_bytenr
, l
->block_ref_from
->mirror_num
,
2664 btrfsic_get_block_type(state
, l
->block_ref_to
),
2665 l
->block_ref_to
->logical_bytenr
,
2666 l
->block_ref_to
->dev_state
->name
, l
->block_ref_to
->dev_bytenr
,
2667 l
->block_ref_to
->mirror_num
);
2670 static void btrfsic_print_rem_link(const struct btrfsic_state
*state
,
2671 const struct btrfsic_block_link
*l
)
2674 "Rem %u* link from %c @%llu (%s/%llu/%d)"
2675 " to %c @%llu (%s/%llu/%d).\n",
2677 btrfsic_get_block_type(state
, l
->block_ref_from
),
2678 l
->block_ref_from
->logical_bytenr
,
2679 l
->block_ref_from
->dev_state
->name
,
2680 l
->block_ref_from
->dev_bytenr
, l
->block_ref_from
->mirror_num
,
2681 btrfsic_get_block_type(state
, l
->block_ref_to
),
2682 l
->block_ref_to
->logical_bytenr
,
2683 l
->block_ref_to
->dev_state
->name
, l
->block_ref_to
->dev_bytenr
,
2684 l
->block_ref_to
->mirror_num
);
2687 static char btrfsic_get_block_type(const struct btrfsic_state
*state
,
2688 const struct btrfsic_block
*block
)
2690 if (block
->is_superblock
&&
2691 state
->latest_superblock
->dev_bytenr
== block
->dev_bytenr
&&
2692 state
->latest_superblock
->dev_state
->bdev
== block
->dev_state
->bdev
)
2694 else if (block
->is_superblock
)
2696 else if (block
->is_metadata
)
2702 static void btrfsic_dump_tree(const struct btrfsic_state
*state
)
2704 btrfsic_dump_tree_sub(state
, state
->latest_superblock
, 0);
2707 static void btrfsic_dump_tree_sub(const struct btrfsic_state
*state
,
2708 const struct btrfsic_block
*block
,
2711 struct list_head
*elem_ref_to
;
2713 static char buf
[80];
2714 int cursor_position
;
2717 * Should better fill an on-stack buffer with a complete line and
2718 * dump it at once when it is time to print a newline character.
2722 * This algorithm is recursive because the amount of used stack space
2723 * is very small and the max recursion depth is limited.
2725 indent_add
= sprintf(buf
, "%c-%llu(%s/%llu/%d)",
2726 btrfsic_get_block_type(state
, block
),
2727 block
->logical_bytenr
, block
->dev_state
->name
,
2728 block
->dev_bytenr
, block
->mirror_num
);
2729 if (indent_level
+ indent_add
> BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL
) {
2734 indent_level
+= indent_add
;
2735 if (list_empty(&block
->ref_to_list
)) {
2739 if (block
->mirror_num
> 1 &&
2740 !(state
->print_mask
& BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS
)) {
2745 cursor_position
= indent_level
;
2746 list_for_each(elem_ref_to
, &block
->ref_to_list
) {
2747 const struct btrfsic_block_link
*const l
=
2748 list_entry(elem_ref_to
, struct btrfsic_block_link
,
2751 while (cursor_position
< indent_level
) {
2756 indent_add
= sprintf(buf
, " %d*--> ", l
->ref_cnt
);
2758 indent_add
= sprintf(buf
, " --> ");
2759 if (indent_level
+ indent_add
>
2760 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL
) {
2762 cursor_position
= 0;
2768 btrfsic_dump_tree_sub(state
, l
->block_ref_to
,
2769 indent_level
+ indent_add
);
2770 cursor_position
= 0;
2774 static struct btrfsic_block_link
*btrfsic_block_link_lookup_or_add(
2775 struct btrfsic_state
*state
,
2776 struct btrfsic_block_data_ctx
*next_block_ctx
,
2777 struct btrfsic_block
*next_block
,
2778 struct btrfsic_block
*from_block
,
2779 u64 parent_generation
)
2781 struct btrfsic_block_link
*l
;
2783 l
= btrfsic_block_link_hashtable_lookup(next_block_ctx
->dev
->bdev
,
2784 next_block_ctx
->dev_bytenr
,
2785 from_block
->dev_state
->bdev
,
2786 from_block
->dev_bytenr
,
2787 &state
->block_link_hashtable
);
2789 l
= btrfsic_block_link_alloc();
2792 "btrfsic: error, kmalloc" " failed!\n");
2796 l
->block_ref_to
= next_block
;
2797 l
->block_ref_from
= from_block
;
2799 l
->parent_generation
= parent_generation
;
2801 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2802 btrfsic_print_add_link(state
, l
);
2804 list_add(&l
->node_ref_to
, &from_block
->ref_to_list
);
2805 list_add(&l
->node_ref_from
, &next_block
->ref_from_list
);
2807 btrfsic_block_link_hashtable_add(l
,
2808 &state
->block_link_hashtable
);
2811 l
->parent_generation
= parent_generation
;
2812 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2813 btrfsic_print_add_link(state
, l
);
2819 static struct btrfsic_block
*btrfsic_block_lookup_or_add(
2820 struct btrfsic_state
*state
,
2821 struct btrfsic_block_data_ctx
*block_ctx
,
2822 const char *additional_string
,
2829 struct btrfsic_block
*block
;
2831 block
= btrfsic_block_hashtable_lookup(block_ctx
->dev
->bdev
,
2832 block_ctx
->dev_bytenr
,
2833 &state
->block_hashtable
);
2834 if (NULL
== block
) {
2835 struct btrfsic_dev_state
*dev_state
;
2837 block
= btrfsic_block_alloc();
2838 if (NULL
== block
) {
2839 printk(KERN_INFO
"btrfsic: error, kmalloc failed!\n");
2842 dev_state
= btrfsic_dev_state_lookup(block_ctx
->dev
->bdev
);
2843 if (NULL
== dev_state
) {
2845 "btrfsic: error, lookup dev_state failed!\n");
2846 btrfsic_block_free(block
);
2849 block
->dev_state
= dev_state
;
2850 block
->dev_bytenr
= block_ctx
->dev_bytenr
;
2851 block
->logical_bytenr
= block_ctx
->start
;
2852 block
->is_metadata
= is_metadata
;
2853 block
->is_iodone
= is_iodone
;
2854 block
->never_written
= never_written
;
2855 block
->mirror_num
= mirror_num
;
2856 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
2858 "New %s%c-block @%llu (%s/%llu/%d)\n",
2860 btrfsic_get_block_type(state
, block
),
2861 block
->logical_bytenr
, dev_state
->name
,
2862 block
->dev_bytenr
, mirror_num
);
2863 list_add(&block
->all_blocks_node
, &state
->all_blocks_list
);
2864 btrfsic_block_hashtable_add(block
, &state
->block_hashtable
);
2865 if (NULL
!= was_created
)
2868 if (NULL
!= was_created
)
2875 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state
*state
,
2877 struct btrfsic_dev_state
*dev_state
,
2883 struct btrfsic_block_data_ctx block_ctx
;
2886 num_copies
= btrfs_num_copies(state
->root
->fs_info
,
2887 bytenr
, state
->metablock_size
);
2889 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
2890 ret
= btrfsic_map_block(state
, bytenr
, state
->metablock_size
,
2891 &block_ctx
, mirror_num
);
2893 printk(KERN_INFO
"btrfsic:"
2894 " btrfsic_map_block(logical @%llu,"
2895 " mirror %d) failed!\n",
2896 bytenr
, mirror_num
);
2900 if (dev_state
->bdev
== block_ctx
.dev
->bdev
&&
2901 dev_bytenr
== block_ctx
.dev_bytenr
) {
2903 btrfsic_release_block_ctx(&block_ctx
);
2906 btrfsic_release_block_ctx(&block_ctx
);
2910 printk(KERN_INFO
"btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2911 " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2912 " phys_bytenr=%llu)!\n",
2913 bytenr
, dev_state
->name
, dev_bytenr
);
2914 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
2915 ret
= btrfsic_map_block(state
, bytenr
,
2916 state
->metablock_size
,
2917 &block_ctx
, mirror_num
);
2921 printk(KERN_INFO
"Read logical bytenr @%llu maps to"
2923 bytenr
, block_ctx
.dev
->name
,
2924 block_ctx
.dev_bytenr
, mirror_num
);
2930 static struct btrfsic_dev_state
*btrfsic_dev_state_lookup(
2931 struct block_device
*bdev
)
2933 struct btrfsic_dev_state
*ds
;
2935 ds
= btrfsic_dev_state_hashtable_lookup(bdev
,
2936 &btrfsic_dev_state_hashtable
);
2940 int btrfsic_submit_bh(int rw
, struct buffer_head
*bh
)
2942 struct btrfsic_dev_state
*dev_state
;
2944 if (!btrfsic_is_initialized
)
2945 return submit_bh(rw
, bh
);
2947 mutex_lock(&btrfsic_mutex
);
2948 /* since btrfsic_submit_bh() might also be called before
2949 * btrfsic_mount(), this might return NULL */
2950 dev_state
= btrfsic_dev_state_lookup(bh
->b_bdev
);
2952 /* Only called to write the superblock (incl. FLUSH/FUA) */
2953 if (NULL
!= dev_state
&&
2954 (rw
& WRITE
) && bh
->b_size
> 0) {
2957 dev_bytenr
= 4096 * bh
->b_blocknr
;
2958 if (dev_state
->state
->print_mask
&
2959 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
)
2961 "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
2962 " size=%zu, data=%p, bdev=%p)\n",
2963 rw
, (unsigned long long)bh
->b_blocknr
,
2964 dev_bytenr
, bh
->b_size
, bh
->b_data
, bh
->b_bdev
);
2965 btrfsic_process_written_block(dev_state
, dev_bytenr
,
2966 &bh
->b_data
, 1, NULL
,
2968 } else if (NULL
!= dev_state
&& (rw
& REQ_FLUSH
)) {
2969 if (dev_state
->state
->print_mask
&
2970 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
)
2972 "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
2974 if (!dev_state
->dummy_block_for_bio_bh_flush
.is_iodone
) {
2975 if ((dev_state
->state
->print_mask
&
2976 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
|
2977 BTRFSIC_PRINT_MASK_VERBOSE
)))
2979 "btrfsic_submit_bh(%s) with FLUSH"
2980 " but dummy block already in use"
2984 struct btrfsic_block
*const block
=
2985 &dev_state
->dummy_block_for_bio_bh_flush
;
2987 block
->is_iodone
= 0;
2988 block
->never_written
= 0;
2989 block
->iodone_w_error
= 0;
2990 block
->flush_gen
= dev_state
->last_flush_gen
+ 1;
2991 block
->submit_bio_bh_rw
= rw
;
2992 block
->orig_bio_bh_private
= bh
->b_private
;
2993 block
->orig_bio_bh_end_io
.bh
= bh
->b_end_io
;
2994 block
->next_in_same_bio
= NULL
;
2995 bh
->b_private
= block
;
2996 bh
->b_end_io
= btrfsic_bh_end_io
;
2999 mutex_unlock(&btrfsic_mutex
);
3000 return submit_bh(rw
, bh
);
3003 void btrfsic_submit_bio(int rw
, struct bio
*bio
)
3005 struct btrfsic_dev_state
*dev_state
;
3007 if (!btrfsic_is_initialized
) {
3008 submit_bio(rw
, bio
);
3012 mutex_lock(&btrfsic_mutex
);
3013 /* since btrfsic_submit_bio() is also called before
3014 * btrfsic_mount(), this might return NULL */
3015 dev_state
= btrfsic_dev_state_lookup(bio
->bi_bdev
);
3016 if (NULL
!= dev_state
&&
3017 (rw
& WRITE
) && NULL
!= bio
->bi_io_vec
) {
3021 char **mapped_datav
;
3023 dev_bytenr
= 512 * bio
->bi_sector
;
3025 if (dev_state
->state
->print_mask
&
3026 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
)
3028 "submit_bio(rw=0x%x, bi_vcnt=%u,"
3029 " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
3031 (unsigned long long)bio
->bi_sector
, dev_bytenr
,
3034 mapped_datav
= kmalloc(sizeof(*mapped_datav
) * bio
->bi_vcnt
,
3038 for (i
= 0; i
< bio
->bi_vcnt
; i
++) {
3039 BUG_ON(bio
->bi_io_vec
[i
].bv_len
!= PAGE_CACHE_SIZE
);
3040 mapped_datav
[i
] = kmap(bio
->bi_io_vec
[i
].bv_page
);
3041 if (!mapped_datav
[i
]) {
3044 kunmap(bio
->bi_io_vec
[i
].bv_page
);
3046 kfree(mapped_datav
);
3049 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
|
3050 BTRFSIC_PRINT_MASK_VERBOSE
) ==
3051 (dev_state
->state
->print_mask
&
3052 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
|
3053 BTRFSIC_PRINT_MASK_VERBOSE
)))
3055 "#%u: page=%p, len=%u, offset=%u\n",
3056 i
, bio
->bi_io_vec
[i
].bv_page
,
3057 bio
->bi_io_vec
[i
].bv_len
,
3058 bio
->bi_io_vec
[i
].bv_offset
);
3060 btrfsic_process_written_block(dev_state
, dev_bytenr
,
3061 mapped_datav
, bio
->bi_vcnt
,
3062 bio
, &bio_is_patched
,
3066 kunmap(bio
->bi_io_vec
[i
].bv_page
);
3068 kfree(mapped_datav
);
3069 } else if (NULL
!= dev_state
&& (rw
& REQ_FLUSH
)) {
3070 if (dev_state
->state
->print_mask
&
3071 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
)
3073 "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
3075 if (!dev_state
->dummy_block_for_bio_bh_flush
.is_iodone
) {
3076 if ((dev_state
->state
->print_mask
&
3077 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH
|
3078 BTRFSIC_PRINT_MASK_VERBOSE
)))
3080 "btrfsic_submit_bio(%s) with FLUSH"
3081 " but dummy block already in use"
3085 struct btrfsic_block
*const block
=
3086 &dev_state
->dummy_block_for_bio_bh_flush
;
3088 block
->is_iodone
= 0;
3089 block
->never_written
= 0;
3090 block
->iodone_w_error
= 0;
3091 block
->flush_gen
= dev_state
->last_flush_gen
+ 1;
3092 block
->submit_bio_bh_rw
= rw
;
3093 block
->orig_bio_bh_private
= bio
->bi_private
;
3094 block
->orig_bio_bh_end_io
.bio
= bio
->bi_end_io
;
3095 block
->next_in_same_bio
= NULL
;
3096 bio
->bi_private
= block
;
3097 bio
->bi_end_io
= btrfsic_bio_end_io
;
3101 mutex_unlock(&btrfsic_mutex
);
3103 submit_bio(rw
, bio
);
3106 int btrfsic_mount(struct btrfs_root
*root
,
3107 struct btrfs_fs_devices
*fs_devices
,
3108 int including_extent_data
, u32 print_mask
)
3111 struct btrfsic_state
*state
;
3112 struct list_head
*dev_head
= &fs_devices
->devices
;
3113 struct btrfs_device
*device
;
3115 if (root
->nodesize
!= root
->leafsize
) {
3117 "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3118 root
->nodesize
, root
->leafsize
);
3121 if (root
->nodesize
& ((u64
)PAGE_CACHE_SIZE
- 1)) {
3123 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3124 root
->nodesize
, PAGE_CACHE_SIZE
);
3127 if (root
->leafsize
& ((u64
)PAGE_CACHE_SIZE
- 1)) {
3129 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3130 root
->leafsize
, PAGE_CACHE_SIZE
);
3133 if (root
->sectorsize
& ((u64
)PAGE_CACHE_SIZE
- 1)) {
3135 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3136 root
->sectorsize
, PAGE_CACHE_SIZE
);
3139 state
= kzalloc(sizeof(*state
), GFP_NOFS
);
3140 if (NULL
== state
) {
3141 printk(KERN_INFO
"btrfs check-integrity: kmalloc() failed!\n");
3145 if (!btrfsic_is_initialized
) {
3146 mutex_init(&btrfsic_mutex
);
3147 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable
);
3148 btrfsic_is_initialized
= 1;
3150 mutex_lock(&btrfsic_mutex
);
3152 state
->print_mask
= print_mask
;
3153 state
->include_extent_data
= including_extent_data
;
3154 state
->csum_size
= 0;
3155 state
->metablock_size
= root
->nodesize
;
3156 state
->datablock_size
= root
->sectorsize
;
3157 INIT_LIST_HEAD(&state
->all_blocks_list
);
3158 btrfsic_block_hashtable_init(&state
->block_hashtable
);
3159 btrfsic_block_link_hashtable_init(&state
->block_link_hashtable
);
3160 state
->max_superblock_generation
= 0;
3161 state
->latest_superblock
= NULL
;
3163 list_for_each_entry(device
, dev_head
, dev_list
) {
3164 struct btrfsic_dev_state
*ds
;
3167 if (!device
->bdev
|| !device
->name
)
3170 ds
= btrfsic_dev_state_alloc();
3173 "btrfs check-integrity: kmalloc() failed!\n");
3174 mutex_unlock(&btrfsic_mutex
);
3177 ds
->bdev
= device
->bdev
;
3179 bdevname(ds
->bdev
, ds
->name
);
3180 ds
->name
[BDEVNAME_SIZE
- 1] = '\0';
3181 for (p
= ds
->name
; *p
!= '\0'; p
++);
3182 while (p
> ds
->name
&& *p
!= '/')
3186 strlcpy(ds
->name
, p
, sizeof(ds
->name
));
3187 btrfsic_dev_state_hashtable_add(ds
,
3188 &btrfsic_dev_state_hashtable
);
3191 ret
= btrfsic_process_superblock(state
, fs_devices
);
3193 mutex_unlock(&btrfsic_mutex
);
3194 btrfsic_unmount(root
, fs_devices
);
3198 if (state
->print_mask
& BTRFSIC_PRINT_MASK_INITIAL_DATABASE
)
3199 btrfsic_dump_database(state
);
3200 if (state
->print_mask
& BTRFSIC_PRINT_MASK_INITIAL_TREE
)
3201 btrfsic_dump_tree(state
);
3203 mutex_unlock(&btrfsic_mutex
);
3207 void btrfsic_unmount(struct btrfs_root
*root
,
3208 struct btrfs_fs_devices
*fs_devices
)
3210 struct list_head
*elem_all
;
3211 struct list_head
*tmp_all
;
3212 struct btrfsic_state
*state
;
3213 struct list_head
*dev_head
= &fs_devices
->devices
;
3214 struct btrfs_device
*device
;
3216 if (!btrfsic_is_initialized
)
3219 mutex_lock(&btrfsic_mutex
);
3222 list_for_each_entry(device
, dev_head
, dev_list
) {
3223 struct btrfsic_dev_state
*ds
;
3225 if (!device
->bdev
|| !device
->name
)
3228 ds
= btrfsic_dev_state_hashtable_lookup(
3230 &btrfsic_dev_state_hashtable
);
3233 btrfsic_dev_state_hashtable_remove(ds
);
3234 btrfsic_dev_state_free(ds
);
3238 if (NULL
== state
) {
3240 "btrfsic: error, cannot find state information"
3242 mutex_unlock(&btrfsic_mutex
);
3247 * Don't care about keeping the lists' state up to date,
3248 * just free all memory that was allocated dynamically.
3249 * Free the blocks and the block_links.
3251 list_for_each_safe(elem_all
, tmp_all
, &state
->all_blocks_list
) {
3252 struct btrfsic_block
*const b_all
=
3253 list_entry(elem_all
, struct btrfsic_block
,
3255 struct list_head
*elem_ref_to
;
3256 struct list_head
*tmp_ref_to
;
3258 list_for_each_safe(elem_ref_to
, tmp_ref_to
,
3259 &b_all
->ref_to_list
) {
3260 struct btrfsic_block_link
*const l
=
3261 list_entry(elem_ref_to
,
3262 struct btrfsic_block_link
,
3265 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
3266 btrfsic_print_rem_link(state
, l
);
3269 if (0 == l
->ref_cnt
)
3270 btrfsic_block_link_free(l
);
3273 if (b_all
->is_iodone
|| b_all
->never_written
)
3274 btrfsic_block_free(b_all
);
3276 printk(KERN_INFO
"btrfs: attempt to free %c-block"
3277 " @%llu (%s/%llu/%d) on umount which is"
3278 " not yet iodone!\n",
3279 btrfsic_get_block_type(state
, b_all
),
3280 b_all
->logical_bytenr
, b_all
->dev_state
->name
,
3281 b_all
->dev_bytenr
, b_all
->mirror_num
);
3284 mutex_unlock(&btrfsic_mutex
);