2 * btrfs.c -- readonly btrfs support for syslinux
3 * Some data structures are derivated from btrfs-tools-0.19 ctree.h
4 * Copyright 2009 Intel Corporation; author: alek.du@intel.com
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
9 * Boston MA 02111-1307, USA; either version 2 of the License, or
10 * (at your option) any later version; incorporated herein by reference.
23 /* compare function used for bin_search */
24 typedef int (*cmp_func
)(void *ptr1
, void *ptr2
);
26 /* simple but useful bin search, used for chunk search and btree search */
27 static int bin_search(void *ptr
, int item_size
, void *cmp_item
, cmp_func func
,
28 int min
, int max
, int *slot
)
38 mid
= (low
+ high
) / 2;
39 offset
= mid
* item_size
;
42 ret
= func(item
, cmp_item
);
57 static int cache_ready
;
58 static struct btrfs_chunk_map chunk_map
;
59 static struct btrfs_super_block sb
;
60 /* used for small chunk read for btrfs_read */
61 #define RAW_BUF_SIZE 4096
62 static u8 raw_buf
[RAW_BUF_SIZE
];
65 static int btrfs_comp_chunk_map(struct btrfs_chunk_map_item
*m1
,
66 struct btrfs_chunk_map_item
*m2
)
68 if (m1
->logical
> m2
->logical
)
70 if (m1
->logical
< m2
->logical
)
75 /* insert a new chunk mapping item */
76 static void insert_map(struct btrfs_chunk_map_item
*item
)
82 if (chunk_map
.map
== NULL
) { /* first item */
83 chunk_map
.map_length
= BTRFS_MAX_CHUNK_ENTRIES
;
84 chunk_map
.map
= (struct btrfs_chunk_map_item
*)
85 malloc(chunk_map
.map_length
* sizeof(*chunk_map
.map
));
86 chunk_map
.map
[0] = *item
;
87 chunk_map
.cur_length
= 1;
90 ret
= bin_search(chunk_map
.map
, sizeof(*item
), item
,
91 (cmp_func
)btrfs_comp_chunk_map
, 0,
92 chunk_map
.cur_length
, &slot
);
93 if (ret
== 0)/* already in map */
95 if (chunk_map
.cur_length
== BTRFS_MAX_CHUNK_ENTRIES
) {
96 /* should be impossible */
97 printf("too many chunk items\n");
100 for (i
= chunk_map
.cur_length
; i
> slot
; i
--)
101 chunk_map
.map
[i
] = chunk_map
.map
[i
-1];
102 chunk_map
.map
[slot
] = *item
;
103 chunk_map
.cur_length
++;
107 * from sys_chunk_array or chunk_tree, we can convert a logical address to
108 * a physical address we can not support multi device case yet
110 static u64
logical_physical(u64 logical
)
112 struct btrfs_chunk_map_item item
;
115 item
.logical
= logical
;
116 ret
= bin_search(chunk_map
.map
, sizeof(*chunk_map
.map
), &item
,
117 (cmp_func
)btrfs_comp_chunk_map
, 0,
118 chunk_map
.cur_length
, &slot
);
124 chunk_map
.map
[slot
-1].logical
+ chunk_map
.map
[slot
-1].length
)
126 return chunk_map
.map
[slot
-1].physical
+ logical
-
127 chunk_map
.map
[slot
-1].logical
;
130 /* raw read from disk, offset and count are bytes */
131 static int raw_read(struct fs_info
*fs
, char *buf
, u64 offset
, u64 count
)
133 struct disk
*disk
= fs
->fs_dev
->disk
;
134 size_t max
= RAW_BUF_SIZE
>> disk
->sector_shift
;
135 size_t off
, cnt
, done
, total
;
140 sec
= offset
>> disk
->sector_shift
;
141 off
= offset
- (sec
<< disk
->sector_shift
);
142 done
= disk
->rdwr_sectors(disk
, raw_buf
, sec
, max
, 0);
143 if (done
== 0)/* no data */
145 cnt
= (done
<< disk
->sector_shift
) - off
;
148 memcpy(buf
, raw_buf
+ off
, cnt
);
152 if (done
!= max
)/* no enough sectors */
155 return total
- count
;
158 /* cache read from disk, offset and count are bytes */
159 static int cache_read(struct fs_info
*fs
, char *buf
, u64 offset
, u64 count
)
162 size_t block_size
= fs
->fs_dev
->cache_block_size
;
163 size_t off
, cnt
, total
;
168 block
= offset
/ block_size
;
169 off
= offset
% block_size
;
170 cd
= get_cache(fs
->fs_dev
, block
);
173 cnt
= block_size
- off
;
176 memcpy(buf
, cd
+ off
, cnt
);
181 return total
- count
;
184 static int btrfs_read(struct fs_info
*fs
, char *buf
, u64 offset
, u64 count
)
187 return cache_read(fs
, buf
, offset
, count
);
188 return raw_read(fs
, buf
, offset
, count
);
191 /* btrfs has several super block mirrors, need to calculate their location */
192 static inline u64
btrfs_sb_offset(int mirror
)
194 u64 start
= 16 * 1024;
196 return start
<< (BTRFS_SUPER_MIRROR_SHIFT
* mirror
);
197 return BTRFS_SUPER_INFO_OFFSET
;
200 /* find the most recent super block */
201 static void btrfs_read_super_block(struct fs_info
*fs
)
205 u8 fsid
[BTRFS_FSID_SIZE
];
208 struct btrfs_super_block buf
;
210 /* find most recent super block */
211 for (i
= 0; i
< BTRFS_SUPER_MIRROR_MAX
; i
++) {
212 offset
= btrfs_sb_offset(i
);
213 ret
= btrfs_read(fs
, (char *)&buf
, offset
, sizeof(buf
));
214 if (ret
< sizeof(buf
))
217 if (buf
.bytenr
!= offset
||
218 strncmp((char *)(&buf
.magic
), BTRFS_MAGIC
,
223 memcpy(fsid
, buf
.fsid
, sizeof(fsid
));
224 else if (memcmp(fsid
, buf
.fsid
, sizeof(fsid
)))
227 if (buf
.generation
> transid
) {
228 memcpy(&sb
, &buf
, sizeof(sb
));
229 transid
= buf
.generation
;
234 static inline unsigned long btrfs_chunk_item_size(int num_stripes
)
236 return sizeof(struct btrfs_chunk
) +
237 sizeof(struct btrfs_stripe
) * (num_stripes
- 1);
240 static void clear_path(struct btrfs_path
*path
)
242 memset(path
, 0, sizeof(*path
));
245 static int btrfs_comp_keys(struct btrfs_disk_key
*k1
, struct btrfs_disk_key
*k2
)
247 if (k1
->objectid
> k2
->objectid
)
249 if (k1
->objectid
< k2
->objectid
)
251 if (k1
->type
> k2
->type
)
253 if (k1
->type
< k2
->type
)
255 if (k1
->offset
> k2
->offset
)
257 if (k1
->offset
< k2
->offset
)
262 /* compare keys but ignore offset, is useful to enumerate all same kind keys */
263 static int btrfs_comp_keys_type(struct btrfs_disk_key
*k1
,
264 struct btrfs_disk_key
*k2
)
266 if (k1
->objectid
> k2
->objectid
)
268 if (k1
->objectid
< k2
->objectid
)
270 if (k1
->type
> k2
->type
)
272 if (k1
->type
< k2
->type
)
277 /* seach tree directly on disk ... */
278 static int search_tree(struct fs_info
*fs
, u64 loffset
,
279 struct btrfs_disk_key
*key
, struct btrfs_path
*path
)
281 u8 buf
[BTRFS_MAX_LEAF_SIZE
];
282 struct btrfs_header
*header
= (struct btrfs_header
*)buf
;
283 struct btrfs_node
*node
= (struct btrfs_node
*)buf
;
284 struct btrfs_leaf
*leaf
= (struct btrfs_leaf
*)buf
;
288 offset
= logical_physical(loffset
);
289 btrfs_read(fs
, (char *)header
, offset
, sizeof(*header
));
290 if (header
->level
) {/*node*/
291 btrfs_read(fs
, (char *)&node
->ptrs
[0], offset
+ sizeof(*header
),
292 sb
.nodesize
- sizeof(*header
));
293 path
->itemsnr
[header
->level
] = header
->nritems
;
294 path
->offsets
[header
->level
] = loffset
;
295 ret
= bin_search(&node
->ptrs
[0], sizeof(struct btrfs_key_ptr
),
296 key
, (cmp_func
)btrfs_comp_keys
,
297 path
->slots
[header
->level
], header
->nritems
, &slot
);
298 if (ret
&& slot
> path
->slots
[header
->level
])
300 path
->slots
[header
->level
] = slot
;
301 ret
= search_tree(fs
, node
->ptrs
[slot
].blockptr
, key
, path
);
303 btrfs_read(fs
, (char *)&leaf
->items
, offset
+ sizeof(*header
),
304 sb
.leafsize
- sizeof(*header
));
305 path
->itemsnr
[header
->level
] = header
->nritems
;
306 path
->offsets
[0] = loffset
;
307 ret
= bin_search(&leaf
->items
[0], sizeof(struct btrfs_item
),
308 key
, (cmp_func
)btrfs_comp_keys
, path
->slots
[0],
309 header
->nritems
, &slot
);
310 if (ret
&& slot
> path
->slots
[header
->level
])
312 path
->slots
[0] = slot
;
313 path
->item
= leaf
->items
[slot
];
314 btrfs_read(fs
, (char *)&path
->data
,
315 offset
+ sizeof(*header
) + leaf
->items
[slot
].offset
,
316 leaf
->items
[slot
].size
);
321 /* return 0 if leaf found */
322 static int next_leaf(struct fs_info
*fs
, struct btrfs_disk_key
*key
, struct btrfs_path
*path
)
327 while (level
< BTRFS_MAX_LEVEL
) {
328 if (!path
->itemsnr
[level
]) /* no more nodes */
330 slot
= path
->slots
[level
] + 1;
331 if (slot
>= path
->itemsnr
[level
]) {
335 path
->slots
[level
] = slot
;
336 path
->slots
[level
-1] = 0; /* reset low level slots info */
337 search_tree(fs
, path
->offsets
[level
], key
, path
);
340 if (level
== BTRFS_MAX_LEVEL
)
345 /* return 0 if slot found */
346 static int next_slot(struct fs_info
*fs
, struct btrfs_disk_key
*key
, struct btrfs_path
*path
)
350 if (!path
->itemsnr
[0])
352 slot
= path
->slots
[0] + 1;
353 if (slot
>= path
->itemsnr
[0])
355 path
->slots
[0] = slot
;
356 search_tree(fs
, path
->offsets
[0], key
, path
);
361 * read chunk_array in super block
363 static void btrfs_read_sys_chunk_array(void)
365 struct btrfs_chunk_map_item item
;
366 struct btrfs_disk_key
*key
;
367 struct btrfs_chunk
*chunk
;
370 /* read chunk array in superblock */
372 while (cur
< sb
.sys_chunk_array_size
) {
373 key
= (struct btrfs_disk_key
*)(sb
.sys_chunk_array
+ cur
);
375 chunk
= (struct btrfs_chunk
*)(sb
.sys_chunk_array
+ cur
);
376 cur
+= btrfs_chunk_item_size(chunk
->num_stripes
);
377 /* insert to mapping table, ignore multi stripes */
378 item
.logical
= key
->offset
;
379 item
.length
= chunk
->length
;
380 item
.devid
= chunk
->stripe
.devid
;
381 item
.physical
= chunk
->stripe
.offset
;/*ignore other stripes */
386 /* read chunk items from chunk_tree and insert them to chunk map */
387 static void btrfs_read_chunk_tree(struct fs_info
*fs
)
389 struct btrfs_disk_key search_key
;
390 struct btrfs_chunk
*chunk
;
391 struct btrfs_chunk_map_item item
;
392 struct btrfs_path path
;
394 if (!(sb
.flags
& BTRFS_SUPER_FLAG_METADUMP
)) {
395 if (sb
.num_devices
> 1)
396 printf("warning: only support single device btrfs\n");
397 /* read chunk from chunk_tree */
398 search_key
.objectid
= BTRFS_FIRST_CHUNK_TREE_OBJECTID
;
399 search_key
.type
= BTRFS_CHUNK_ITEM_KEY
;
400 search_key
.offset
= 0;
402 search_tree(fs
, sb
.chunk_root
, &search_key
, &path
);
405 if (btrfs_comp_keys_type(&search_key
,
408 chunk
= (struct btrfs_chunk
*)(path
.data
);
409 /* insert to mapping table, ignore stripes */
410 item
.logical
= path
.item
.key
.offset
;
411 item
.length
= chunk
->length
;
412 item
.devid
= chunk
->stripe
.devid
;
413 item
.physical
= chunk
->stripe
.offset
;
415 } while (!next_slot(fs
, &search_key
, &path
));
416 if (btrfs_comp_keys_type(&search_key
, &path
.item
.key
))
418 } while (!next_leaf(fs
, &search_key
, &path
));
422 static inline u64
btrfs_name_hash(const char *name
, int len
)
424 return btrfs_crc32c((u32
)~1, name
, len
);
427 static struct inode
*btrfs_iget_by_inr(struct fs_info
*fs
, u64 inr
)
430 struct btrfs_inode_item inode_item
;
431 struct btrfs_disk_key search_key
;
432 struct btrfs_path path
;
435 /* FIXME: some BTRFS inode member are u64, while our logical inode
436 is u32, we may need change them to u64 later */
437 search_key
.objectid
= inr
;
438 search_key
.type
= BTRFS_INODE_ITEM_KEY
;
439 search_key
.offset
= 0;
441 ret
= search_tree(fs
, fs_tree
, &search_key
, &path
);
444 inode_item
= *(struct btrfs_inode_item
*)path
.data
;
445 if (!(inode
= alloc_inode(fs
, inr
, sizeof(struct btrfs_pvt_inode
))))
448 inode
->size
= inode_item
.size
;
449 inode
->mode
= IFTODT(inode_item
.mode
);
451 if (inode
->mode
== DT_REG
|| inode
->mode
== DT_LNK
) {
452 struct btrfs_file_extent_item extent_item
;
455 /* get file_extent_item */
456 search_key
.type
= BTRFS_EXTENT_DATA_KEY
;
457 search_key
.offset
= 0;
459 ret
= search_tree(fs
, fs_tree
, &search_key
, &path
);
461 return NULL
; /* impossible */
462 extent_item
= *(struct btrfs_file_extent_item
*)path
.data
;
463 if (extent_item
.type
== BTRFS_FILE_EXTENT_INLINE
)/* inline file */
464 offset
= path
.offsets
[0] + sizeof(struct btrfs_header
)
466 + offsetof(struct btrfs_file_extent_item
, disk_bytenr
);
468 offset
= extent_item
.disk_bytenr
;
469 PVT(inode
)->offset
= offset
;
474 static struct inode
*btrfs_iget_root(struct fs_info
*fs
)
476 /* BTRFS_FIRST_CHUNK_TREE_OBJECTID(256) actually is first OBJECTID for FS_TREE */
477 return btrfs_iget_by_inr(fs
, BTRFS_FIRST_CHUNK_TREE_OBJECTID
);
480 static struct inode
*btrfs_iget(const char *name
, struct inode
*parent
)
482 struct fs_info
*fs
= parent
->fs
;
483 struct btrfs_disk_key search_key
;
484 struct btrfs_path path
;
485 struct btrfs_dir_item dir_item
;
488 search_key
.objectid
= parent
->ino
;
489 search_key
.type
= BTRFS_DIR_ITEM_KEY
;
490 search_key
.offset
= btrfs_name_hash(name
, strlen(name
));
492 ret
= search_tree(fs
, fs_tree
, &search_key
, &path
);
495 dir_item
= *(struct btrfs_dir_item
*)path
.data
;
497 return btrfs_iget_by_inr(fs
, dir_item
.location
.objectid
);
500 static int btrfs_readlink(struct inode
*inode
, char *buf
)
502 btrfs_read(inode
->fs
, buf
, logical_physical(PVT(inode
)->offset
), inode
->size
);
503 buf
[inode
->size
] = '\0';
507 static int btrfs_readdir(struct file
*file
, struct dirent
*dirent
)
509 struct fs_info
*fs
= file
->fs
;
510 struct inode
*inode
= file
->inode
;
511 struct btrfs_disk_key search_key
;
512 struct btrfs_path path
;
513 struct btrfs_dir_item
*dir_item
;
517 * we use file->offset to store last search key.offset, will will search
518 * key that lower that offset, 0 means first search and we will search
519 * -1UL, which is the biggest possible key
521 search_key
.objectid
= inode
->ino
;
522 search_key
.type
= BTRFS_DIR_ITEM_KEY
;
523 search_key
.offset
= file
->offset
- 1;
525 ret
= search_tree(fs
, fs_tree
, &search_key
, &path
);
528 if (btrfs_comp_keys_type(&search_key
, &path
.item
.key
))
532 dir_item
= (struct btrfs_dir_item
*)path
.data
;
533 file
->offset
= path
.item
.key
.offset
;
534 dirent
->d_ino
= dir_item
->location
.objectid
;
535 dirent
->d_off
= file
->offset
;
536 dirent
->d_reclen
= offsetof(struct dirent
, d_name
)
537 + dir_item
->name_len
+ 1;
538 dirent
->d_type
= IFTODT(dir_item
->type
);
539 memcpy(dirent
->d_name
, dir_item
+ 1, dir_item
->name_len
);
540 dirent
->d_name
[dir_item
->name_len
] = '\0';
545 static int btrfs_next_extent(struct inode
*inode
, uint32_t lstart
)
547 struct btrfs_disk_key search_key
;
548 struct btrfs_file_extent_item extent_item
;
549 struct btrfs_path path
;
552 struct fs_info
*fs
= inode
->fs
;
553 u32 sec_shift
= SECTOR_SHIFT(fs
);
554 u32 sec_size
= SECTOR_SIZE(fs
);
556 search_key
.objectid
= inode
->ino
;
557 search_key
.type
= BTRFS_EXTENT_DATA_KEY
;
558 search_key
.offset
= lstart
<< sec_shift
;
560 ret
= search_tree(fs
, fs_tree
, &search_key
, &path
);
561 if (ret
) { /* impossible */
562 printf("btrfs: search extent data error!\n");
565 extent_item
= *(struct btrfs_file_extent_item
*)path
.data
;
567 if (extent_item
.type
== BTRFS_FILE_EXTENT_INLINE
) {/* inline file */
568 /* we fake a extent here, and PVT of inode will tell us */
569 offset
= path
.offsets
[0] + sizeof(struct btrfs_header
)
571 + offsetof(struct btrfs_file_extent_item
, disk_bytenr
);
572 inode
->next_extent
.len
=
573 (inode
->size
+ sec_size
-1) >> sec_shift
;
575 offset
= extent_item
.disk_bytenr
+ extent_item
.offset
;
576 inode
->next_extent
.len
=
577 (extent_item
.num_bytes
+ sec_size
- 1) >> sec_shift
;
579 inode
->next_extent
.pstart
=
580 logical_physical(offset
) >> sec_shift
;
581 PVT(inode
)->offset
= offset
;
585 static uint32_t btrfs_getfssec(struct file
*file
, char *buf
, int sectors
,
589 struct fs_info
*fs
= file
->fs
;
590 u32 off
= PVT(file
->inode
)->offset
% SECTOR_SIZE(fs
);
591 bool handle_inline
= false;
593 if (off
&& !file
->offset
) {/* inline file first read patch */
594 file
->inode
->size
+= off
;
595 handle_inline
= true;
597 ret
= generic_getfssec(file
, buf
, sectors
, have_more
);
600 off
= PVT(file
->inode
)->offset
% SECTOR_SIZE(fs
);
601 if (handle_inline
) {/* inline file patch */
603 memcpy(buf
, buf
+ off
, ret
);
608 static void btrfs_get_fs_tree(struct fs_info
*fs
)
610 struct btrfs_disk_key search_key
;
611 struct btrfs_path path
;
612 struct btrfs_root_item
*tree
;
613 bool subvol_ok
= false;
615 /* check if subvol is filled by installer */
617 search_key
.objectid
= BTRFS_FS_TREE_OBJECTID
;
618 search_key
.type
= BTRFS_ROOT_REF_KEY
;
619 search_key
.offset
= 0;
621 if (search_tree(fs
, sb
.root
, &search_key
, &path
))
622 next_slot(fs
, &search_key
, &path
);
625 struct btrfs_root_ref
*ref
;
627 if (btrfs_comp_keys_type(&search_key
,
630 ref
= (struct btrfs_root_ref
*)path
.data
;
631 if (!strcmp((char*)(ref
+ 1), SubvolName
)) {
635 } while (!next_slot(fs
, &search_key
, &path
));
638 if (btrfs_comp_keys_type(&search_key
, &path
.item
.key
))
640 } while (!next_leaf(fs
, &search_key
, &path
));
641 if (!subvol_ok
) /* should be impossible */
642 printf("no subvol found!\n");
644 /* find fs_tree from tree_root */
646 search_key
.objectid
= path
.item
.key
.offset
;
647 else /* "default" volume */
648 search_key
.objectid
= BTRFS_FS_TREE_OBJECTID
;
649 search_key
.type
= BTRFS_ROOT_ITEM_KEY
;
650 search_key
.offset
= -1;
652 search_tree(fs
, sb
.root
, &search_key
, &path
);
653 tree
= (struct btrfs_root_item
*)path
.data
;
654 fs_tree
= tree
->bytenr
;
657 /* init. the fs meta data, return the block size shift bits. */
658 static int btrfs_fs_init(struct fs_info
*fs
)
660 struct disk
*disk
= fs
->fs_dev
->disk
;
664 fs
->sector_shift
= disk
->sector_shift
;
665 fs
->sector_size
= 1 << fs
->sector_shift
;
666 fs
->block_shift
= BTRFS_BLOCK_SHIFT
;
667 fs
->block_size
= 1 << fs
->block_shift
;
669 btrfs_read_super_block(fs
);
670 if (strncmp((char *)(&sb
.magic
), BTRFS_MAGIC
, sizeof(sb
.magic
)))
672 btrfs_read_sys_chunk_array();
673 btrfs_read_chunk_tree(fs
);
674 btrfs_get_fs_tree(fs
);
677 /* Initialize the block cache */
678 cache_init(fs
->fs_dev
, fs
->block_shift
);
680 return fs
->block_shift
;
683 const struct fs_ops btrfs_fs_ops
= {
686 .fs_init
= btrfs_fs_init
,
687 .iget_root
= btrfs_iget_root
,
689 .readlink
= btrfs_readlink
,
690 .getfssec
= btrfs_getfssec
,
691 .close_file
= generic_close_file
,
692 .mangle_name
= generic_mangle_name
,
693 .next_extent
= btrfs_next_extent
,
694 .readdir
= btrfs_readdir
,
695 .load_config
= generic_load_config