/*
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 *
 * Generally contains mount/umount code and also serves as a dump area for
 * any functions that don't fit elsewhere and neither justify a file of their
 * own.
 */
#include "logfs.h"
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/mtd/mtd.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
/*
 * Fallback page handed out by emergency_read_begin() when the page cache
 * cannot supply one.  emergency_mutex is taken when the page is handed out
 * and dropped again in emergency_read_end(), so it has one user at a time.
 */
static DEFINE_MUTEX(emergency_mutex);
static struct page *emergency_page;
24 struct page
*emergency_read_begin(struct address_space
*mapping
, pgoff_t index
)
26 filler_t
*filler
= (filler_t
*)mapping
->a_ops
->readpage
;
30 page
= read_cache_page(mapping
, index
, filler
, NULL
);
34 /* No more pages available, switch to emergency page */
35 printk(KERN_INFO
"Logfs: Using emergency page\n");
36 mutex_lock(&emergency_mutex
);
37 err
= filler(NULL
, emergency_page
);
39 mutex_unlock(&emergency_mutex
);
40 printk(KERN_EMERG
"Logfs: Error reading emergency page\n");
43 return emergency_page
;
46 void emergency_read_end(struct page
*page
)
48 if (page
== emergency_page
)
49 mutex_unlock(&emergency_mutex
);
51 page_cache_release(page
);
54 static void dump_segfile(struct super_block
*sb
)
56 struct logfs_super
*super
= logfs_super(sb
);
57 struct logfs_segment_entry se
;
60 for (segno
= 0; segno
< super
->s_no_segs
; segno
++) {
61 logfs_get_segment_entry(sb
, segno
, &se
);
62 printk("%3x: %6x %8x", segno
, be32_to_cpu(se
.ec_level
),
63 be32_to_cpu(se
.valid
));
64 if (++segno
< super
->s_no_segs
) {
65 logfs_get_segment_entry(sb
, segno
, &se
);
66 printk(" %6x %8x", be32_to_cpu(se
.ec_level
),
67 be32_to_cpu(se
.valid
));
69 if (++segno
< super
->s_no_segs
) {
70 logfs_get_segment_entry(sb
, segno
, &se
);
71 printk(" %6x %8x", be32_to_cpu(se
.ec_level
),
72 be32_to_cpu(se
.valid
));
74 if (++segno
< super
->s_no_segs
) {
75 logfs_get_segment_entry(sb
, segno
, &se
);
76 printk(" %6x %8x", be32_to_cpu(se
.ec_level
),
77 be32_to_cpu(se
.valid
));
/*
 * logfs_crash_dump - dump debug information to device
 *
 * The LogFS superblock only occupies part of a segment.  This function will
 * write as much debug information as it can gather into the spare space.
 */
void logfs_crash_dump(struct super_block *sb)
{
	/*
	 * NOTE(review): presumably the segment file dump is all that is
	 * produced here; nothing is written to the device - confirm.
	 */
	dump_segfile(sb);
}
/*
 * TODO: move to lib/string.c
 */
/**
 * memchr_inv - Find the first non-matching byte in an area of memory.
 * @s: The memory area
 * @c: The byte to search for
 * @n: The size of the area.
 *
 * @c is converted to unsigned char before comparing.
 *
 * returns the address of the first character other than @c, or %NULL
 * if the whole buffer contains just @c (including when @n is 0).
 */
void *memchr_inv(const void *s, int c, size_t n)
{
	const unsigned char *p = s;
	const unsigned char *end = p + n;
	const unsigned char wanted = (unsigned char)c;

	for (; p != end; p++) {
		if (*p != wanted)
			return (void *)p;
	}

	return NULL;
}
117 * FIXME: There should be a reserve for root, similar to ext2.
119 int logfs_statfs(struct dentry
*dentry
, struct kstatfs
*stats
)
121 struct super_block
*sb
= dentry
->d_sb
;
122 struct logfs_super
*super
= logfs_super(sb
);
124 stats
->f_type
= LOGFS_MAGIC_U32
;
125 stats
->f_bsize
= sb
->s_blocksize
;
126 stats
->f_blocks
= super
->s_size
>> LOGFS_BLOCK_BITS
>> 3;
127 stats
->f_bfree
= super
->s_free_bytes
>> sb
->s_blocksize_bits
;
128 stats
->f_bavail
= super
->s_free_bytes
>> sb
->s_blocksize_bits
;
131 stats
->f_namelen
= LOGFS_MAX_NAMELEN
;
135 static int logfs_sb_set(struct super_block
*sb
, void *_super
)
137 struct logfs_super
*super
= _super
;
139 sb
->s_fs_info
= super
;
140 sb
->s_mtd
= super
->s_mtd
;
141 sb
->s_bdev
= super
->s_bdev
;
144 sb
->s_bdi
= &bdev_get_queue(sb
->s_bdev
)->backing_dev_info
;
148 sb
->s_bdi
= sb
->s_mtd
->backing_dev_info
;
153 static int logfs_sb_test(struct super_block
*sb
, void *_super
)
155 struct logfs_super
*super
= _super
;
156 struct mtd_info
*mtd
= super
->s_mtd
;
158 if (mtd
&& sb
->s_mtd
== mtd
)
160 if (super
->s_bdev
&& sb
->s_bdev
== super
->s_bdev
)
165 static void set_segment_header(struct logfs_segment_header
*sh
, u8 type
,
166 u8 level
, u32 segno
, u32 ec
)
171 sh
->segno
= cpu_to_be32(segno
);
172 sh
->ec
= cpu_to_be32(ec
);
173 sh
->gec
= cpu_to_be64(segno
);
174 sh
->crc
= logfs_crc32(sh
, LOGFS_SEGMENT_HEADERSIZE
, 4);
177 static void logfs_write_ds(struct super_block
*sb
, struct logfs_disk_super
*ds
,
180 struct logfs_super
*super
= logfs_super(sb
);
181 struct logfs_segment_header
*sh
= &ds
->ds_sh
;
184 memset(ds
, 0, sizeof(*ds
));
185 set_segment_header(sh
, SEG_SUPER
, 0, segno
, ec
);
187 ds
->ds_ifile_levels
= super
->s_ifile_levels
;
188 ds
->ds_iblock_levels
= super
->s_iblock_levels
;
189 ds
->ds_data_levels
= super
->s_data_levels
; /* XXX: Remove */
190 ds
->ds_segment_shift
= super
->s_segshift
;
191 ds
->ds_block_shift
= sb
->s_blocksize_bits
;
192 ds
->ds_write_shift
= super
->s_writeshift
;
193 ds
->ds_filesystem_size
= cpu_to_be64(super
->s_size
);
194 ds
->ds_segment_size
= cpu_to_be32(super
->s_segsize
);
195 ds
->ds_bad_seg_reserve
= cpu_to_be32(super
->s_bad_seg_reserve
);
196 ds
->ds_feature_incompat
= cpu_to_be64(super
->s_feature_incompat
);
197 ds
->ds_feature_ro_compat
= cpu_to_be64(super
->s_feature_ro_compat
);
198 ds
->ds_feature_compat
= cpu_to_be64(super
->s_feature_compat
);
199 ds
->ds_feature_flags
= cpu_to_be64(super
->s_feature_flags
);
200 ds
->ds_root_reserve
= cpu_to_be64(super
->s_root_reserve
);
201 ds
->ds_speed_reserve
= cpu_to_be64(super
->s_speed_reserve
);
203 ds
->ds_journal_seg
[i
] = cpu_to_be32(super
->s_journal_seg
[i
]);
204 ds
->ds_magic
= cpu_to_be64(LOGFS_MAGIC
);
205 ds
->ds_crc
= logfs_crc32(ds
, sizeof(*ds
),
206 LOGFS_SEGMENT_HEADERSIZE
+ 12);
209 static int write_one_sb(struct super_block
*sb
,
210 struct page
*(*find_sb
)(struct super_block
*sb
, u64
*ofs
))
212 struct logfs_super
*super
= logfs_super(sb
);
213 struct logfs_disk_super
*ds
;
214 struct logfs_segment_entry se
;
220 page
= find_sb(sb
, &ofs
);
223 ds
= page_address(page
);
224 segno
= seg_no(sb
, ofs
);
225 logfs_get_segment_entry(sb
, segno
, &se
);
226 ec
= be32_to_cpu(se
.ec_level
) >> 4;
228 logfs_set_segment_erased(sb
, segno
, ec
, 0);
229 logfs_write_ds(sb
, ds
, segno
, ec
);
230 err
= super
->s_devops
->write_sb(sb
, page
);
231 page_cache_release(page
);
235 int logfs_write_sb(struct super_block
*sb
)
237 struct logfs_super
*super
= logfs_super(sb
);
240 /* First superblock */
241 err
= write_one_sb(sb
, super
->s_devops
->find_first_sb
);
245 /* Last superblock */
246 err
= write_one_sb(sb
, super
->s_devops
->find_last_sb
);
252 static int ds_cmp(const void *ds0
, const void *ds1
)
254 size_t len
= sizeof(struct logfs_disk_super
);
256 /* We know the segment headers differ, so ignore them */
257 len
-= LOGFS_SEGMENT_HEADERSIZE
;
258 ds0
+= LOGFS_SEGMENT_HEADERSIZE
;
259 ds1
+= LOGFS_SEGMENT_HEADERSIZE
;
260 return memcmp(ds0
, ds1
, len
);
263 static int logfs_recover_sb(struct super_block
*sb
)
265 struct logfs_super
*super
= logfs_super(sb
);
266 struct logfs_disk_super _ds0
, *ds0
= &_ds0
;
267 struct logfs_disk_super _ds1
, *ds1
= &_ds1
;
268 int err
, valid0
, valid1
;
270 /* read first superblock */
271 err
= wbuf_read(sb
, super
->s_sb_ofs
[0], sizeof(*ds0
), ds0
);
274 /* read last superblock */
275 err
= wbuf_read(sb
, super
->s_sb_ofs
[1], sizeof(*ds1
), ds1
);
278 valid0
= logfs_check_ds(ds0
) == 0;
279 valid1
= logfs_check_ds(ds1
) == 0;
281 if (!valid0
&& valid1
) {
282 printk(KERN_INFO
"First superblock is invalid - fixing.\n");
283 return write_one_sb(sb
, super
->s_devops
->find_first_sb
);
285 if (valid0
&& !valid1
) {
286 printk(KERN_INFO
"Last superblock is invalid - fixing.\n");
287 return write_one_sb(sb
, super
->s_devops
->find_last_sb
);
289 if (valid0
&& valid1
&& ds_cmp(ds0
, ds1
)) {
290 printk(KERN_INFO
"Superblocks don't match - fixing.\n");
291 return logfs_write_sb(sb
);
293 /* If neither is valid now, something's wrong. Didn't we properly
294 * check them before?!? */
295 BUG_ON(!valid0
&& !valid1
);
/*
 * logfs_make_writeable - run the steps needed before the fs may be written
 * @sb:	superblock being mounted
 *
 * Stops at the first failing step and returns its error; 0 on success.
 */
static int logfs_make_writeable(struct super_block *sb)
{
	int err;

	err = logfs_open_segfile(sb);
	if (err)
		return err;

	/* Repair any broken superblock copies */
	err = logfs_recover_sb(sb);
	if (err)
		return err;

	/* Check areas for trailing unaccounted data */
	err = logfs_check_areas(sb);
	if (err)
		return err;

	/* Do one GC pass before any data gets dirtied */
	/* NOTE(review): call presumed to be logfs_gc_pass() - confirm */
	logfs_gc_pass(sb);

	/* after all initializations are done, replay the journal
	 * for rw-mounts, if necessary */
	err = logfs_replay_journal(sb);
	if (err)
		return err;

	return 0;
}
329 static int logfs_get_sb_final(struct super_block
*sb
)
331 struct logfs_super
*super
= logfs_super(sb
);
332 struct inode
*rootdir
;
336 rootdir
= logfs_iget(sb
, LOGFS_INO_ROOT
);
340 sb
->s_root
= d_alloc_root(rootdir
);
346 /* at that point we know that ->put_super() will be called */
347 super
->s_erase_page
= alloc_pages(GFP_KERNEL
, 0);
348 if (!super
->s_erase_page
)
350 memset(page_address(super
->s_erase_page
), 0xFF, PAGE_SIZE
);
352 /* FIXME: check for read-only mounts */
353 err
= logfs_make_writeable(sb
);
355 __free_page(super
->s_erase_page
);
359 log_super("LogFS: Finished mounting\n");
363 iput(super
->s_master_inode
);
364 iput(super
->s_segfile_inode
);
365 iput(super
->s_mapping_inode
);
369 int logfs_check_ds(struct logfs_disk_super
*ds
)
371 struct logfs_segment_header
*sh
= &ds
->ds_sh
;
373 if (ds
->ds_magic
!= cpu_to_be64(LOGFS_MAGIC
))
375 if (sh
->crc
!= logfs_crc32(sh
, LOGFS_SEGMENT_HEADERSIZE
, 4))
377 if (ds
->ds_crc
!= logfs_crc32(ds
, sizeof(*ds
),
378 LOGFS_SEGMENT_HEADERSIZE
+ 12))
383 static struct page
*find_super_block(struct super_block
*sb
)
385 struct logfs_super
*super
= logfs_super(sb
);
386 struct page
*first
, *last
;
388 first
= super
->s_devops
->find_first_sb(sb
, &super
->s_sb_ofs
[0]);
389 if (!first
|| IS_ERR(first
))
391 last
= super
->s_devops
->find_last_sb(sb
, &super
->s_sb_ofs
[1]);
392 if (!last
|| IS_ERR(last
)) {
393 page_cache_release(first
);
397 if (!logfs_check_ds(page_address(first
))) {
398 page_cache_release(last
);
402 /* First one didn't work, try the second superblock */
403 if (!logfs_check_ds(page_address(last
))) {
404 page_cache_release(first
);
408 /* Neither worked, sorry folks */
409 page_cache_release(first
);
410 page_cache_release(last
);
414 static int __logfs_read_sb(struct super_block
*sb
)
416 struct logfs_super
*super
= logfs_super(sb
);
418 struct logfs_disk_super
*ds
;
421 page
= find_super_block(sb
);
425 ds
= page_address(page
);
426 super
->s_size
= be64_to_cpu(ds
->ds_filesystem_size
);
427 super
->s_root_reserve
= be64_to_cpu(ds
->ds_root_reserve
);
428 super
->s_speed_reserve
= be64_to_cpu(ds
->ds_speed_reserve
);
429 super
->s_bad_seg_reserve
= be32_to_cpu(ds
->ds_bad_seg_reserve
);
430 super
->s_segsize
= 1 << ds
->ds_segment_shift
;
431 super
->s_segmask
= (1 << ds
->ds_segment_shift
) - 1;
432 super
->s_segshift
= ds
->ds_segment_shift
;
433 sb
->s_blocksize
= 1 << ds
->ds_block_shift
;
434 sb
->s_blocksize_bits
= ds
->ds_block_shift
;
435 super
->s_writesize
= 1 << ds
->ds_write_shift
;
436 super
->s_writeshift
= ds
->ds_write_shift
;
437 super
->s_no_segs
= super
->s_size
>> super
->s_segshift
;
438 super
->s_no_blocks
= super
->s_segsize
>> sb
->s_blocksize_bits
;
439 super
->s_feature_incompat
= be64_to_cpu(ds
->ds_feature_incompat
);
440 super
->s_feature_ro_compat
= be64_to_cpu(ds
->ds_feature_ro_compat
);
441 super
->s_feature_compat
= be64_to_cpu(ds
->ds_feature_compat
);
442 super
->s_feature_flags
= be64_to_cpu(ds
->ds_feature_flags
);
445 super
->s_journal_seg
[i
] = be32_to_cpu(ds
->ds_journal_seg
[i
]);
447 super
->s_ifile_levels
= ds
->ds_ifile_levels
;
448 super
->s_iblock_levels
= ds
->ds_iblock_levels
;
449 super
->s_data_levels
= ds
->ds_data_levels
;
450 super
->s_total_levels
= super
->s_ifile_levels
+ super
->s_iblock_levels
451 + super
->s_data_levels
;
452 page_cache_release(page
);
456 static int logfs_read_sb(struct super_block
*sb
, int read_only
)
458 struct logfs_super
*super
= logfs_super(sb
);
461 super
->s_btree_pool
= mempool_create(32, btree_alloc
, btree_free
, NULL
);
462 if (!super
->s_btree_pool
)
465 btree_init_mempool64(&super
->s_shadow_tree
.new, super
->s_btree_pool
);
466 btree_init_mempool64(&super
->s_shadow_tree
.old
, super
->s_btree_pool
);
467 btree_init_mempool32(&super
->s_shadow_tree
.segment_map
,
468 super
->s_btree_pool
);
470 ret
= logfs_init_mapping(sb
);
474 ret
= __logfs_read_sb(sb
);
478 if (super
->s_feature_incompat
& ~LOGFS_FEATURES_INCOMPAT
)
480 if ((super
->s_feature_ro_compat
& ~LOGFS_FEATURES_RO_COMPAT
) &&
484 ret
= logfs_init_rw(sb
);
488 ret
= logfs_init_areas(sb
);
492 ret
= logfs_init_gc(sb
);
496 ret
= logfs_init_journal(sb
);
503 static void logfs_kill_sb(struct super_block
*sb
)
505 struct logfs_super
*super
= logfs_super(sb
);
507 log_super("LogFS: Start unmounting\n");
508 /* Alias entries slow down mount, so evict as many as possible */
510 logfs_write_anchor(sb
);
513 * From this point on alias entries are simply dropped - and any
514 * writes to the object store are considered bugs.
516 super
->s_flags
|= LOGFS_SB_FLAG_SHUTDOWN
;
517 log_super("LogFS: Now in shutdown\n");
518 generic_shutdown_super(sb
);
520 BUG_ON(super
->s_dirty_used_bytes
|| super
->s_dirty_free_bytes
);
522 logfs_cleanup_gc(sb
);
523 logfs_cleanup_journal(sb
);
524 logfs_cleanup_areas(sb
);
525 logfs_cleanup_rw(sb
);
526 if (super
->s_erase_page
)
527 __free_page(super
->s_erase_page
);
528 super
->s_devops
->put_device(super
);
529 logfs_mempool_destroy(super
->s_btree_pool
);
530 logfs_mempool_destroy(super
->s_alias_pool
);
532 log_super("LogFS: Finished unmounting\n");
535 static struct dentry
*logfs_get_sb_device(struct logfs_super
*super
,
536 struct file_system_type
*type
, int flags
)
538 struct super_block
*sb
;
540 static int mount_count
;
542 log_super("LogFS: Start mount %x\n", mount_count
++);
545 sb
= sget(type
, logfs_sb_test
, logfs_sb_set
, super
);
547 super
->s_devops
->put_device(super
);
553 /* Device is already in use */
554 super
->s_devops
->put_device(super
);
556 return dget(sb
->s_root
);
560 * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache
561 * only covers 16TB and the upper 8TB are used for indirect blocks.
562 * On 64bit system we could bump up the limit, but that would make
563 * the filesystem incompatible with 32bit systems.
565 sb
->s_maxbytes
= (1ull << 43) - 1;
566 sb
->s_op
= &logfs_super_operations
;
567 sb
->s_flags
= flags
| MS_NOATIME
;
569 err
= logfs_read_sb(sb
, sb
->s_flags
& MS_RDONLY
);
573 sb
->s_flags
|= MS_ACTIVE
;
574 err
= logfs_get_sb_final(sb
);
576 deactivate_locked_super(sb
);
579 return dget(sb
->s_root
);
582 /* no ->s_root, no ->put_super() */
583 iput(super
->s_master_inode
);
584 iput(super
->s_segfile_inode
);
585 iput(super
->s_mapping_inode
);
586 deactivate_locked_super(sb
);
590 static struct dentry
*logfs_mount(struct file_system_type
*type
, int flags
,
591 const char *devname
, void *data
)
594 struct logfs_super
*super
;
597 super
= kzalloc(sizeof(*super
), GFP_KERNEL
);
599 return ERR_PTR(-ENOMEM
);
601 mutex_init(&super
->s_dirop_mutex
);
602 mutex_init(&super
->s_object_alias_mutex
);
603 INIT_LIST_HEAD(&super
->s_freeing_list
);
606 err
= logfs_get_sb_bdev(super
, type
, devname
);
607 else if (strncmp(devname
, "mtd", 3))
608 err
= logfs_get_sb_bdev(super
, type
, devname
);
611 mtdnr
= simple_strtoul(devname
+3, &garbage
, 0);
615 err
= logfs_get_sb_mtd(super
, mtdnr
);
623 return logfs_get_sb_device(super
, type
, flags
);
626 static struct file_system_type logfs_fs_type
= {
627 .owner
= THIS_MODULE
,
629 .mount
= logfs_mount
,
630 .kill_sb
= logfs_kill_sb
,
631 .fs_flags
= FS_REQUIRES_DEV
,
635 static int __init
logfs_init(void)
639 emergency_page
= alloc_pages(GFP_KERNEL
, 0);
643 ret
= logfs_compr_init();
647 ret
= logfs_init_inode_cache();
651 return register_filesystem(&logfs_fs_type
);
655 __free_pages(emergency_page
, 0);
659 static void __exit
logfs_exit(void)
661 unregister_filesystem(&logfs_fs_type
);
662 logfs_destroy_inode_cache();
664 __free_pages(emergency_page
, 0);
667 module_init(logfs_init
);
668 module_exit(logfs_exit
);
670 MODULE_LICENSE("GPL v2");
671 MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
672 MODULE_DESCRIPTION("scalable flash filesystem");