/*
 * fs/logfs/journal.c	- journal handling code
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 */
#include "logfs.h"
#include <linux/slab.h>
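
/*
 * Derive the number of free bytes on the medium.  Everything that can
 * never hold file data - superblock segments, journal segments, open
 * areas and the per-level GC reserve - is subtracted from the raw
 * capacity before the speed/bad-block reserve is applied.
 */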
static void logfs_calc_free(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	u64 reserve, no_segs = super->s_no_segs;
	s64 free;
	int i;

	/* superblock segments */
	no_segs -= 2;

	/* journal segments */
	super->s_no_journal_segs = 0;
	journal_for_each(i)
		if (super->s_journal_seg[i]) {
			no_segs--;
			super->s_no_journal_segs++;
		}

	/* open segments plus one extra per level for GC */
	no_segs -= 2 * super->s_total_levels;

	/* calculate free space */
	free = no_segs * (super->s_segsize - LOGFS_SEGMENT_RESERVE);
	free -= super->s_used_bytes;
	/* just a bit extra */
	free -= super->s_total_levels * 4096;

	/* Bad blocks are 'paid' for with speed reserve - the filesystem
	 * simply gets slower as bad blocks accumulate.  Until the bad blocks
	 * exceed the speed reserve - then the filesystem gets smaller.
	 */
	reserve = super->s_bad_segments + super->s_bad_seg_reserve;
	reserve *= super->s_segsize - LOGFS_SEGMENT_RESERVE;
	reserve = max(reserve, super->s_speed_reserve);
	free -= reserve;
	if (free < 0)
		free = 0;

	super->s_free_bytes = free;
}
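
/*
 * Insert the superblock and journal segments into the reserved-segments
 * btree so the allocator and GC never hand them out as data segments.
 * The (void *)1 value is a dummy; only the presence of the key matters.
 */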
static void reserve_sb_and_journal(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct btree_head32 *head = &super->s_reserved_segments;
	int i;
	int err;

	err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[0]), (void *)1,
			GFP_KERNEL);
	BUG_ON(err);

	err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[1]), (void *)1,
			GFP_KERNEL);
	BUG_ON(err);

	journal_for_each(i) {
		if (!super->s_journal_seg[i])
			continue;
		err = btree_insert32(head, super->s_journal_seg[i], (void *)1,
				GFP_KERNEL);
		BUG_ON(err);
	}
}
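
/*
 * The following read_* helpers each replay one journal entry type at
 * mount time, copying the on-medium big-endian fields into the
 * in-memory super, master inode and area structures.
 */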
static void read_dynsb(struct super_block *sb,
		struct logfs_je_dynsb *dynsb)
{
	struct logfs_super *super = logfs_super(sb);

	super->s_gec		= be64_to_cpu(dynsb->ds_gec);
	super->s_sweeper	= be64_to_cpu(dynsb->ds_sweeper);
	super->s_victim_ino	= be64_to_cpu(dynsb->ds_victim_ino);
	super->s_rename_dir	= be64_to_cpu(dynsb->ds_rename_dir);
	super->s_rename_pos	= be64_to_cpu(dynsb->ds_rename_pos);
	super->s_used_bytes	= be64_to_cpu(dynsb->ds_used_bytes);
	super->s_generation	= be32_to_cpu(dynsb->ds_generation);
}
static void read_anchor(struct super_block *sb,
		struct logfs_je_anchor *da)
{
	struct logfs_super *super = logfs_super(sb);
	struct inode *inode = super->s_master_inode;
	struct logfs_inode *li = logfs_inode(inode);
	int i;

	super->s_last_ino = be64_to_cpu(da->da_last_ino);
	li->li_flags = 0;
	li->li_height = da->da_height;
	i_size_write(inode, be64_to_cpu(da->da_size));
	li->li_used_bytes = be64_to_cpu(da->da_used_bytes);

	for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
		li->li_data[i] = be64_to_cpu(da->da_data[i]);
}
static void read_erasecount(struct super_block *sb,
		struct logfs_je_journal_ec *ec)
{
	struct logfs_super *super = logfs_super(sb);
	int i;

	journal_for_each(i)
		super->s_journal_ec[i] = be32_to_cpu(ec->ec[i]);
}
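
/*
 * Restore a partially written area.  used_bytes is rounded down to the
 * writesize boundary to find the last complete device write; anything
 * beyond that is recovered from the wbuf copy stored directly behind
 * the journal entry (a + 1) when the device writesize is > 1.
 */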
static int read_area(struct super_block *sb, struct logfs_je_area *a)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_area[a->gc_level];
	u64 ofs;
	u32 writemask = ~(super->s_writesize - 1);

	if (a->gc_level >= LOGFS_NO_AREAS)
		return -EIO;
	if (a->vim != VIM_DEFAULT)
		return -EIO; /* TODO: close area and continue */

	area->a_used_bytes = be32_to_cpu(a->used_bytes);
	area->a_written_bytes = area->a_used_bytes & writemask;
	area->a_segno = be32_to_cpu(a->segno);
	if (area->a_segno)
		area->a_is_open = 1;

	ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
	if (super->s_writesize > 1)
		return logfs_buf_recover(area, ofs, a + 1, super->s_writesize);
	else
		return logfs_buf_recover(area, ofs, NULL, 0);
}
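
/*
 * Decompress (or plainly copy) a journal entry payload from the
 * compressed buffer into the scratch buffer and return the latter.
 */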
static void *unpack(void *from, void *to)
{
	struct logfs_journal_header *jh = from;
	void *data = from + sizeof(struct logfs_journal_header);
	int err;
	size_t inlen, outlen;

	inlen = be16_to_cpu(jh->h_len);
	outlen = be16_to_cpu(jh->h_datalen);

	if (jh->h_compr == COMPR_NONE)
		memcpy(to, data, inlen);
	else {
		err = logfs_uncompress(data, to, inlen, outlen);
		BUG_ON(err);
	}
	return to;
}
static int __read_je_header(struct super_block *sb, u64 ofs,
		struct logfs_journal_header *jh)
{
	struct logfs_super *super = logfs_super(sb);
	size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize)
		+ MAX_JOURNAL_HEADER;
	u16 type, len, datalen;
	int err;

	/* read header only */
	err = wbuf_read(sb, ofs, sizeof(*jh), jh);
	if (err)
		return err;
	type = be16_to_cpu(jh->h_type);
	len = be16_to_cpu(jh->h_len);
	datalen = be16_to_cpu(jh->h_datalen);
	if (len > sb->s_blocksize)
		return -EIO;
	if ((type < JE_FIRST) || (type > JE_LAST))
		return -EIO;
	if (datalen > bufsize)
		return -EIO;
	return 0;
}
static int __read_je_payload(struct super_block *sb, u64 ofs,
		struct logfs_journal_header *jh)
{
	u16 len;
	int err;

	len = be16_to_cpu(jh->h_len);
	err = wbuf_read(sb, ofs + sizeof(*jh), len, jh + 1);
	if (err)
		return err;
	if (jh->h_crc != logfs_crc32(jh, len + sizeof(*jh), 4)) {
		/* Old code was confused.  It forgot about the header length
		 * and stopped calculating the crc 16 bytes before the end
		 * of data - which was 16 bytes after the header.
		 * FIXME: Remove this hack once the old code is fixed.
		 */
		if (jh->h_crc == logfs_crc32(jh, len, 4))
			WARN_ON_ONCE(1);
		else
			return -EIO;
	}
	return 0;
}
/*
 * jh needs to be large enough to hold the complete entry, not just the header
 */
static int __read_je(struct super_block *sb, u64 ofs,
		struct logfs_journal_header *jh)
{
	int err;

	err = __read_je_header(sb, ofs, jh);
	if (err)
		return err;
	return __read_je_payload(sb, ofs, jh);
}
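
/*
 * Read a single journal entry and dispatch on its type.  JE_COMMIT
 * entries are handled by logfs_read_segment() and are deliberately
 * absent from this switch.
 */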
static int read_je(struct super_block *sb, u64 ofs)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_journal_header *jh = super->s_compressed_je;
	void *scratch = super->s_je;
	u16 type, datalen;
	int err;

	err = __read_je(sb, ofs, jh);
	if (err)
		return err;
	type = be16_to_cpu(jh->h_type);
	datalen = be16_to_cpu(jh->h_datalen);

	switch (type) {
	case JE_DYNSB:
		read_dynsb(sb, unpack(jh, scratch));
		break;
	case JE_ANCHOR:
		read_anchor(sb, unpack(jh, scratch));
		break;
	case JE_ERASECOUNT:
		read_erasecount(sb, unpack(jh, scratch));
		break;
	case JE_AREA:
		err = read_area(sb, unpack(jh, scratch));
		break;
	case JE_OBJ_ALIAS:
		err = logfs_load_object_aliases(sb, unpack(jh, scratch),
				datalen);
		break;
	default:
		WARN_ON_ONCE(1);
		return -EIO;
	}
	return err;
}
static int logfs_read_segment(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_journal_header *jh = super->s_compressed_je;
	u64 ofs, seg_ofs = dev_ofs(sb, segno, 0);
	u32 h_ofs, last_ofs = 0;
	u16 len, datalen, last_len = 0;
	int i, err;

	/* search for most recent commit */
	for (h_ofs = 0; h_ofs < super->s_segsize; h_ofs += sizeof(*jh)) {
		ofs = seg_ofs + h_ofs;
		err = __read_je_header(sb, ofs, jh);
		if (err)
			continue;
		if (jh->h_type != cpu_to_be16(JE_COMMIT))
			continue;
		err = __read_je_payload(sb, ofs, jh);
		if (err)
			continue;
		len = be16_to_cpu(jh->h_len);
		datalen = be16_to_cpu(jh->h_datalen);
		if ((datalen > sizeof(super->s_je_array)) ||
				(datalen % sizeof(__be64)))
			continue;
		last_ofs = h_ofs;
		last_len = datalen;
		h_ofs += ALIGN(len, sizeof(*jh)) - sizeof(*jh);
	}
	/* read commit */
	if (last_ofs == 0)
		return -ENOENT;
	ofs = seg_ofs + last_ofs;
	log_journal("Read commit from %llx\n", ofs);
	err = __read_je(sb, ofs, jh);
	BUG_ON(err); /* We should have caught it in the scan loop already */
	if (err)
		return err;
	/* uncompress */
	unpack(jh, super->s_je_array);
	super->s_no_je = last_len / sizeof(__be64);
	/* iterate over array */
	for (i = 0; i < super->s_no_je; i++) {
		err = read_je(sb, be64_to_cpu(super->s_je_array[i]));
		if (err)
			return err;
	}
	super->s_journal_area->a_segno = segno;
	return 0;
}
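
/*
 * Fetch the global erase count from a segment header.  Returns 0 for
 * unused segments and on read or crc failure, so the caller can simply
 * skip such segments.
 */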
static u64 read_gec(struct super_block *sb, u32 segno)
{
	struct logfs_segment_header sh;
	__be32 crc;
	int err;

	if (!segno)
		return 0;
	err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
	if (err)
		return 0;
	crc = logfs_crc32(&sh, sizeof(sh), 4);
	if (crc != sh.crc) {
		WARN_ON(sh.gec != cpu_to_be64(0xffffffffffffffffull));
		/* Most likely it was just erased */
		return 0;
	}
	return be64_to_cpu(sh.gec);
}
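
/*
 * Mount-time journal scan: the journal segment with the highest global
 * erase count contains the most recent commit, so replay starts there.
 */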
static int logfs_read_journal(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	u64 gec[LOGFS_JOURNAL_SEGS], max;
	u32 segno;
	int i, max_i;

	max = 0;
	max_i = -1;
	journal_for_each(i) {
		segno = super->s_journal_seg[i];
		gec[i] = read_gec(sb, super->s_journal_seg[i]);
		if (gec[i] > max) {
			max = gec[i];
			max_i = i;
		}
	}
	if (max_i == -1)
		return -EIO;
	/* FIXME: Try older segments in case of error */
	return logfs_read_segment(sb, super->s_journal_seg[max_i]);
}
/*
 * First search the current segment (outer loop), then pick the next segment
 * in the array, skipping any zero entries (inner loop).
 */
static void journal_get_free_segment(struct logfs_area *area)
{
	struct logfs_super *super = logfs_super(area->a_sb);
	int i;

	journal_for_each(i) {
		if (area->a_segno != super->s_journal_seg[i])
			continue;

		do {
			i++;
			if (i == LOGFS_JOURNAL_SEGS)
				i = 0;
		} while (!super->s_journal_seg[i]);

		area->a_segno = super->s_journal_seg[i];
		area->a_erase_count = ++(super->s_journal_ec[i]);
		log_journal("Journal now at %x (ec %x)\n", area->a_segno,
				area->a_erase_count);
		return;
	}
	BUG();
}
static void journal_get_erase_count(struct logfs_area *area)
{
	/* erase count is stored globally and incremented in
	 * journal_get_free_segment() - nothing to do here */
}
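
/*
 * Erase the segment and write a fresh segment header, padded to a
 * 16-byte boundary via the union, so the segment remains identifiable
 * as journal space after a crash.
 */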
static int journal_erase_segment(struct logfs_area *area)
{
	struct super_block *sb = area->a_sb;
	union {
		struct logfs_segment_header sh;
		unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)];
	} u;
	u64 ofs;
	int err;

	err = logfs_erase_segment(sb, area->a_segno, 1);
	if (err)
		return err;

	memset(&u, 0, sizeof(u));
	u.sh.pad = 0;
	u.sh.type = SEG_JOURNAL;
	u.sh.level = 0;
	u.sh.segno = cpu_to_be32(area->a_segno);
	u.sh.ec = cpu_to_be32(area->a_erase_count);
	u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
	u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4);

	/* This causes a bug in segment.c. Not yet. */
	//logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);

	ofs = dev_ofs(sb, area->a_segno, 0);
	area->a_used_bytes = sizeof(u);
	logfs_buf_write(area, ofs, &u, sizeof(u));
	return 0;
}
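
/*
 * Fill in a journal header for a payload of len bytes (datalen bytes
 * before compression) and return the total on-medium footprint of the
 * entry, with the payload padded to a 16-byte boundary.
 */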
static size_t __logfs_write_header(struct logfs_super *super,
		struct logfs_journal_header *jh, size_t len, size_t datalen,
		u16 type, u8 compr)
{
	jh->h_len	= cpu_to_be16(len);
	jh->h_type	= cpu_to_be16(type);
	jh->h_datalen	= cpu_to_be16(datalen);
	jh->h_compr	= compr;
	jh->h_pad[0]	= 'H';
	jh->h_pad[1]	= 'E';
	jh->h_pad[2]	= 'A';
	jh->h_pad[3]	= 'D';
	jh->h_pad[4]	= 'R';
	jh->h_crc	= logfs_crc32(jh, len + sizeof(*jh), 4);
	return ALIGN(len, 16) + sizeof(*jh);
}
static size_t logfs_write_header(struct logfs_super *super,
		struct logfs_journal_header *jh, size_t datalen, u16 type)
{
	size_t len = datalen;

	return __logfs_write_header(super, jh, len, datalen, type, COMPR_NONE);
}
static inline size_t logfs_journal_erasecount_size(struct logfs_super *super)
{
	return LOGFS_JOURNAL_SEGS * sizeof(__be32);
}
static void *logfs_write_erasecount(struct super_block *sb, void *_ec,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_je_journal_ec *ec = _ec;
	int i;

	journal_for_each(i)
		ec->ec[i] = cpu_to_be32(super->s_journal_ec[i]);
	*type = JE_ERASECOUNT;
	*len = logfs_journal_erasecount_size(super);
	return ec;
}
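
/*
 * Shadow accounting: every block rewrite leaves a shadow recording the
 * old and new location.  Once the journal commits, the new space is
 * charged for good and the old space returns to the free pool.
 */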
static void account_shadow(void *_shadow, unsigned long _sb, u64 ignore,
		size_t ignore2)
{
	struct logfs_shadow *shadow = _shadow;
	struct super_block *sb = (void *)_sb;
	struct logfs_super *super = logfs_super(sb);

	/* consume new space */
	super->s_free_bytes	  -= shadow->new_len;
	super->s_used_bytes	  += shadow->new_len;
	super->s_dirty_used_bytes -= shadow->new_len;

	/* free up old space */
	super->s_free_bytes	  += shadow->old_len;
	super->s_used_bytes	  -= shadow->old_len;
	super->s_dirty_free_bytes -= shadow->old_len;

	logfs_set_segment_used(sb, shadow->old_ofs, -shadow->old_len);
	logfs_set_segment_used(sb, shadow->new_ofs, shadow->new_len);

	log_journal("account_shadow(%llx, %llx, %x) %llx->%llx %x->%x\n",
			shadow->ino, shadow->bix, shadow->gc_level,
			shadow->old_ofs, shadow->new_ofs,
			shadow->old_len, shadow->new_len);
	mempool_free(shadow, super->s_shadow_pool);
}
static void account_shadows(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct inode *inode = super->s_master_inode;
	struct logfs_inode *li = logfs_inode(inode);
	struct shadow_tree *tree = &super->s_shadow_tree;

	btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
	btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
	btree_grim_visitor32(&tree->segment_map, 0, NULL);
	tree->no_shadowed_segments = 0;

	if (li->li_block) {
		/*
		 * We never actually use the structure, when attached to the
		 * master inode.  But it is easier to always free it here than
		 * to have checks in several places elsewhere when allocating
		 * shadows.
		 */
		li->li_block->ops->free_block(sb, li->li_block);
	}
	BUG_ON((s64)li->li_used_bytes < 0);
}
static void *__logfs_write_anchor(struct super_block *sb, void *_da,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_je_anchor *da = _da;
	struct inode *inode = super->s_master_inode;
	struct logfs_inode *li = logfs_inode(inode);
	int i;

	da->da_height	= li->li_height;
	da->da_last_ino = cpu_to_be64(super->s_last_ino);
	da->da_size	= cpu_to_be64(i_size_read(inode));
	da->da_used_bytes = cpu_to_be64(li->li_used_bytes);
	for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
		da->da_data[i] = cpu_to_be64(li->li_data[i]);
	*type = JE_ANCHOR;
	*len = sizeof(*da);
	return da;
}
static void *logfs_write_dynsb(struct super_block *sb, void *_dynsb,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_je_dynsb *dynsb = _dynsb;

	dynsb->ds_gec		= cpu_to_be64(super->s_gec);
	dynsb->ds_sweeper	= cpu_to_be64(super->s_sweeper);
	dynsb->ds_victim_ino	= cpu_to_be64(super->s_victim_ino);
	dynsb->ds_rename_dir	= cpu_to_be64(super->s_rename_dir);
	dynsb->ds_rename_pos	= cpu_to_be64(super->s_rename_pos);
	dynsb->ds_used_bytes	= cpu_to_be64(super->s_used_bytes);
	dynsb->ds_generation	= cpu_to_be32(super->s_generation);
	*type = JE_DYNSB;
	*len = sizeof(*dynsb);
	return dynsb;
}
static void write_wbuf(struct super_block *sb, struct logfs_area *area,
		void *wbuf)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	u64 ofs;
	pgoff_t index;
	int page_ofs;
	struct page *page;

	ofs = dev_ofs(sb, area->a_segno,
			area->a_used_bytes & ~(super->s_writesize - 1));
	index = ofs >> PAGE_SHIFT;
	page_ofs = ofs & (PAGE_SIZE - 1);

	page = find_lock_page(mapping, index);
	BUG_ON(!page);
	memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize);
	unlock_page(page);
}
static void *logfs_write_area(struct super_block *sb, void *_a,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_area[super->s_sum_index];
	struct logfs_je_area *a = _a;

	a->vim = VIM_DEFAULT;
	a->gc_level = super->s_sum_index;
	a->used_bytes = cpu_to_be32(area->a_used_bytes);
	a->segno = cpu_to_be32(area->a_segno);
	if (super->s_writesize > 1)
		write_wbuf(sb, area, a + 1);

	*type = JE_AREA;
	*len = sizeof(*a) + super->s_writesize;
	return a;
}
static void *logfs_write_commit(struct super_block *sb, void *h,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);

	*type = JE_COMMIT;
	*len = super->s_no_je * sizeof(__be64);
	return super->s_je_array;
}
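
/*
 * Serialize one journal entry into the compressed buffer.  Payloads
 * are zlib-compressed when compression succeeds; anchors are always
 * stored uncompressed (see the JE_ANCHOR special case below).
 */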
static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
		size_t len)
{
	struct logfs_super *super = logfs_super(sb);
	void *header = super->s_compressed_je;
	void *data = header + sizeof(struct logfs_journal_header);
	ssize_t compr_len, pad_len;
	u8 compr = COMPR_ZLIB;

	if (len == 0)
		return logfs_write_header(super, header, 0, type);

	BUG_ON(len > sb->s_blocksize);
	compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
	if (compr_len < 0 || type == JE_ANCHOR) {
		memcpy(data, buf, len);
		compr_len = len;
		compr = COMPR_NONE;
	}

	pad_len = ALIGN(compr_len, 16);
	memset(data + compr_len, 0, pad_len - compr_len);

	return __logfs_write_header(super, header, compr_len, len, type, compr);
}
static s64 logfs_get_free_bytes(struct logfs_area *area, size_t *bytes,
		int must_pad)
{
	u32 writesize = logfs_super(area->a_sb)->s_writesize;
	s32 ofs;
	int ret;

	ret = logfs_open_area(area, *bytes);
	if (ret)
		return -EAGAIN;

	ofs = area->a_used_bytes;
	area->a_used_bytes += *bytes;

	if (must_pad) {
		area->a_used_bytes = ALIGN(area->a_used_bytes, writesize);
		*bytes = area->a_used_bytes - ofs;
	}

	return dev_ofs(area->a_sb, area->a_segno, ofs);
}
static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
		size_t buf_len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_journal_area;
	struct logfs_journal_header *jh = super->s_compressed_je;
	size_t len;
	int must_pad = 0;
	s64 ofs;

	len = __logfs_write_je(sb, buf, type, buf_len);
	if (jh->h_type == cpu_to_be16(JE_COMMIT))
		must_pad = 1;

	ofs = logfs_get_free_bytes(area, &len, must_pad);
	if (ofs < 0)
		return ofs;
	logfs_buf_write(area, ofs, super->s_compressed_je, len);
	BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES);
	super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
	return 0;
}
static int logfs_write_je(struct super_block *sb,
		void* (*write)(struct super_block *sb, void *scratch,
			u16 *type, size_t *len))
{
	void *buf;
	size_t len;
	u16 type;

	buf = write(sb, logfs_super(sb)->s_je, &type, &len);
	return logfs_write_je_buf(sb, buf, type, len);
}
int write_alias_journal(struct super_block *sb, u64 ino, u64 bix,
		level_t level, int child_no, __be64 val)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_obj_alias *oa = super->s_je;
	int err = 0, fill = super->s_je_fill;

	log_aliases("logfs_write_obj_aliases #%x(%llx, %llx, %x, %x) %llx\n",
			fill, ino, bix, level, child_no, be64_to_cpu(val));
	oa[fill].ino = cpu_to_be64(ino);
	oa[fill].bix = cpu_to_be64(bix);
	oa[fill].val = val;
	oa[fill].level = (__force u8)level;
	oa[fill].child_no = cpu_to_be16(child_no);
	fill++;
	if (fill >= sb->s_blocksize / sizeof(*oa)) {
		err = logfs_write_je_buf(sb, oa, JE_OBJ_ALIAS, sb->s_blocksize);
		fill = 0;
	}

	super->s_je_fill = fill;
	return err;
}
static int logfs_write_obj_aliases(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	int err;

	log_journal("logfs_write_obj_aliases: %d aliases to write\n",
			super->s_no_object_aliases);
	super->s_je_fill = 0;
	err = logfs_write_obj_aliases_pagecache(sb);
	if (err)
		return err;

	if (super->s_je_fill)
		err = logfs_write_je_buf(sb, super->s_je, JE_OBJ_ALIAS,
				super->s_je_fill
				* sizeof(struct logfs_obj_alias));
	return err;
}
/*
 * Write all journal entries.  The goto logic ensures that all journal entries
 * are written whenever a new segment is used.  It is ugly and potentially a
 * bit wasteful, but robustness is more important.  With this we can *always*
 * erase all journal segments except the one containing the most recent commit.
 */
void logfs_write_anchor(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_journal_area;
	int i, err;

	if (!(super->s_flags & LOGFS_SB_FLAG_DIRTY))
		return;
	super->s_flags &= ~LOGFS_SB_FLAG_DIRTY;

	BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
	mutex_lock(&super->s_journal_mutex);

	/* Do this first or suffer corruption */
	logfs_sync_segments(sb);
	account_shadows(sb);

again:
	super->s_no_je = 0;
	for_each_area(i) {
		if (!super->s_area[i]->a_is_open)
			continue;
		super->s_sum_index = i;
		err = logfs_write_je(sb, logfs_write_area);
		if (err)
			goto again;
	}
	err = logfs_write_obj_aliases(sb);
	if (err)
		goto again;
	err = logfs_write_je(sb, logfs_write_erasecount);
	if (err)
		goto again;
	err = logfs_write_je(sb, __logfs_write_anchor);
	if (err)
		goto again;
	err = logfs_write_je(sb, logfs_write_dynsb);
	if (err)
		goto again;
	/*
	 * Order is imperative.  First we sync all writes, including the
	 * non-committed journal writes.  Then we write the final commit and
	 * sync the current journal segment.
	 * There is a theoretical bug here.  Syncing the journal segment will
	 * write a number of journal entries and the final commit.  All these
	 * are written in a single operation.  If the device layer writes the
	 * data back-to-front, the commit will precede the other journal
	 * entries, leaving a race window.
	 * Two fixes are possible.  Preferred is to fix the device layer to
	 * ensure writes happen front-to-back.  Alternatively we can insert
	 * another logfs_sync_area() super->s_devops->sync() combo before
	 * writing the commit.
	 */
	/*
	 * On another subject, super->s_devops->sync is usually not necessary.
	 * Unless called from sys_sync or friends, a barrier would suffice.
	 */
	super->s_devops->sync(sb);
	err = logfs_write_je(sb, logfs_write_commit);
	if (err)
		goto again;
	log_journal("Write commit to %llx\n",
			be64_to_cpu(super->s_je_array[super->s_no_je - 1]));
	logfs_sync_area(area);
	BUG_ON(area->a_used_bytes != area->a_written_bytes);
	super->s_devops->sync(sb);

	mutex_unlock(&super->s_journal_mutex);
	return;
}
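
/*
 * Move the journal to a fresh set of segments.  Called when the current
 * journal segments have accumulated enough erases that wear-leveling
 * demands relocation.
 */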
void do_logfs_journal_wl_pass(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_journal_area;
	struct btree_head32 *head = &super->s_reserved_segments;
	u32 segno, ec;
	int i, err;

	log_journal("Journal requires wear-leveling.\n");
	/* Drop old segments */
	journal_for_each(i)
		if (super->s_journal_seg[i]) {
			btree_remove32(head, super->s_journal_seg[i]);
			logfs_set_segment_unreserved(sb,
					super->s_journal_seg[i],
					super->s_journal_ec[i]);
			super->s_journal_seg[i] = 0;
			super->s_journal_ec[i] = 0;
		}
	/* Get new segments */
	for (i = 0; i < super->s_no_journal_segs; i++) {
		segno = get_best_cand(sb, &super->s_reserve_list, &ec);
		super->s_journal_seg[i] = segno;
		super->s_journal_ec[i] = ec;
		logfs_set_segment_reserved(sb, segno);
		err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
		BUG_ON(err); /* mempool should prevent this */
		err = logfs_erase_segment(sb, segno, 1);
		BUG_ON(err); /* FIXME: remount-ro would be nicer */
	}
	/* Manually move journal_area */
	freeseg(sb, area->a_segno);
	area->a_segno = super->s_journal_seg[0];
	area->a_is_open = 0;
	area->a_used_bytes = 0;
	/* Write journal */
	logfs_write_anchor(sb);
	/* Write superblocks */
	err = logfs_write_sb(sb);
	BUG_ON(err);
}
static const struct logfs_area_ops journal_area_ops = {
	.get_free_segment	= journal_get_free_segment,
	.get_erase_count	= journal_get_erase_count,
	.erase_segment		= journal_erase_segment,
};
int logfs_init_journal(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize)
		+ MAX_JOURNAL_HEADER;
	int ret = -ENOMEM;

	mutex_init(&super->s_journal_mutex);
	btree_init_mempool32(&super->s_reserved_segments, super->s_btree_pool);

	super->s_je = kzalloc(bufsize, GFP_KERNEL);
	if (!super->s_je)
		return ret;

	super->s_compressed_je = kzalloc(bufsize, GFP_KERNEL);
	if (!super->s_compressed_je)
		return ret;

	super->s_master_inode = logfs_new_meta_inode(sb, LOGFS_INO_MASTER);
	if (IS_ERR(super->s_master_inode))
		return PTR_ERR(super->s_master_inode);

	ret = logfs_read_journal(sb);
	if (ret)
		return -EIO;

	reserve_sb_and_journal(sb);
	logfs_calc_free(sb);

	super->s_journal_area->a_ops = &journal_area_ops;
	return 0;
}
void logfs_cleanup_journal(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	btree_grim_visitor32(&super->s_reserved_segments, 0, NULL);
	destroy_meta_inode(super->s_master_inode);
	super->s_master_inode = NULL;

	kfree(super->s_compressed_je);
	kfree(super->s_je);
}