2 * This file is part of UBIFS.
4 * Copyright (C) 2006-2008 Nokia Corporation.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
24 * This file contains journal replay code. It runs when the file-system is being
25 * mounted and requires no locking.
27 * The larger the journal is, the longer it takes to scan it, so the longer it
28 * takes to mount UBIFS. This is why the journal has a limited size which may be
29 * changed depending on the system requirements. But a larger journal gives
30 * faster I/O speed because it writes the index less frequently. So this is a
31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
32 * larger the journal is, the more memory its index may consume.
40 * REPLAY_DELETION: node was deleted
41 * REPLAY_REF: node is a reference node
49 * struct replay_entry - replay tree entry.
50 * @lnum: logical eraseblock number of the node
53 * @sqnum: node sequence number
54 * @flags: replay flags
55 * @rb: links the replay tree
57 * @nm: directory entry name
58 * @old_size: truncation old size
59 * @new_size: truncation new size
60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
63 * UBIFS journal replay must compare node sequence numbers, which means it must
64 * build a tree of node information to insert into the TNC.
70 unsigned long long sqnum
;
88 * struct bud_entry - entry in the list of buds to replay.
89 * @list: next bud in the list
90 * @bud: bud description object
91 * @free: free bytes in the bud
92 * @sqnum: reference node sequence number
95 struct list_head list
;
96 struct ubifs_bud
*bud
;
98 unsigned long long sqnum
;
102 * set_bud_lprops - set free and dirty space used by a bud.
103 * @c: UBIFS file-system description object
104 * @r: replay entry of bud
106 static int set_bud_lprops(struct ubifs_info
*c
, struct replay_entry
*r
)
108 const struct ubifs_lprops
*lp
;
113 lp
= ubifs_lpt_lookup_dirty(c
, r
->lnum
);
120 if (r
->offs
== 0 && (lp
->free
!= c
->leb_size
|| lp
->dirty
!= 0)) {
122 * The LEB was added to the journal with a starting offset of
123 * zero which means the LEB must have been empty. The LEB
124 * property values should be lp->free == c->leb_size and
125 * lp->dirty == 0, but that is not the case. The reason is that
126 * the LEB was garbage collected. The garbage collector resets
127 * the free and dirty space without recording it anywhere except
128 * lprops, so if there is not a commit then lprops does not have
129 * that information next time the file system is mounted.
131 * We do not need to adjust free space because the scan has told
132 * us the exact value which is recorded in the replay entry as
135 * However we do need to subtract from the dirty space the
136 * amount of space that the garbage collector reclaimed, which
137 * is the whole LEB minus the amount of space that was free.
139 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r
->lnum
,
140 lp
->free
, lp
->dirty
);
141 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r
->lnum
,
142 lp
->free
, lp
->dirty
);
143 dirty
-= c
->leb_size
- lp
->free
;
145 * If the replay order was perfect the dirty space would now be
146 * zero. The order is not perfect because the journal heads
147 * race with each other. This is not a problem but it does mean
148 * that the dirty space may temporarily exceed c->leb_size
152 dbg_msg("LEB %d lp: %d free %d dirty "
153 "replay: %d free %d dirty", r
->lnum
, lp
->free
,
154 lp
->dirty
, r
->free
, r
->dirty
);
156 lp
= ubifs_change_lp(c
, lp
, r
->free
, dirty
+ r
->dirty
,
157 lp
->flags
| LPROPS_TAKEN
, 0);
163 ubifs_release_lprops(c
);
168 * trun_remove_range - apply a replay entry for a truncation to the TNC.
169 * @c: UBIFS file-system description object
170 * @r: replay entry of truncation
172 static int trun_remove_range(struct ubifs_info
*c
, struct replay_entry
*r
)
174 unsigned min_blk
, max_blk
;
175 union ubifs_key min_key
, max_key
;
178 min_blk
= r
->new_size
/ UBIFS_BLOCK_SIZE
;
179 if (r
->new_size
& (UBIFS_BLOCK_SIZE
- 1))
182 max_blk
= r
->old_size
/ UBIFS_BLOCK_SIZE
;
183 if ((r
->old_size
& (UBIFS_BLOCK_SIZE
- 1)) == 0)
186 ino
= key_inum(c
, &r
->key
);
188 data_key_init(c
, &min_key
, ino
, min_blk
);
189 data_key_init(c
, &max_key
, ino
, max_blk
);
191 return ubifs_tnc_remove_range(c
, &min_key
, &max_key
);
195 * apply_replay_entry - apply a replay entry to the TNC.
196 * @c: UBIFS file-system description object
197 * @r: replay entry to apply
199 * Apply a replay entry to the TNC.
201 static int apply_replay_entry(struct ubifs_info
*c
, struct replay_entry
*r
)
203 int err
, deletion
= ((r
->flags
& REPLAY_DELETION
) != 0);
205 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r
->lnum
,
206 r
->offs
, r
->len
, r
->flags
, r
->sqnum
, DBGKEY(&r
->key
));
208 /* Set c->replay_sqnum to help deal with dangling branches. */
209 c
->replay_sqnum
= r
->sqnum
;
211 if (r
->flags
& REPLAY_REF
)
212 err
= set_bud_lprops(c
, r
);
213 else if (is_hash_key(c
, &r
->key
)) {
215 err
= ubifs_tnc_remove_nm(c
, &r
->key
, &r
->nm
);
217 err
= ubifs_tnc_add_nm(c
, &r
->key
, r
->lnum
, r
->offs
,
221 switch (key_type(c
, &r
->key
)) {
224 ino_t inum
= key_inum(c
, &r
->key
);
226 err
= ubifs_tnc_remove_ino(c
, inum
);
230 err
= trun_remove_range(c
, r
);
233 err
= ubifs_tnc_remove(c
, &r
->key
);
237 err
= ubifs_tnc_add(c
, &r
->key
, r
->lnum
, r
->offs
,
242 if (c
->need_recovery
)
243 err
= ubifs_recover_size_accum(c
, &r
->key
, deletion
,
251 * destroy_replay_tree - destroy the replay tree.
252 * @c: UBIFS file-system description object
254 * Destroy the replay tree.
256 static void destroy_replay_tree(struct ubifs_info
*c
)
258 struct rb_node
*this = c
->replay_tree
.rb_node
;
259 struct replay_entry
*r
;
263 this = this->rb_left
;
265 } else if (this->rb_right
) {
266 this = this->rb_right
;
269 r
= rb_entry(this, struct replay_entry
, rb
);
270 this = rb_parent(this);
272 if (this->rb_left
== &r
->rb
)
273 this->rb_left
= NULL
;
275 this->rb_right
= NULL
;
277 if (is_hash_key(c
, &r
->key
))
281 c
->replay_tree
= RB_ROOT
;
285 * apply_replay_tree - apply the replay tree to the TNC.
286 * @c: UBIFS file-system description object
288 * Apply the replay tree.
289 * Returns zero in case of success and a negative error code in case of
292 static int apply_replay_tree(struct ubifs_info
*c
)
294 struct rb_node
*this = rb_first(&c
->replay_tree
);
297 struct replay_entry
*r
;
302 r
= rb_entry(this, struct replay_entry
, rb
);
303 err
= apply_replay_entry(c
, r
);
306 this = rb_next(this);
312 * insert_node - insert a node to the replay tree.
313 * @c: UBIFS file-system description object
314 * @lnum: node logical eraseblock number
318 * @sqnum: sequence number
319 * @deletion: non-zero if this is a deletion
320 * @used: number of bytes in use in a LEB
321 * @old_size: truncation old size
322 * @new_size: truncation new size
324 * This function inserts a scanned non-direntry node to the replay tree. The
325 * replay tree is an RB-tree containing @struct replay_entry elements which are
326 * indexed by the sequence number. The replay tree is applied at the very end
327 * of the replay process. Since the tree is sorted in sequence number order,
328 * the older modifications are applied first. This function returns zero in
329 * case of success and a negative error code in case of failure.
331 static int insert_node(struct ubifs_info
*c
, int lnum
, int offs
, int len
,
332 union ubifs_key
*key
, unsigned long long sqnum
,
333 int deletion
, int *used
, loff_t old_size
,
336 struct rb_node
**p
= &c
->replay_tree
.rb_node
, *parent
= NULL
;
337 struct replay_entry
*r
;
339 if (key_inum(c
, key
) >= c
->highest_inum
)
340 c
->highest_inum
= key_inum(c
, key
);
342 dbg_mnt("add LEB %d:%d, key %s", lnum
, offs
, DBGKEY(key
));
345 r
= rb_entry(parent
, struct replay_entry
, rb
);
346 if (sqnum
< r
->sqnum
) {
349 } else if (sqnum
> r
->sqnum
) {
353 ubifs_err("duplicate sqnum in replay");
357 r
= kzalloc(sizeof(struct replay_entry
), GFP_KERNEL
);
362 *used
+= ALIGN(len
, 8);
367 r
->flags
= (deletion
? REPLAY_DELETION
: 0);
368 r
->old_size
= old_size
;
369 r
->new_size
= new_size
;
370 key_copy(c
, key
, &r
->key
);
372 rb_link_node(&r
->rb
, parent
, p
);
373 rb_insert_color(&r
->rb
, &c
->replay_tree
);
378 * insert_dent - insert a directory entry node into the replay tree.
379 * @c: UBIFS file-system description object
380 * @lnum: node logical eraseblock number
384 * @name: directory entry name
385 * @nlen: directory entry name length
386 * @sqnum: sequence number
387 * @deletion: non-zero if this is a deletion
388 * @used: number of bytes in use in a LEB
390 * This function inserts a scanned directory entry node to the replay tree.
391 * Returns zero in case of success and a negative error code in case of
394 * This function is also used for extended attribute entries because they are
395 * implemented as directory entry nodes.
397 static int insert_dent(struct ubifs_info
*c
, int lnum
, int offs
, int len
,
398 union ubifs_key
*key
, const char *name
, int nlen
,
399 unsigned long long sqnum
, int deletion
, int *used
)
401 struct rb_node
**p
= &c
->replay_tree
.rb_node
, *parent
= NULL
;
402 struct replay_entry
*r
;
405 if (key_inum(c
, key
) >= c
->highest_inum
)
406 c
->highest_inum
= key_inum(c
, key
);
408 dbg_mnt("add LEB %d:%d, key %s", lnum
, offs
, DBGKEY(key
));
411 r
= rb_entry(parent
, struct replay_entry
, rb
);
412 if (sqnum
< r
->sqnum
) {
416 if (sqnum
> r
->sqnum
) {
420 ubifs_err("duplicate sqnum in replay");
424 r
= kzalloc(sizeof(struct replay_entry
), GFP_KERNEL
);
427 nbuf
= kmalloc(nlen
+ 1, GFP_KERNEL
);
434 *used
+= ALIGN(len
, 8);
440 memcpy(nbuf
, name
, nlen
);
443 r
->flags
= (deletion
? REPLAY_DELETION
: 0);
444 key_copy(c
, key
, &r
->key
);
447 rb_link_node(&r
->rb
, parent
, p
);
448 rb_insert_color(&r
->rb
, &c
->replay_tree
);
453 * ubifs_validate_entry - validate directory or extended attribute entry node.
454 * @c: UBIFS file-system description object
455 * @dent: the node to validate
457 * This function validates directory or extended attribute entry node @dent.
458 * Returns zero if the node is all right and a %-EINVAL if not.
460 int ubifs_validate_entry(struct ubifs_info
*c
,
461 const struct ubifs_dent_node
*dent
)
463 int key_type
= key_type_flash(c
, dent
->key
);
464 int nlen
= le16_to_cpu(dent
->nlen
);
466 if (le32_to_cpu(dent
->ch
.len
) != nlen
+ UBIFS_DENT_NODE_SZ
+ 1 ||
467 dent
->type
>= UBIFS_ITYPES_CNT
||
468 nlen
> UBIFS_MAX_NLEN
|| dent
->name
[nlen
] != 0 ||
469 strnlen(dent
->name
, nlen
) != nlen
||
470 le64_to_cpu(dent
->inum
) > MAX_INUM
) {
471 ubifs_err("bad %s node", key_type
== UBIFS_DENT_KEY
?
472 "directory entry" : "extended attribute entry");
476 if (key_type
!= UBIFS_DENT_KEY
&& key_type
!= UBIFS_XENT_KEY
) {
477 ubifs_err("bad key type %d", key_type
);
485 * replay_bud - replay a bud logical eraseblock.
486 * @c: UBIFS file-system description object
487 * @lnum: bud logical eraseblock number to replay
488 * @offs: bud start offset
489 * @jhead: journal head to which this bud belongs
490 * @free: amount of free space in the bud is returned here
491 * @dirty: amount of dirty space from padding and deletion nodes is returned
494 * This function returns zero in case of success and a negative error code in
497 static int replay_bud(struct ubifs_info
*c
, int lnum
, int offs
, int jhead
,
498 int *free
, int *dirty
)
500 int err
= 0, used
= 0;
501 struct ubifs_scan_leb
*sleb
;
502 struct ubifs_scan_node
*snod
;
503 struct ubifs_bud
*bud
;
505 dbg_mnt("replay bud LEB %d, head %d", lnum
, jhead
);
506 if (c
->need_recovery
)
507 sleb
= ubifs_recover_leb(c
, lnum
, offs
, c
->sbuf
, jhead
!= GCHD
);
509 sleb
= ubifs_scan(c
, lnum
, offs
, c
->sbuf
);
511 return PTR_ERR(sleb
);
514 * The bud does not have to start from offset zero - the beginning of
515 * the 'lnum' LEB may contain previously committed data. One of the
516 * things we have to do in replay is to correctly update lprops with
517 * newer information about this LEB.
519 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
520 * bytes of free space because it only contains information about
523 * But we know that the real amount of free space is 'c->leb_size -
524 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
525 * 'sleb->endpt' is used by bud data. We have to correctly calculate
526 * how much of these data are dirty and update lprops with this
529 * The dirt in that LEB region is comprised of padding nodes, deletion
530 * nodes, truncation nodes and nodes which are obsoleted by subsequent
531 * nodes in this LEB. So instead of calculating clean space, we
532 * calculate used space ('used' variable).
535 list_for_each_entry(snod
, &sleb
->nodes
, list
) {
540 if (snod
->sqnum
>= SQNUM_WATERMARK
) {
541 ubifs_err("file system's life ended");
545 if (snod
->sqnum
> c
->max_sqnum
)
546 c
->max_sqnum
= snod
->sqnum
;
548 switch (snod
->type
) {
551 struct ubifs_ino_node
*ino
= snod
->node
;
552 loff_t new_size
= le64_to_cpu(ino
->size
);
554 if (le32_to_cpu(ino
->nlink
) == 0)
556 err
= insert_node(c
, lnum
, snod
->offs
, snod
->len
,
557 &snod
->key
, snod
->sqnum
, deletion
,
561 case UBIFS_DATA_NODE
:
563 struct ubifs_data_node
*dn
= snod
->node
;
564 loff_t new_size
= le32_to_cpu(dn
->size
) +
565 key_block(c
, &snod
->key
) *
568 err
= insert_node(c
, lnum
, snod
->offs
, snod
->len
,
569 &snod
->key
, snod
->sqnum
, deletion
,
573 case UBIFS_DENT_NODE
:
574 case UBIFS_XENT_NODE
:
576 struct ubifs_dent_node
*dent
= snod
->node
;
578 err
= ubifs_validate_entry(c
, dent
);
582 err
= insert_dent(c
, lnum
, snod
->offs
, snod
->len
,
583 &snod
->key
, dent
->name
,
584 le16_to_cpu(dent
->nlen
), snod
->sqnum
,
585 !le64_to_cpu(dent
->inum
), &used
);
588 case UBIFS_TRUN_NODE
:
590 struct ubifs_trun_node
*trun
= snod
->node
;
591 loff_t old_size
= le64_to_cpu(trun
->old_size
);
592 loff_t new_size
= le64_to_cpu(trun
->new_size
);
595 /* Validate truncation node */
596 if (old_size
< 0 || old_size
> c
->max_inode_sz
||
597 new_size
< 0 || new_size
> c
->max_inode_sz
||
598 old_size
<= new_size
) {
599 ubifs_err("bad truncation node");
604 * Create a fake truncation key just to use the same
605 * functions which expect nodes to have keys.
607 trun_key_init(c
, &key
, le32_to_cpu(trun
->inum
));
608 err
= insert_node(c
, lnum
, snod
->offs
, snod
->len
,
609 &key
, snod
->sqnum
, 1, &used
,
614 ubifs_err("unexpected node type %d in bud LEB %d:%d",
615 snod
->type
, lnum
, snod
->offs
);
623 bud
= ubifs_search_bud(c
, lnum
);
627 ubifs_assert(sleb
->endpt
- offs
>= used
);
628 ubifs_assert(sleb
->endpt
% c
->min_io_size
== 0);
630 if (sleb
->endpt
+ c
->min_io_size
<= c
->leb_size
&&
631 !(c
->vfs_sb
->s_flags
& MS_RDONLY
))
632 err
= ubifs_wbuf_seek_nolock(&c
->jheads
[jhead
].wbuf
, lnum
,
633 sleb
->endpt
, UBI_SHORTTERM
);
635 *dirty
= sleb
->endpt
- offs
- used
;
636 *free
= c
->leb_size
- sleb
->endpt
;
639 ubifs_scan_destroy(sleb
);
643 ubifs_err("bad node is at LEB %d:%d", lnum
, snod
->offs
);
644 dbg_dump_node(c
, snod
->node
);
645 ubifs_scan_destroy(sleb
);
650 * insert_ref_node - insert a reference node to the replay tree.
651 * @c: UBIFS file-system description object
652 * @lnum: node logical eraseblock number
654 * @sqnum: sequence number
655 * @free: amount of free space in bud
656 * @dirty: amount of dirty space from padding and deletion nodes
658 * This function inserts a reference node to the replay tree and returns zero
659 * in case of success or a negative error code in case of failure.
661 static int insert_ref_node(struct ubifs_info
*c
, int lnum
, int offs
,
662 unsigned long long sqnum
, int free
, int dirty
)
664 struct rb_node
**p
= &c
->replay_tree
.rb_node
, *parent
= NULL
;
665 struct replay_entry
*r
;
667 dbg_mnt("add ref LEB %d:%d", lnum
, offs
);
670 r
= rb_entry(parent
, struct replay_entry
, rb
);
671 if (sqnum
< r
->sqnum
) {
674 } else if (sqnum
> r
->sqnum
) {
678 ubifs_err("duplicate sqnum in replay tree");
682 r
= kzalloc(sizeof(struct replay_entry
), GFP_KERNEL
);
689 r
->flags
= REPLAY_REF
;
693 rb_link_node(&r
->rb
, parent
, p
);
694 rb_insert_color(&r
->rb
, &c
->replay_tree
);
699 * replay_buds - replay all buds.
700 * @c: UBIFS file-system description object
702 * This function returns zero in case of success and a negative error code in
705 static int replay_buds(struct ubifs_info
*c
)
708 int err
, uninitialized_var(free
), uninitialized_var(dirty
);
710 list_for_each_entry(b
, &c
->replay_buds
, list
) {
711 err
= replay_bud(c
, b
->bud
->lnum
, b
->bud
->start
, b
->bud
->jhead
,
715 err
= insert_ref_node(c
, b
->bud
->lnum
, b
->bud
->start
, b
->sqnum
,
725 * destroy_bud_list - destroy the list of buds to replay.
726 * @c: UBIFS file-system description object
728 static void destroy_bud_list(struct ubifs_info
*c
)
732 while (!list_empty(&c
->replay_buds
)) {
733 b
= list_entry(c
->replay_buds
.next
, struct bud_entry
, list
);
740 * add_replay_bud - add a bud to the list of buds to replay.
741 * @c: UBIFS file-system description object
742 * @lnum: bud logical eraseblock number to replay
743 * @offs: bud start offset
744 * @jhead: journal head to which this bud belongs
745 * @sqnum: reference node sequence number
747 * This function returns zero in case of success and a negative error code in
750 static int add_replay_bud(struct ubifs_info
*c
, int lnum
, int offs
, int jhead
,
751 unsigned long long sqnum
)
753 struct ubifs_bud
*bud
;
756 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum
, offs
, jhead
);
758 bud
= kmalloc(sizeof(struct ubifs_bud
), GFP_KERNEL
);
762 b
= kmalloc(sizeof(struct bud_entry
), GFP_KERNEL
);
771 ubifs_add_bud(c
, bud
);
775 list_add_tail(&b
->list
, &c
->replay_buds
);
781 * validate_ref - validate a reference node.
782 * @c: UBIFS file-system description object
783 * @ref: the reference node to validate
784 * @ref_lnum: LEB number of the reference node
785 * @ref_offs: reference node offset
787 * This function returns %1 if a bud reference already exists for the LEB. %0 is
788 * returned if the reference node is new, otherwise %-EINVAL is returned if
791 static int validate_ref(struct ubifs_info
*c
, const struct ubifs_ref_node
*ref
)
793 struct ubifs_bud
*bud
;
794 int lnum
= le32_to_cpu(ref
->lnum
);
795 unsigned int offs
= le32_to_cpu(ref
->offs
);
796 unsigned int jhead
= le32_to_cpu(ref
->jhead
);
799 * ref->offs may point to the end of LEB when the journal head points
800 * to the end of LEB and we write reference node for it during commit.
801 * So this is why only 'offs > c->leb_size' (not 'offs >= c->leb_size') is rejected.
803 if (jhead
>= c
->jhead_cnt
|| lnum
>= c
->leb_cnt
||
804 lnum
< c
->main_first
|| offs
> c
->leb_size
||
805 offs
& (c
->min_io_size
- 1))
808 /* Make sure we have not already looked at this bud */
809 bud
= ubifs_search_bud(c
, lnum
);
811 if (bud
->jhead
== jhead
&& bud
->start
<= offs
)
813 ubifs_err("bud at LEB %d:%d was already referred", lnum
, offs
);
821 * replay_log_leb - replay a log logical eraseblock.
822 * @c: UBIFS file-system description object
823 * @lnum: log logical eraseblock to replay
824 * @offs: offset to start replaying from
827 * This function replays a log LEB and returns zero in case of success, %1 if
828 * this is the last LEB in the log, and a negative error code in case of
831 static int replay_log_leb(struct ubifs_info
*c
, int lnum
, int offs
, void *sbuf
)
834 struct ubifs_scan_leb
*sleb
;
835 struct ubifs_scan_node
*snod
;
836 const struct ubifs_cs_node
*node
;
838 dbg_mnt("replay log LEB %d:%d", lnum
, offs
);
839 sleb
= ubifs_scan(c
, lnum
, offs
, sbuf
);
841 if (PTR_ERR(sleb
) != -EUCLEAN
|| !c
->need_recovery
)
842 return PTR_ERR(sleb
);
843 sleb
= ubifs_recover_log_leb(c
, lnum
, offs
, sbuf
);
845 return PTR_ERR(sleb
);
848 if (sleb
->nodes_cnt
== 0) {
855 snod
= list_entry(sleb
->nodes
.next
, struct ubifs_scan_node
, list
);
856 if (c
->cs_sqnum
== 0) {
858 * This is the first log LEB we are looking at, make sure that
859 * the first node is a commit start node. Also record its
860 * sequence number so that UBIFS can determine where the log
861 * ends, because all nodes which were written after it have higher sequence
864 if (snod
->type
!= UBIFS_CS_NODE
) {
865 dbg_err("first log node at LEB %d:%d is not CS node",
869 if (le64_to_cpu(node
->cmt_no
) != c
->cmt_no
) {
870 dbg_err("first CS node at LEB %d:%d has wrong "
871 "commit number %llu expected %llu",
873 (unsigned long long)le64_to_cpu(node
->cmt_no
),
878 c
->cs_sqnum
= le64_to_cpu(node
->ch
.sqnum
);
879 dbg_mnt("commit start sqnum %llu", c
->cs_sqnum
);
882 if (snod
->sqnum
< c
->cs_sqnum
) {
884 * This means that we have reached the end of the log and are
885 * now looking at older log data, which was already
886 * committed but the eraseblock was not erased (UBIFS
887 * only un-maps it). So this basically means we have to
888 * exit with the "end of log" code.
894 /* Make sure the first node sits at offset zero of the LEB */
895 if (snod
->offs
!= 0) {
896 dbg_err("first node is not at zero offset");
900 list_for_each_entry(snod
, &sleb
->nodes
, list
) {
904 if (snod
->sqnum
>= SQNUM_WATERMARK
) {
905 ubifs_err("file system's life ended");
909 if (snod
->sqnum
< c
->cs_sqnum
) {
910 dbg_err("bad sqnum %llu, commit sqnum %llu",
911 snod
->sqnum
, c
->cs_sqnum
);
915 if (snod
->sqnum
> c
->max_sqnum
)
916 c
->max_sqnum
= snod
->sqnum
;
918 switch (snod
->type
) {
919 case UBIFS_REF_NODE
: {
920 const struct ubifs_ref_node
*ref
= snod
->node
;
922 err
= validate_ref(c
, ref
);
924 break; /* Already have this bud */
928 err
= add_replay_bud(c
, le32_to_cpu(ref
->lnum
),
929 le32_to_cpu(ref
->offs
),
930 le32_to_cpu(ref
->jhead
),
938 /* Make sure it sits at the beginning of LEB */
939 if (snod
->offs
!= 0) {
940 ubifs_err("unexpected node in log");
945 ubifs_err("unexpected node in log");
950 if (sleb
->endpt
|| c
->lhead_offs
>= c
->leb_size
) {
951 c
->lhead_lnum
= lnum
;
952 c
->lhead_offs
= sleb
->endpt
;
957 ubifs_scan_destroy(sleb
);
961 ubifs_err("log error detected while replaying the log at LEB %d:%d",
962 lnum
, offs
+ snod
->offs
);
963 dbg_dump_node(c
, snod
->node
);
964 ubifs_scan_destroy(sleb
);
969 * take_ihead - update the status of the index head in lprops to 'taken'.
970 * @c: UBIFS file-system description object
972 * This function returns the amount of free space in the index head LEB or a
973 * negative error code.
975 static int take_ihead(struct ubifs_info
*c
)
977 const struct ubifs_lprops
*lp
;
982 lp
= ubifs_lpt_lookup_dirty(c
, c
->ihead_lnum
);
990 lp
= ubifs_change_lp(c
, lp
, LPROPS_NC
, LPROPS_NC
,
991 lp
->flags
| LPROPS_TAKEN
, 0);
999 ubifs_release_lprops(c
);
1004 * ubifs_replay_journal - replay journal.
1005 * @c: UBIFS file-system description object
1007 * This function scans the journal, replays and cleans it up. It makes sure all
1008 * memory data structures related to uncommitted journal are built (dirty TNC
1009 * tree, tree of buds, modified lprops, etc).
1011 int ubifs_replay_journal(struct ubifs_info
*c
)
1013 int err
, i
, lnum
, offs
, free
;
1016 BUILD_BUG_ON(UBIFS_TRUN_KEY
> 5);
1018 /* Update the status of the index head in lprops to 'taken' */
1019 free
= take_ihead(c
);
1021 return free
; /* Error code */
1023 if (c
->ihead_offs
!= c
->leb_size
- free
) {
1024 ubifs_err("bad index head LEB %d:%d", c
->ihead_lnum
,
1029 sbuf
= vmalloc(c
->leb_size
);
1033 dbg_mnt("start replaying the journal");
1037 lnum
= c
->ltail_lnum
= c
->lhead_lnum
;
1038 offs
= c
->lhead_offs
;
1040 for (i
= 0; i
< c
->log_lebs
; i
++, lnum
++) {
1041 if (lnum
>= UBIFS_LOG_LNUM
+ c
->log_lebs
) {
1043 * The log is logically circular, we reached the last
1044 * LEB, switch to the first one.
1046 lnum
= UBIFS_LOG_LNUM
;
1049 err
= replay_log_leb(c
, lnum
, offs
, sbuf
);
1051 /* We hit the end of the log */
1058 err
= replay_buds(c
);
1062 err
= apply_replay_tree(c
);
1067 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
1068 * to roughly estimate index growth. Things like @c->min_idx_lebs
1069 * depend on it. This means we have to initialize it to make sure
1070 * budgeting works properly.
1072 c
->budg_uncommitted_idx
= atomic_long_read(&c
->dirty_zn_cnt
);
1073 c
->budg_uncommitted_idx
*= c
->max_idx_node_sz
;
1075 ubifs_assert(c
->bud_bytes
<= c
->max_bud_bytes
|| c
->need_recovery
);
1076 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1077 "highest_inum %lu", c
->lhead_lnum
, c
->lhead_offs
, c
->max_sqnum
,
1078 (unsigned long)c
->highest_inum
);
1080 destroy_replay_tree(c
);
1081 destroy_bud_list(c
);