/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
   reiser4/README */

/* Reiser4 Wandering Log */
/* You should read http://www.namesys.com/txn-doc.html

   That describes how filesystem operations are performed as atomic
   transactions, and how we try to arrange it so that we can write most of the
   data only once while performing the operation atomically.

   For the purposes of this code, it is enough to understand that it has been
   told a given block should be written either once, or twice (if twice, then
   once to the wandered location and once to the real location).

   This code guarantees that those blocks that are defined to be part of an
   atom either all take effect or none of them take effect.

   The "relocate set" of nodes is submitted to write by the jnode_flush()
   routine, and the "overwrite set" is submitted by reiser4_write_log().
   This is because with the overwrite set we seek to optimize writes, and
   with the relocate set we seek to cause disk order to correlate with the
   "parent first order" (preorder).

   reiser4_write_log() allocates and writes wandered blocks and maintains
   additional on-disk structures of the atom as wander records (each wander
   record occupies one block) for storing the "wandered map" (a table which
   maps wandered to real block numbers) and other information which might be
   needed at transaction recovery time.

   The wander records are unidirectionally linked into a circle: each wander
   record contains the block number of the next wander record, and the last
   wander record points back to the first one.
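
   For example, a transaction whose wander records occupy blocks B0 (the tx
   head), B1 and B2 is linked as

	B0.next_block = B1,  B1.next_block = B2,  B2.next_block = B0,

   so a reader can start from the tx head and stop when the walk returns to
   the tx head block. (The block names here are illustrative only.)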
   One wander record (named "tx head" in this file) has a format different
   from the other wander records. The "tx head" has a reference to the "tx
   head" block of the previously committed atom. Also, the "tx head" contains
   fs information (the free blocks counter and the oid allocator state) which
   is logged in a special way.

   There are two journal control blocks, named journal header and journal
   footer, which have fixed on-disk locations. The journal header has a
   reference to the "tx head" block of the last committed atom. The journal
   footer points to the "tx head" of the last flushed atom. The atom is
   "played" when all blocks from its overwrite set are written to disk the
   second time (i.e. written to their real locations).
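
   Taken together, the on-disk picture at any moment is (a sketch; N and M
   are illustrative atom numbers):

	journal header --> tx head of atom N (last committed)
				| "prev_tx"
				v
			   tx head of atom N-1
				| "prev_tx"
				v
			       ...
	journal footer --> tx head of atom M (last flushed/played)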
   NOTE: People who know reiserfs internals and its journal structure might be
   confused by these terms journal footer and journal header. There is a table
   of terms with similar semantics in reiserfs (reiser3) and reiser4:

   REISER3 TERM        |  REISER4 TERM         | DESCRIPTION
   --------------------+-----------------------+----------------------------
   commit record       |  journal header       | atomic write of this record
                       |                       | ends transaction commit
   --------------------+-----------------------+----------------------------
   journal header      |  journal footer       | atomic write of this record
                       |                       | ends post-commit writes.
                       |                       | After its successful
                       |                       | writing, the journal
                       |                       | blocks (in reiser3) or
                       |                       | wandered blocks/records
                       |                       | (in reiser4) are free for
                       |                       | re-use.
   --------------------+-----------------------+----------------------------
   The atom commit process is the following:

   1. The overwrite set is taken from atom's clean list, and its size is
      counted.

   2. The number of necessary wander records (including tx head) is calculated,
      and the wander record blocks are allocated.

   3. Allocate wandered blocks and populate wander records with the wandered
      map.

   4. Submit write requests for wander records and wandered blocks.

   5. Wait until submitted write requests complete.

   6. Update journal header: change the pointer to the block number of the just
      written tx head, submit an i/o for the modified journal header block and
      wait for i/o completion.
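
   In terms of the functions defined below, a commit runs approximately as
   follows (error handling and locking omitted; see commit_tx() and
   reiser4_write_logs() for the real control flow):

	get_overwrite_set(ch);			(step 1)
	get_tx_size(ch);			(step 2, counting)
	alloc_wandered_blocks(ch, fq);		(steps 3-4, wandered blocks)
	alloc_tx(ch, fq);			(steps 2-4, wander records)
	current_atom_finish_all_fq();		(step 5)
	update_journal_header(ch, barrier);	(step 6)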
   NOTE: The special logging for bitmap blocks and some reiser4 super block
   fields makes the processes of atom commit, flush and recovery a bit more
   complex (see comments in the source code for details).
   The atom playing process is the following:

   1. Write atom's overwrite set in-place.

   2. Wait on i/o.

   3. Update journal footer: change the pointer to the block number of the tx
      head block of the atom we are currently flushing, submit an i/o, wait on
      i/o completion.

   4. Free disk space which was used for wandered blocks and wander records.
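
   In terms of the functions defined below, playing is approximately (see
   write_tx_back() and reiser4_write_logs() for the real control flow):

	write_jnode_list(ch->overwrite_set, fq, NULL, ...);	(step 1)
	current_atom_finish_all_fq();				(step 2)
	update_journal_footer(ch, barrier);			(step 3)
	dealloc_tx_list(ch); dealloc_wmap(ch);			(step 4)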
   After the freeing of wandered blocks and wander records, the journal footer
   points to an on-disk structure which might be overwritten soon. Neither the
   log writer nor the journal recovery procedure uses that pointer for
   accessing the data. When the journal recovery procedure searches for the
   oldest transaction, it compares the journal footer pointer value with the
   "prev_tx" pointer value in each tx head; when the values are equal, the
   oldest not flushed transaction is found.
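
   In pseudo-code, that search (see replay_oldest_transaction() below) is:

	prev_tx = journal_header->last_committed_tx;
	while (prev_tx != journal_footer->last_flushed_tx)
		prev_tx = tx_head_at(prev_tx)->prev_tx;
	replay the transaction whose tx head the walk stopped at;

   where tx_head_at() is only shorthand for loading the tx head block at that
   block number, not a real function in this file.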
   NOTE on disk space leakage: the information about which blocks and how many
   blocks are allocated for wandered blocks and wander records is not written
   to disk, because of the special logging for bitmaps and some super block
   counters. After a system crash reiser4 does not remember those allocations,
   thus there is no disk space leakage of this kind. */
/* Special logging of reiser4 super block fields. */

/* There are some reiser4 super block fields (the free block count and the OID
   allocator state, i.e. the number of files and the next free OID) which are
   logged separately from the super block to avoid unnecessary atom fusion.

   So, the reiser4 super block does not have to be captured by a transaction
   which allocates/deallocates disk blocks or creates/deletes file objects.
   Moreover, the reiser4 on-disk super block is not touched when such a
   transaction is committed and flushed. Those "counters logged specially" are
   logged in "tx head" blocks and in the journal footer block.

   A step-by-step description of special logging:

   0. The per-atom information about deleted or created files and allocated or
   freed blocks is collected during the transaction. The atom's
   ->nr_objects_created and ->nr_objects_deleted are for object
   deletion/creation tracking, the numbers of allocated and freed blocks are
   calculated using atom's delete set and atom's capture list -- all new and
   relocated nodes should be on atom's clean list and should have the
   JNODE_RELOC bit set.

   1. The "logged specially" reiser4 super block fields have their "committed"
   versions in the reiser4 in-memory super block. They get modified only at
   atom commit time. The atom's commit thread has exclusive access to those
   "committed" fields because the log writer implementation supports only one
   atom commit at a time (there is a per-fs "commit" mutex). At that time the
   "committed" counters are modified using the per-atom information collected
   during the transaction. These counters are stored on disk as a part of the
   tx head block when the atom is committed.

   2. When the atom is flushed, the value of the free block counter and the OID
   allocator state get written to the journal footer block. A special journal
   procedure (reiser4_journal_recover_sb_data()) takes those values from the
   journal footer and updates the reiser4 in-memory super block.

   NOTE: That means the free block count and the OID allocator state are logged
   separately from the reiser4 super block regardless of the fact that the
   reiser4 super block has fields to store both the free block counter and the
   OID allocator.

   Writing the whole super block at commit time would require knowing the true
   values of all its fields without the changes made by not yet committed
   transactions. That would be possible by keeping a "committed" version of the
   super block, like the reiser4 bitmap blocks have "committed" and "working"
   versions. However, another scheme was implemented, which stores the
   specially logged values in the unused free space inside the transaction head
   block. In my opinion it has the advantage of not writing the whole super
   block when only part of it was modified. */
#include "debug.h"
#include "dformat.h"
#include "txnmgr.h"
#include "jnode.h"
#include "znode.h"
#include "block_alloc.h"
#include "page_cache.h"
#include "wander.h"
#include "reiser4.h"
#include "super.h"
#include "vfs_ops.h"
#include "writeout.h"
#include "inode.h"
#include "entd.h"

#include <linux/types.h>
#include <linux/fs.h>		/* for struct super_block */
#include <linux/mm.h>		/* for struct page */
#include <linux/pagemap.h>
#include <linux/bio.h>		/* for struct bio */
#include <linux/blkdev.h>
static int write_jnodes_to_disk_extent(
	jnode *, int, const reiser4_block_nr *, flush_queue_t *, int);
/* The commit_handle is a container for objects needed at atom commit time */
struct commit_handle {
	/* A pointer to atom's list of OVRWR nodes */
	struct list_head *overwrite_set;
	/* atom's overwrite set size */
	int overwrite_set_size;
	/* jnodes for wander record blocks */
	struct list_head tx_list;
	/* number of wander records */
	__u32 tx_size;
	/* 'committed' sb counters are saved here until atom is completely
	   flushed */
	__u64 free_blocks;
	__u64 nr_files;
	__u64 next_oid;
	/* A pointer to the atom which is being committed */
	txn_atom *atom;
	/* A pointer to current super block */
	struct super_block *super;
	/* The counter of modified bitmaps */
	reiser4_block_nr nr_bitmap;
};
static void init_commit_handle(struct commit_handle *ch, txn_atom *atom)
{
	memset(ch, 0, sizeof(struct commit_handle));
	INIT_LIST_HEAD(&ch->tx_list);

	ch->atom = atom;
	ch->super = reiser4_get_current_sb();
}
static void done_commit_handle(struct commit_handle *ch)
{
	assert("zam-690", list_empty(&ch->tx_list));
}
static inline int reiser4_use_write_barrier(struct super_block * s)
{
	return !reiser4_is_set(s, REISER4_NO_WRITE_BARRIER);
}
static void disable_write_barrier(struct super_block * s)
{
	notice("zam-1055", "%s does not support write barriers,"
	       " using synchronous write instead.", s->s_id);
	set_bit((int)REISER4_NO_WRITE_BARRIER, &get_super_private(s)->fs_flags);
}
/* fill journal header block data */
static void format_journal_header(struct commit_handle *ch)
{
	struct reiser4_super_info_data *sbinfo;
	struct journal_header *header;
	jnode *txhead;

	sbinfo = get_super_private(ch->super);
	assert("zam-479", sbinfo != NULL);
	assert("zam-480", sbinfo->journal_header != NULL);

	txhead = list_entry(ch->tx_list.next, jnode, capture_link);

	jload(sbinfo->journal_header);

	header = (struct journal_header *)jdata(sbinfo->journal_header);
	assert("zam-484", header != NULL);

	put_unaligned(cpu_to_le64(*jnode_get_block(txhead)),
		      &header->last_committed_tx);

	jrelse(sbinfo->journal_header);
}
/* fill journal footer block data */
static void format_journal_footer(struct commit_handle *ch)
{
	struct reiser4_super_info_data *sbinfo;
	struct journal_footer *footer;
	jnode *tx_head;

	sbinfo = get_super_private(ch->super);

	tx_head = list_entry(ch->tx_list.next, jnode, capture_link);

	assert("zam-493", sbinfo != NULL);
	assert("zam-494", sbinfo->journal_header != NULL);

	check_me("zam-691", jload(sbinfo->journal_footer) == 0);

	footer = (struct journal_footer *)jdata(sbinfo->journal_footer);
	assert("zam-495", footer != NULL);

	put_unaligned(cpu_to_le64(*jnode_get_block(tx_head)),
		      &footer->last_flushed_tx);
	put_unaligned(cpu_to_le64(ch->free_blocks), &footer->free_blocks);
	put_unaligned(cpu_to_le64(ch->nr_files), &footer->nr_files);
	put_unaligned(cpu_to_le64(ch->next_oid), &footer->next_oid);

	jrelse(sbinfo->journal_footer);
}
/* wander record capacity depends on current block size */
static int wander_record_capacity(const struct super_block *super)
{
	return (super->s_blocksize -
		sizeof(struct wander_record_header)) /
	    sizeof(struct wander_entry);
}
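
/* For example, with a 4096-byte block, a (hypothetical) 32-byte
   wander_record_header and 16-byte wander_entry, one wander record holds
   (4096 - 32) / 16 = 254 wandered map entries; the real struct sizes come
   from wander.h. */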
/* Fill first wander record (tx head) in accordance with supplied data */
static void format_tx_head(struct commit_handle *ch)
{
	jnode *tx_head;
	jnode *next;
	struct tx_header *header;

	tx_head = list_entry(ch->tx_list.next, jnode, capture_link);
	assert("zam-692", &ch->tx_list != &tx_head->capture_link);

	next = list_entry(tx_head->capture_link.next, jnode, capture_link);
	if (&ch->tx_list == &next->capture_link)
		next = tx_head;

	header = (struct tx_header *)jdata(tx_head);

	assert("zam-460", header != NULL);
	assert("zam-462", ch->super->s_blocksize >= sizeof(struct tx_header));

	memset(jdata(tx_head), 0, (size_t) ch->super->s_blocksize);
	memcpy(jdata(tx_head), TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE);

	put_unaligned(cpu_to_le32(ch->tx_size), &header->total);
	put_unaligned(cpu_to_le64(get_super_private(ch->super)->last_committed_tx),
		      &header->prev_tx);
	put_unaligned(cpu_to_le64(*jnode_get_block(next)), &header->next_block);
	put_unaligned(cpu_to_le64(ch->free_blocks), &header->free_blocks);
	put_unaligned(cpu_to_le64(ch->nr_files), &header->nr_files);
	put_unaligned(cpu_to_le64(ch->next_oid), &header->next_oid);
}
/* prepare ordinary wander record block (fill all service fields) */
static void
format_wander_record(struct commit_handle *ch, jnode *node, __u32 serial)
{
	struct wander_record_header *LRH;
	jnode *next;

	assert("zam-464", node != NULL);

	LRH = (struct wander_record_header *)jdata(node);
	next = list_entry(node->capture_link.next, jnode, capture_link);

	if (&ch->tx_list == &next->capture_link)
		next = list_entry(ch->tx_list.next, jnode, capture_link);

	assert("zam-465", LRH != NULL);
	assert("zam-463",
	       ch->super->s_blocksize > sizeof(struct wander_record_header));

	memset(jdata(node), 0, (size_t) ch->super->s_blocksize);
	memcpy(jdata(node), WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE);

	put_unaligned(cpu_to_le32(ch->tx_size), &LRH->total);
	put_unaligned(cpu_to_le32(serial), &LRH->serial);
	put_unaligned(cpu_to_le64(*jnode_get_block(next)), &LRH->next_block);
}
/* add one wandered map entry to formatted wander record */
static void
store_entry(jnode * node, int index, const reiser4_block_nr * a,
	    const reiser4_block_nr * b)
{
	char *data;
	struct wander_entry *pairs;

	data = jdata(node);
	assert("zam-451", data != NULL);

	pairs =
	    (struct wander_entry *)(data + sizeof(struct wander_record_header));

	put_unaligned(cpu_to_le64(*a), &pairs[index].original);
	put_unaligned(cpu_to_le64(*b), &pairs[index].wandered);
}
/* currently, wander records contain only the wandered map, whose size depends
   on the overwrite set size */
static void get_tx_size(struct commit_handle *ch)
{
	assert("zam-440", ch->overwrite_set_size != 0);
	assert("zam-695", ch->tx_size == 0);

	/* count all ordinary wander records
	   ((<overwrite_set_size> - 1) / <wander_record_capacity> + 1) and add
	   one for the tx head block */
	ch->tx_size =
	    (ch->overwrite_set_size - 1) / wander_record_capacity(ch->super) +
	    2;
}
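
/* Example: with a (hypothetical) capacity of 254 entries per wander record,
   an overwrite set of 1000 blocks needs (1000 - 1) / 254 + 1 = 4 ordinary
   wander records, plus one for the tx head: ch->tx_size = 5. */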
/* A special structure for using in store_wmap_actor() for saving its state
   between calls */
struct store_wmap_params {
	jnode *cur;		/* jnode of current wander record to fill */
	int idx;		/* free element index in wander record */
	int capacity;		/* capacity */

#if REISER4_DEBUG
	struct list_head *tx_list;
#endif
};
/* an actor for use in blocknr_set_iterator routine which populates the list
   of pre-formatted wander records by wandered map info */
static int
store_wmap_actor(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
		 const reiser4_block_nr * b, void *data)
{
	struct store_wmap_params *params = data;

	if (params->idx >= params->capacity) {
		/* a new wander record should be taken from the tx_list */
		params->cur = list_entry(params->cur->capture_link.next, jnode, capture_link);

		assert("zam-454",
		       params->tx_list != &params->cur->capture_link);

		params->idx = 0;
	}

	store_entry(params->cur, params->idx, a, b);
	params->idx++;

	return 0;
}
/* This function is called after the Relocate set is written to disk, the
   Overwrite set is written to wandered locations and all wander records are
   written as well. The updated journal header block contains a pointer (block
   number) to the first wander record of the just written transaction */
static int update_journal_header(struct commit_handle *ch, int use_barrier)
{
	struct reiser4_super_info_data *sbinfo = get_super_private(ch->super);
	jnode *jh = sbinfo->journal_header;
	jnode *head = list_entry(ch->tx_list.next, jnode, capture_link);
	int ret;

	format_journal_header(ch);

	ret = write_jnodes_to_disk_extent(jh, 1, jnode_get_block(jh), NULL,
					  use_barrier ? WRITEOUT_BARRIER : 0);
	if (ret)
		return ret;

	/* blk_run_address_space(sbinfo->fake->i_mapping);
	 * blk_run_queues(); */

	ret = jwait_io(jh, WRITE);

	if (ret)
		return ret;

	sbinfo->last_committed_tx = *jnode_get_block(head);

	return 0;
}
/* This function is called after write-back is finished. We update journal
   footer block and free blocks which were occupied by wandered blocks and
   transaction wander records */
static int update_journal_footer(struct commit_handle *ch, int use_barrier)
{
	reiser4_super_info_data *sbinfo = get_super_private(ch->super);

	jnode *jf = sbinfo->journal_footer;

	int ret;

	format_journal_footer(ch);

	ret = write_jnodes_to_disk_extent(jf, 1, jnode_get_block(jf), NULL,
					  use_barrier ? WRITEOUT_BARRIER : 0);
	if (ret)
		return ret;

	/* blk_run_address_space(sbinfo->fake->i_mapping);
	 * blk_run_queue(); */

	ret = jwait_io(jf, WRITE);
	if (ret)
		return ret;

	return 0;
}
/* free block numbers of wander records of an already written-in-place
   transaction */
static void dealloc_tx_list(struct commit_handle *ch)
{
	while (!list_empty(&ch->tx_list)) {
		jnode *cur = list_entry(ch->tx_list.next, jnode, capture_link);

		list_del(&cur->capture_link);
		ON_DEBUG(INIT_LIST_HEAD(&cur->capture_link));
		reiser4_dealloc_block(jnode_get_block(cur), BLOCK_NOT_COUNTED,
				      BA_FORMATTED);

		unpin_jnode_data(cur);
		reiser4_drop_io_head(cur);
	}
}
/* An actor for use in the block_nr_iterator() routine which frees wandered
   blocks from atom's overwrite set. */
static int
dealloc_wmap_actor(txn_atom * atom UNUSED_ARG,
		   const reiser4_block_nr * a UNUSED_ARG,
		   const reiser4_block_nr * b, void *data UNUSED_ARG)
{
	assert("zam-499", b != NULL);
	assert("zam-500", *b != 0);
	assert("zam-501", !reiser4_blocknr_is_fake(b));

	reiser4_dealloc_block(b, BLOCK_NOT_COUNTED, BA_FORMATTED);
	return 0;
}
/* free wandered block locations of an already written-in-place transaction */
static void dealloc_wmap(struct commit_handle *ch)
{
	assert("zam-696", ch->atom != NULL);

	blocknr_set_iterator(ch->atom, &ch->atom->wandered_map,
			     dealloc_wmap_actor, NULL, 1);
}
/* helper function for alloc_wandered_blocks(), which refills the set of block
   numbers needed for wandered blocks */
static int
get_more_wandered_blocks(int count, reiser4_block_nr * start, int *len)
{
	reiser4_blocknr_hint hint;
	int ret;

	reiser4_block_nr wide_len = count;

	/* FIXME-ZAM: A special policy needed for allocation of wandered blocks
	   ZAM-FIXME-HANS: yes, what happened to our discussion of using a fixed
	   reserved allocation area so as to get the best qualities of fixed
	   journals? */
	reiser4_blocknr_hint_init(&hint);
	hint.block_stage = BLOCK_GRABBED;

	ret = reiser4_alloc_blocks(&hint, start, &wide_len,
				   BA_FORMATTED | BA_USE_DEFAULT_SEARCH_START);
	*len = (int)wide_len;

	return ret;
}
/*
 * roll back changes made before issuing BIO in the case of IO error.
 */
static void undo_bio(struct bio *bio)
{
	int i;

	for (i = 0; i < bio->bi_vcnt; ++i) {
		struct page *pg;
		jnode *node;

		pg = bio->bi_io_vec[i].bv_page;
		end_page_writeback(pg);
		node = jprivate(pg);
		spin_lock_jnode(node);
		JF_CLR(node, JNODE_WRITEBACK);
		JF_SET(node, JNODE_DIRTY);
		spin_unlock_jnode(node);
	}
	bio_put(bio);
}
/* put overwrite set back to atom's clean list */
static void put_overwrite_set(struct commit_handle *ch)
{
	jnode *cur;

	list_for_each_entry(cur, ch->overwrite_set, capture_link)
		jrelse_tail(cur);
}
/* Count overwrite set size, grab disk space for wandered blocks allocation.
   Since we have a separate list for atom's overwrite set we just scan the
   list, and count bitmap and other non-leaf nodes whose wandered block
   allocation we have to grab space for. */
static int get_overwrite_set(struct commit_handle *ch)
{
	int ret;
	jnode *cur;
	__u64 nr_not_leaves = 0;
#if REISER4_DEBUG
	__u64 nr_formatted_leaves = 0;
	__u64 nr_unformatted_leaves = 0;
#endif

	assert("zam-697", ch->overwrite_set_size == 0);

	ch->overwrite_set = ATOM_OVRWR_LIST(ch->atom);
	cur = list_entry(ch->overwrite_set->next, jnode, capture_link);

	while (ch->overwrite_set != &cur->capture_link) {
		jnode *next = list_entry(cur->capture_link.next, jnode, capture_link);

		/* Count bitmap locks for getting correct statistics what number
		 * of blocks were cleared by the transaction commit. */
		if (jnode_get_type(cur) == JNODE_BITMAP)
			ch->nr_bitmap++;

		assert("zam-939", JF_ISSET(cur, JNODE_OVRWR)
		       || jnode_get_type(cur) == JNODE_BITMAP);

		if (jnode_is_znode(cur) && znode_above_root(JZNODE(cur))) {
			/* we replace fake znode by another (real)
			   znode which is suggested by disk_layout
			   plugin */

			/* FIXME: it looks like fake znode should be
			   replaced by jnode supplied by
			   disk_layout. */

			struct super_block *s = reiser4_get_current_sb();
			reiser4_super_info_data *sbinfo =
			    get_current_super_private();

			if (sbinfo->df_plug->log_super) {
				jnode *sj = sbinfo->df_plug->log_super(s);

				assert("zam-593", sj != NULL);

				spin_lock_jnode(sj);
				JF_SET(sj, JNODE_OVRWR);
				insert_into_atom_ovrwr_list(ch->atom, sj);
				spin_unlock_jnode(sj);

				/* jload it as the rest of overwrite set */
				jload_gfp(sj, reiser4_ctx_gfp_mask_get(), 0);

				ch->overwrite_set_size++;
			}
			spin_lock_jnode(cur);
			reiser4_uncapture_block(cur);
			jput(cur);
		} else {
			ch->overwrite_set_size++;
			ret = jload_gfp(cur, reiser4_ctx_gfp_mask_get(), 0);
			if (ret)
				reiser4_panic("zam-783",
					      "cannot load e-flushed jnode back (ret = %d)\n",
					      ret);
		}

		/* Count not leaves here because we have to grab disk space
		 * for wandered blocks. They were not counted as "flush
		 * reserved". Counting should be done _after_ nodes are pinned
		 * into memory by jload(). */
		if (!jnode_is_leaf(cur))
			nr_not_leaves++;
		else {
#if REISER4_DEBUG
			/* at this point @cur either has JNODE_FLUSH_RESERVED
			 * or is eflushed. Locking is not strong enough to
			 * write an assertion checking for this. */
			if (jnode_is_znode(cur))
				nr_formatted_leaves++;
			else
				nr_unformatted_leaves++;
#endif
			JF_CLR(cur, JNODE_FLUSH_RESERVED);
		}

		cur = next;
	}

	/* Grab space for writing (wandered blocks) of not leaves found in
	 * overwrite set. */
	ret = reiser4_grab_space_force(nr_not_leaves, BA_RESERVED);
	if (ret)
		return ret;

	/* Disk space for allocation of wandered blocks of leaf nodes already
	 * reserved as "flush reserved", move it to grabbed space counter. */
	spin_lock_atom(ch->atom);
	assert("zam-940",
	       nr_formatted_leaves + nr_unformatted_leaves <=
	       ch->atom->flush_reserved);
	flush_reserved2grabbed(ch->atom, ch->atom->flush_reserved);
	spin_unlock_atom(ch->atom);

	return ch->overwrite_set_size;
}
/*
 * write_jnodes_to_disk_extent - submit write request
 * @first: first jnode of the list
 * @nr: number of jnodes on the list
 * @block_p: starting block number of the destination disk region
 * @fq: flush queue to attach the i/o to (may be NULL)
 * @flags: used to decide whether page is to get PG_reclaim flag
 *
 * Submits a write request for @nr jnodes beginning from the @first, other
 * jnodes are after the @first on the double-linked "capture" list. All jnodes
 * will be written to the disk region of @nr blocks starting with @block_p block
 * number. If @fq is not NULL it means that waiting for i/o completion will be
 * done more efficiently by using flush_queue_t objects.
 * This function is the one which writes a list of jnodes in batch mode. It does
 * all the low-level things such as bio construction and page state
 * manipulation.
 *
 * ZAM-FIXME-HANS: brief me on why this function exists, and why bios are
 * aggregated in this function instead of being left to the layers below
 *
 * FIXME: ZAM->HANS: What layer are you talking about? Can you point me to that?
 * Why is that layer needed? Why can BIOs not be constructed here?
 */
static int write_jnodes_to_disk_extent(
	jnode *first, int nr, const reiser4_block_nr *block_p,
	flush_queue_t *fq, int flags)
{
	struct super_block *super = reiser4_get_current_sb();
	int write_op = (flags & WRITEOUT_BARRIER) ? WRITE_BARRIER : WRITE;
	int max_blocks;
	jnode *cur = first;
	reiser4_block_nr block;

	assert("zam-571", first != NULL);
	assert("zam-572", block_p != NULL);
	assert("zam-570", nr > 0);

	block = *block_p;
	max_blocks = min(bio_get_nr_vecs(super->s_bdev), BIO_MAX_PAGES);

	while (nr > 0) {
		struct bio *bio;
		int nr_blocks = min(nr, max_blocks);
		int i;
		int nr_used;

		bio = bio_alloc(GFP_NOIO, nr_blocks);
		if (!bio)
			return RETERR(-ENOMEM);

		bio->bi_bdev = super->s_bdev;
		bio->bi_sector = block * (super->s_blocksize >> 9);
		for (nr_used = 0, i = 0; i < nr_blocks; i++) {
			struct page *pg;

			pg = jnode_page(cur);
			assert("zam-573", pg != NULL);

			page_cache_get(pg);

			lock_and_wait_page_writeback(pg);

			if (!bio_add_page(bio, pg, super->s_blocksize, 0)) {
				/*
				 * underlying device is satiated. Stop adding
				 * pages to the bio.
				 */
				unlock_page(pg);
				page_cache_release(pg);
				break;
			}

			spin_lock_jnode(cur);
			assert("nikita-3166",
			       pg->mapping == jnode_get_mapping(cur));
			assert("zam-912", !JF_ISSET(cur, JNODE_WRITEBACK));
#if REISER4_DEBUG
			spin_lock(&cur->load);
			assert("nikita-3165", !jnode_is_releasable(cur));
			spin_unlock(&cur->load);
#endif
			JF_SET(cur, JNODE_WRITEBACK);
			JF_CLR(cur, JNODE_DIRTY);
			ON_DEBUG(cur->written++);
			spin_unlock_jnode(cur);

			set_page_writeback(pg);

			if (get_current_context()->entd) {
				/* this is ent thread */
				entd_context *ent = get_entd_context(super);
				struct wbq *rq, *next;

				spin_lock(&ent->guard);

				if (pg == ent->cur_request->page) {
					/*
					 * entd is called for this page. This
					 * request is not in the todo list
					 */
					ent->cur_request->written = 1;
				} else {
					/*
					 * if we have written a page for which
					 * writepage is called - move the
					 * request to another list.
					 */
					list_for_each_entry_safe(rq, next, &ent->todo_list, link) {
						assert("", rq->magic == WBQ_MAGIC);
						if (pg == rq->page) {
							/*
							 * remove request from
							 * entd's queue, but do
							 * not wake up a thread
							 * which put this
							 * request
							 */
							list_del_init(&rq->link);
							ent->nr_todo_reqs--;
							list_add_tail(&rq->link, &ent->done_list);
							ent->nr_done_reqs++;
							rq->written = 1;
							break;
						}
					}
				}
				spin_unlock(&ent->guard);
			}

			clear_page_dirty_for_io(pg);

			unlock_page(pg);

			cur = list_entry(cur->capture_link.next, jnode, capture_link);
			nr_used++;
		}
		if (nr_used > 0) {
			assert("nikita-3453",
			       bio->bi_size == super->s_blocksize * nr_used);
			assert("nikita-3454", bio->bi_vcnt == nr_used);

			/* Check if we are allowed to write at all */
			if (super->s_flags & MS_RDONLY)
				undo_bio(bio);
			else {
				int not_supported;

				add_fq_to_bio(fq, bio);
				bio_get(bio);
				reiser4_submit_bio(write_op, bio);
				not_supported = bio_flagged(bio, BIO_EOPNOTSUPP);
				bio_put(bio);
				if (not_supported)
					return -EOPNOTSUPP;
			}

			block += nr_used - 1;
			update_blocknr_hint_default(super, &block);
			block += 1;
		} else {
			bio_put(bio);
		}
		nr -= nr_used;
	}

	return 0;
}
/* This is a procedure which recovers contiguous sequences of disk block
   numbers in the given list of j-nodes and submits write requests on a
   per-sequence basis */
int write_jnode_list(struct list_head *head, flush_queue_t *fq,
		     long *nr_submitted, int flags)
{
	int ret;
	jnode *beg = list_entry(head->next, jnode, capture_link);

	while (head != &beg->capture_link) {
		int nr = 1;
		jnode *cur = list_entry(beg->capture_link.next, jnode, capture_link);

		while (head != &cur->capture_link) {
			if (*jnode_get_block(cur) != *jnode_get_block(beg) + nr)
				break;
			++nr;
			cur = list_entry(cur->capture_link.next, jnode, capture_link);
		}

		ret = write_jnodes_to_disk_extent(
			beg, nr, jnode_get_block(beg), fq, flags);
		if (ret)
			return ret;

		if (nr_submitted)
			*nr_submitted += nr;

		beg = cur;
	}

	return 0;
}
/* add given wandered mapping to atom's wandered map */
static int
add_region_to_wmap(jnode * cur, int len, const reiser4_block_nr * block_p)
{
	int ret;
	blocknr_set_entry *new_bsep = NULL;
	reiser4_block_nr block;

	txn_atom *atom;

	assert("zam-568", block_p != NULL);
	block = *block_p;
	assert("zam-569", len > 0);

	while ((len--) > 0) {
		do {
			atom = get_current_atom_locked();
			assert("zam-536",
			       !reiser4_blocknr_is_fake(jnode_get_block(cur)));
			ret =
			    blocknr_set_add_pair(atom, &atom->wandered_map,
						 &new_bsep,
						 jnode_get_block(cur), &block);
		} while (ret == -E_REPEAT);

		if (ret) {
			/* deallocate blocks which were not added to wandered
			   map */
			reiser4_block_nr wide_len = len;

			reiser4_dealloc_blocks(&block, &wide_len,
					       BLOCK_NOT_COUNTED,
					       BA_FORMATTED
					       /* formatted, without defer */ );

			return ret;
		}

		spin_unlock_atom(atom);

		cur = list_entry(cur->capture_link.next, jnode, capture_link);
		++block;
	}

	return 0;
}
/* Allocate wandered blocks for current atom's OVERWRITE SET and immediately
   submit IO for allocated blocks. We assume that current atom is in a stage
   when any atom fusion is impossible and atom is unlocked and it is safe. */
static int alloc_wandered_blocks(struct commit_handle *ch, flush_queue_t *fq)
{
	reiser4_block_nr block;

	int rest;
	int len;
	int ret;

	jnode *cur;

	assert("zam-534", ch->overwrite_set_size > 0);

	rest = ch->overwrite_set_size;

	cur = list_entry(ch->overwrite_set->next, jnode, capture_link);
	while (ch->overwrite_set != &cur->capture_link) {
		assert("zam-567", JF_ISSET(cur, JNODE_OVRWR));

		ret = get_more_wandered_blocks(rest, &block, &len);
		if (ret)
			return ret;

		rest -= len;

		ret = add_region_to_wmap(cur, len, &block);
		if (ret)
			return ret;

		ret = write_jnodes_to_disk_extent(cur, len, &block, fq, 0);
		if (ret)
			return ret;

		while ((len--) > 0) {
			assert("zam-604",
			       ch->overwrite_set != &cur->capture_link);
			cur = list_entry(cur->capture_link.next, jnode, capture_link);
		}
	}

	return 0;
}
/* allocate given number of nodes over the journal area and link them into a
   list, return pointer to the first jnode in the list */
static int alloc_tx(struct commit_handle *ch, flush_queue_t * fq)
{
	reiser4_blocknr_hint hint;

	reiser4_block_nr allocated = 0;
	reiser4_block_nr first, len;

	jnode *cur;
	jnode *txhead;
	int ret;
	reiser4_context *ctx;
	reiser4_super_info_data *sbinfo;

	assert("zam-698", ch->tx_size > 0);
	assert("zam-699", list_empty_careful(&ch->tx_list));

	ctx = get_current_context();
	sbinfo = get_super_private(ctx->super);

	while (allocated < (unsigned)ch->tx_size) {
		len = (ch->tx_size - allocated);

		reiser4_blocknr_hint_init(&hint);

		hint.block_stage = BLOCK_GRABBED;

		/* FIXME: there should be some block allocation policy for
		   nodes which contain wander records */

		/* We assume that disk space for wandered record blocks can be
		 * taken from reserved area. */
		ret = reiser4_alloc_blocks(&hint, &first, &len,
					   BA_FORMATTED | BA_RESERVED |
					   BA_USE_DEFAULT_SEARCH_START);
		reiser4_blocknr_hint_done(&hint);

		if (ret)
			return ret;

		allocated += len;

		/* create jnodes for all wander records */
		while (len--) {
			cur = reiser4_alloc_io_head(&first);

			if (cur == NULL) {
				ret = RETERR(-ENOMEM);
				goto free_not_assigned;
			}

			ret = jinit_new(cur, reiser4_ctx_gfp_mask_get());

			if (ret != 0) {
				jfree(cur);
				goto free_not_assigned;
			}

			pin_jnode_data(cur);

			list_add_tail(&cur->capture_link, &ch->tx_list);

			first++;
		}
	}

	{ /* format an on-disk linked list of wander records */
		int serial = 1;

		txhead = list_entry(ch->tx_list.next, jnode, capture_link);
		format_tx_head(ch);

		cur = list_entry(txhead->capture_link.next, jnode, capture_link);
		while (&ch->tx_list != &cur->capture_link) {
			format_wander_record(ch, cur, serial++);
			cur = list_entry(cur->capture_link.next, jnode, capture_link);
		}
	}

	{ /* fill wander records with the wandered set */
		struct store_wmap_params params;
		txn_atom *atom;

		params.cur = list_entry(txhead->capture_link.next, jnode, capture_link);

		params.idx = 0;
		params.capacity =
		    wander_record_capacity(reiser4_get_current_sb());

		atom = get_current_atom_locked();
		blocknr_set_iterator(atom, &atom->wandered_map,
				     &store_wmap_actor, &params, 0);
		spin_unlock_atom(atom);
	}

	{ /* release all jnodes from tx_list */
		cur = list_entry(ch->tx_list.next, jnode, capture_link);
		while (&ch->tx_list != &cur->capture_link) {
			jrelse(cur);
			cur = list_entry(cur->capture_link.next, jnode, capture_link);
		}
	}

	ret = write_jnode_list(&ch->tx_list, fq, NULL, 0);

	return ret;

free_not_assigned:
	/* We deallocate blocks not yet assigned to jnodes on tx_list. The
	   caller takes care of invalidating the tx list */
	reiser4_dealloc_blocks(&first, &len, BLOCK_NOT_COUNTED, BA_FORMATTED);

	return ret;
}
static int commit_tx(struct commit_handle *ch)
{
	flush_queue_t *fq;
	int barrier;
	int ret;

	/* Grab more space for wandered records. */
	ret = reiser4_grab_space_force((__u64) (ch->tx_size), BA_RESERVED);
	if (ret)
		return ret;

	fq = get_fq_for_current_atom();
	if (IS_ERR(fq))
		return PTR_ERR(fq);

	spin_unlock_atom(fq->atom);
	do {
		ret = alloc_wandered_blocks(ch, fq);
		if (ret)
			break;
		ret = alloc_tx(ch, fq);
		if (ret)
			break;
	} while (0);

	reiser4_fq_put(fq);
	if (ret)
		return ret;
repeat_wo_barrier:
	barrier = reiser4_use_write_barrier(ch->super);
	if (!barrier) {
		ret = current_atom_finish_all_fq();
		if (ret)
			return ret;
	}
	ret = update_journal_header(ch, barrier);
	if (barrier) {
		if (ret) {
			if (ret == -EOPNOTSUPP) {
				disable_write_barrier(ch->super);
				goto repeat_wo_barrier;
			}
			return ret;
		}
		ret = current_atom_finish_all_fq();
	}
	return ret;
}
static int write_tx_back(struct commit_handle * ch)
{
	flush_queue_t *fq;
	int ret;
	int barrier;

	reiser4_post_commit_hook();
	fq = get_fq_for_current_atom();
	if (IS_ERR(fq))
		return PTR_ERR(fq);
	spin_unlock_atom(fq->atom);
	ret = write_jnode_list(
		ch->overwrite_set, fq, NULL, WRITEOUT_FOR_PAGE_RECLAIM);
	reiser4_fq_put(fq);
	if (ret)
		return ret;
repeat_wo_barrier:
	barrier = reiser4_use_write_barrier(ch->super);
	if (!barrier) {
		ret = current_atom_finish_all_fq();
		if (ret)
			return ret;
	}
	ret = update_journal_footer(ch, barrier);
	if (barrier) {
		if (ret) {
			if (ret == -EOPNOTSUPP) {
				disable_write_barrier(ch->super);
				goto repeat_wo_barrier;
			}
			return ret;
		}
		ret = current_atom_finish_all_fq();
	}
	if (ret)
		return ret;
	reiser4_post_write_back_hook();
	return 0;
}
/* We assume that at this moment all captured blocks are marked as RELOC or
   WANDER (belong to the Relocate or Overwrite set), and all nodes from the
   Relocate set are submitted to write.
*/
int reiser4_write_logs(long *nr_submitted)
{
	txn_atom *atom;
	struct super_block *super = reiser4_get_current_sb();
	reiser4_super_info_data *sbinfo = get_super_private(super);
	struct commit_handle ch;
	int ret;

	writeout_mode_enable();

	/* block allocator may add j-nodes to the clean_list */
	ret = reiser4_pre_commit_hook();
	if (ret)
		return ret;

	/* No locks are required if we take atom which stage >=
	 * ASTAGE_PRE_COMMIT */
	atom = get_current_context()->trans->atom;
	assert("zam-965", atom != NULL);

	/* relocate set is on the atom->clean_nodes list after
	 * current_atom_complete_writes() finishes. It can be safely
	 * uncaptured after commit_mutex is locked, because any atom that
	 * captures these nodes is guaranteed to commit after current one.
	 *
	 * This can only be done after reiser4_pre_commit_hook(), because it is
	 * where early flushed jnodes with CREATED bit are transferred to the
	 * overwrite list. */
	reiser4_invalidate_list(ATOM_CLEAN_LIST(atom));
	spin_lock_atom(atom);
	/* There might be waiters for the relocate nodes which we have
	 * released, wake them up. */
	reiser4_atom_send_event(atom);
	spin_unlock_atom(atom);

	if (REISER4_DEBUG) {
		int level;

		for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; ++level)
			assert("nikita-3352",
			       list_empty_careful(ATOM_DIRTY_LIST(atom, level)));
	}

	sbinfo->nr_files_committed += (unsigned)atom->nr_objects_created;
	sbinfo->nr_files_committed -= (unsigned)atom->nr_objects_deleted;

	init_commit_handle(&ch, atom);

	ch.free_blocks = sbinfo->blocks_free_committed;
	ch.nr_files = sbinfo->nr_files_committed;
	/* ZAM-FIXME-HANS: email me what the contention level is for the super
	 * block. */
	ch.next_oid = oid_next(super);

	/* count overwrite set and place it in a separate list */
	ret = get_overwrite_set(&ch);
	if (ret <= 0) {
		/* It is possible that overwrite set is empty here, it means
		   all captured nodes are clean */
		goto up_and_ret;
	}

	/* Inform the caller about what number of dirty pages will be
	 * submitted to disk. */
	*nr_submitted += ch.overwrite_set_size - ch.nr_bitmap;

	/* count all records needed for storing of the wandered set */
	get_tx_size(&ch);

	ret = commit_tx(&ch);
	if (ret)
		goto up_and_ret;

	spin_lock_atom(atom);
	reiser4_atom_set_stage(atom, ASTAGE_POST_COMMIT);
	spin_unlock_atom(atom);

	ret = write_tx_back(&ch);
	reiser4_post_write_back_hook();

up_and_ret:
	if (ret) {
		/* there could be fq attached to current atom; the only way to
		   remove them is: */
		current_atom_finish_all_fq();
	}

	/* free blocks of flushed transaction */
	dealloc_tx_list(&ch);
	dealloc_wmap(&ch);

	put_overwrite_set(&ch);

	done_commit_handle(&ch);

	writeout_mode_disable();

	return ret;
}
/* consistency checks for journal data/control blocks: header, footer, log
   records, transaction head blocks. All functions return zero on success. */
static int check_journal_header(const jnode * node UNUSED_ARG)
{
	/* FIXME: journal header has no magic field yet. */
	return 0;
}
/* wait for write completion for all jnodes from given list */
static int wait_on_jnode_list(struct list_head *head)
{
	jnode *scan;
	int ret = 0;

	list_for_each_entry(scan, head, capture_link) {
		struct page *pg = jnode_page(scan);

		if (pg) {
			if (PageWriteback(pg))
				wait_on_page_writeback(pg);

			if (PageError(pg))
				ret++;
		}
	}

	return ret;
}
static int check_journal_footer(const jnode * node UNUSED_ARG)
{
	/* FIXME: journal footer has no magic field yet. */
	return 0;
}
static int check_tx_head(const jnode * node)
{
	struct tx_header *header = (struct tx_header *)jdata(node);

	if (memcmp(&header->magic, TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE) != 0) {
		warning("zam-627", "tx head at block %s corrupted\n",
			sprint_address(jnode_get_block(node)));
		return RETERR(-EIO);
	}

	return 0;
}
static int check_wander_record(const jnode * node)
{
	struct wander_record_header *RH =
	    (struct wander_record_header *)jdata(node);

	if (memcmp(&RH->magic, WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE) !=
	    0) {
		warning("zam-628", "wander record at block %s corrupted\n",
			sprint_address(jnode_get_block(node)));
		return RETERR(-EIO);
	}

	return 0;
}
/* fill the commit_handle structure with everything that is needed for
   update_journal_footer() */
static int restore_commit_handle(struct commit_handle *ch, jnode *tx_head)
{
	struct tx_header *TXH;
	int ret;

	ret = jload(tx_head);
	if (ret)
		return ret;

	TXH = (struct tx_header *)jdata(tx_head);

	ch->free_blocks = le64_to_cpu(get_unaligned(&TXH->free_blocks));
	ch->nr_files = le64_to_cpu(get_unaligned(&TXH->nr_files));
	ch->next_oid = le64_to_cpu(get_unaligned(&TXH->next_oid));

	jrelse(tx_head);

	list_add(&tx_head->capture_link, &ch->tx_list);

	return 0;
}
/* replay one transaction: restore and write overwrite set in place */
static int replay_transaction(const struct super_block *s,
			      jnode * tx_head,
			      const reiser4_block_nr * log_rec_block_p,
			      const reiser4_block_nr * end_block,
			      unsigned int nr_wander_records)
{
	reiser4_block_nr log_rec_block = *log_rec_block_p;
	struct commit_handle ch;
	LIST_HEAD(overwrite_set);
	jnode *log;
	int ret;

	init_commit_handle(&ch, NULL);
	ch.overwrite_set = &overwrite_set;

	restore_commit_handle(&ch, tx_head);

	while (log_rec_block != *end_block) {
		struct wander_record_header *header;
		struct wander_entry *entry;

		int i;

		if (nr_wander_records == 0) {
			warning("zam-631",
				"number of wander records in the linked list"
				" greater than number stored in tx head.\n");
			ret = RETERR(-EIO);
			goto free_ow_set;
		}

		log = reiser4_alloc_io_head(&log_rec_block);
		if (log == NULL)
			return RETERR(-ENOMEM);

		ret = jload(log);
		if (ret < 0) {
			reiser4_drop_io_head(log);
			return ret;
		}

		ret = check_wander_record(log);
		if (ret) {
			jrelse(log);
			reiser4_drop_io_head(log);
			return ret;
		}

		header = (struct wander_record_header *)jdata(log);
		log_rec_block = le64_to_cpu(get_unaligned(&header->next_block));

		entry = (struct wander_entry *)(header + 1);

		/* restore overwrite set from wander record content */
		for (i = 0; i < wander_record_capacity(s); i++) {
			reiser4_block_nr block;
			jnode *node;

			block = le64_to_cpu(get_unaligned(&entry->wandered));
			if (block == 0)
				break;

			node = reiser4_alloc_io_head(&block);
			if (node == NULL) {
				ret = RETERR(-ENOMEM);
				jrelse(log);
				reiser4_drop_io_head(log);
				goto free_ow_set;
			}

			ret = jload(node);
			if (ret < 0) {
				reiser4_drop_io_head(node);
				jrelse(log);
				reiser4_drop_io_head(log);
				goto free_ow_set;
			}

			block = le64_to_cpu(get_unaligned(&entry->original));

			assert("zam-603", block != 0);

			jnode_set_block(node, &block);

			list_add_tail(&node->capture_link, ch.overwrite_set);

			++entry;
		}

		jrelse(log);
		reiser4_drop_io_head(log);

		--nr_wander_records;
	}

	if (nr_wander_records != 0) {
		warning("zam-632", "number of wander records in the linked list"
			" less than number stored in tx head.\n");
		ret = RETERR(-EIO);
		goto free_ow_set;
	}

	{ /* write wandered set in place */
		write_jnode_list(ch.overwrite_set, NULL, NULL, 0);
		ret = wait_on_jnode_list(ch.overwrite_set);

		if (ret) {
			ret = RETERR(-EIO);
			goto free_ow_set;
		}
	}

	ret = update_journal_footer(&ch, 0);

free_ow_set:

	while (!list_empty(ch.overwrite_set)) {
		jnode *cur = list_entry(ch.overwrite_set->next, jnode, capture_link);

		list_del_init(&cur->capture_link);
		jrelse(cur);
		reiser4_drop_io_head(cur);
	}

	list_del_init(&tx_head->capture_link);

	done_commit_handle(&ch);

	return ret;
}
/* Find the oldest committed and not played transaction and play it. The
 * transaction was committed and the journal header block was updated, but the
 * process of writing the atom's overwrite set in-place and updating the
 * journal footer block was not completed. This function completes the process
 * by recovering the atom's overwrite set from its wandered locations, writing
 * it in-place and updating the journal footer. */
static int replay_oldest_transaction(struct super_block *s)
{
	reiser4_super_info_data *sbinfo = get_super_private(s);
	jnode *jf = sbinfo->journal_footer;
	unsigned int total;
	struct journal_footer *F;
	struct tx_header *T;

	reiser4_block_nr prev_tx;
	reiser4_block_nr last_flushed_tx;
	reiser4_block_nr log_rec_block = 0;

	jnode *tx_head;

	int ret;

	if ((ret = jload(jf)) < 0)
		return ret;

	F = (struct journal_footer *)jdata(jf);

	last_flushed_tx = le64_to_cpu(get_unaligned(&F->last_flushed_tx));

	jrelse(jf);

	if (sbinfo->last_committed_tx == last_flushed_tx) {
		/* all transactions are replayed */
		return 0;
	}

	prev_tx = sbinfo->last_committed_tx;

	/* searching for oldest not flushed transaction */
	while (1) {
		tx_head = reiser4_alloc_io_head(&prev_tx);
		if (!tx_head)
			return RETERR(-ENOMEM);

		ret = jload(tx_head);
		if (ret < 0) {
			reiser4_drop_io_head(tx_head);
			return ret;
		}

		ret = check_tx_head(tx_head);
		if (ret) {
			jrelse(tx_head);
			reiser4_drop_io_head(tx_head);
			return ret;
		}

		T = (struct tx_header *)jdata(tx_head);

		prev_tx = le64_to_cpu(get_unaligned(&T->prev_tx));

		if (prev_tx == last_flushed_tx)
			break;

		jrelse(tx_head);
		reiser4_drop_io_head(tx_head);
	}

	total = le32_to_cpu(get_unaligned(&T->total));
	log_rec_block = le64_to_cpu(get_unaligned(&T->next_block));

	pin_jnode_data(tx_head);
	jrelse(tx_head);

	ret =
	    replay_transaction(s, tx_head, &log_rec_block,
			       jnode_get_block(tx_head), total - 1);

	unpin_jnode_data(tx_head);
	reiser4_drop_io_head(tx_head);

	if (ret)
		return ret;
	return -E_REPEAT;
}
/* The current reiser4 journal implementation was optimized not to capture the
   super block when certain super block fields are modified. Currently, that
   set is (<free block count>, <OID allocator>). These fields are logged in a
   special way, which includes storing them in each transaction head block at
   atom commit time and writing that information to the journal footer block
   at atom flush time. For getting the info from the journal footer block to
   the in-memory super block there is a special function,
   reiser4_journal_recover_sb_data(), which should be called after the disk
   format plugin re-reads the super block after journal replaying. */
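
/* The expected mount-time sequence is therefore roughly (a sketch; the actual
   calls are wired up by the disk format plugin):

	reiser4_init_journal_info(s);
	reiser4_journal_replay(s);
	... disk format plugin re-reads the super block ...
	reiser4_journal_recover_sb_data(s);
*/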
/* get the information from the journal footer into the in-memory super
   block */
int reiser4_journal_recover_sb_data(struct super_block *s)
{
	reiser4_super_info_data *sbinfo = get_super_private(s);
	struct journal_footer *jf;
	int ret;

	assert("zam-673", sbinfo->journal_footer != NULL);

	ret = jload(sbinfo->journal_footer);
	if (ret != 0)
		return ret;

	ret = check_journal_footer(sbinfo->journal_footer);
	if (ret == 0) {
		jf = (struct journal_footer *)jdata(sbinfo->journal_footer);

		/* was there at least one flushed transaction? */
		if (jf->last_flushed_tx) {

			/* restore free block counter logged in this
			   transaction */
			reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&jf->free_blocks)));

			/* restore oid allocator state */
			oid_init_allocator(s,
					   le64_to_cpu(get_unaligned(&jf->nr_files)),
					   le64_to_cpu(get_unaligned(&jf->next_oid)));
		}
	}

	jrelse(sbinfo->journal_footer);
	return ret;
}
/* reiser4 replay journal procedure */
int reiser4_journal_replay(struct super_block *s)
{
	reiser4_super_info_data *sbinfo = get_super_private(s);
	jnode *jh, *jf;
	struct journal_header *header;
	int nr_tx_replayed = 0;
	int ret;

	assert("zam-582", sbinfo != NULL);

	jh = sbinfo->journal_header;
	jf = sbinfo->journal_footer;

	if (!jh || !jf) {
		/* it is possible that disk layout does not support journal
		   structures, we just warn about this */
		warning("zam-583",
			"journal control blocks were not loaded by disk layout plugin. "
			"journal replaying is not possible.\n");
		return 0;
	}

	/* Take free block count from journal footer block. The free block
	   counter value corresponds to the last flushed transaction state */
	ret = jload(jf);
	if (ret < 0)
		return ret;

	ret = check_journal_footer(jf);
	if (ret) {
		jrelse(jf);
		return ret;
	}

	jrelse(jf);

	/* store last committed transaction info in reiser4 in-memory super
	   block */
	ret = jload(jh);
	if (ret < 0)
		return ret;

	ret = check_journal_header(jh);
	if (ret) {
		jrelse(jh);
		return ret;
	}

	header = (struct journal_header *)jdata(jh);
	sbinfo->last_committed_tx = le64_to_cpu(get_unaligned(&header->last_committed_tx));

	jrelse(jh);

	/* replay committed transactions */
	while ((ret = replay_oldest_transaction(s)) == -E_REPEAT)
		nr_tx_replayed++;

	return ret;
}
/* load journal control block (either journal header or journal footer block) */
static int
load_journal_control_block(jnode ** node, const reiser4_block_nr * block)
{
	int ret;

	*node = reiser4_alloc_io_head(block);
	if (!(*node))
		return RETERR(-ENOMEM);

	ret = jload(*node);

	if (ret) {
		reiser4_drop_io_head(*node);
		*node = NULL;
		return ret;
	}

	pin_jnode_data(*node);
	jrelse(*node);

	return 0;
}
/* unload journal header or footer and free jnode */
static void unload_journal_control_block(jnode ** node)
{
	if (*node) {
		unpin_jnode_data(*node);
		reiser4_drop_io_head(*node);
		*node = NULL;
	}
}
/* release journal control blocks */
void reiser4_done_journal_info(struct super_block *s)
{
	reiser4_super_info_data *sbinfo = get_super_private(s);

	assert("zam-476", sbinfo != NULL);

	unload_journal_control_block(&sbinfo->journal_header);
	unload_journal_control_block(&sbinfo->journal_footer);
}
/* load journal control blocks */
int reiser4_init_journal_info(struct super_block *s)
{
	reiser4_super_info_data *sbinfo = get_super_private(s);
	journal_location *loc;
	int ret;

	loc = &sbinfo->jloc;

	assert("zam-651", loc != NULL);
	assert("zam-652", loc->header != 0);
	assert("zam-653", loc->footer != 0);

	ret = load_journal_control_block(&sbinfo->journal_header, &loc->header);

	if (ret)
		return ret;

	ret = load_journal_control_block(&sbinfo->journal_footer, &loc->footer);

	if (ret) {
		unload_journal_control_block(&sbinfo->journal_header);
	}

	return ret;
}
/* Make Linus happy.
   Local variables:
   c-indentation-style: "K&R"
   mode-name: "LC"
   c-basic-offset: 8
   tab-width: 8
   fill-column: 80
   End:
*/