2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <linux/crc32.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/freezer.h>
20 #include <linux/bio.h>
21 #include <linux/blkdev.h>
22 #include <linux/writeback.h>
23 #include <linux/list_sort.h>
34 #include "trace_gfs2.h"
37 * gfs2_struct2blk - compute the number of log descriptor blocks needed
38 * @sdp: the filesystem
39 * @nstruct: the number of structures
40 * @ssize: the size of the structures
42 * Compute the number of log descriptor blocks needed to hold a certain number
43 * of structures of a certain size.
45 * Returns: the number of blocks needed (minimum is always 1)
/*
 * Two per-block capacities are computed from the filesystem block size:
 * "first" is how many ssize-byte structures fit after a
 * struct gfs2_log_descriptor, "second" is how many fit after a
 * struct gfs2_meta_header. When nstruct exceeds "first", blks grows by the
 * rounded-up number of extra blocks needed for the remainder.
 * NOTE(review): the ssize parameter, the declaration/initial value of blks
 * and the return statement are not visible in this excerpt.
 */
48 unsigned int gfs2_struct2blk(struct gfs2_sbd
*sdp
, unsigned int nstruct
,
52 unsigned int first
, second
;
55 first
= (sdp
->sd_sb
.sb_bsize
- sizeof(struct gfs2_log_descriptor
)) / ssize
;
/* Only the overflow beyond the first block needs additional blocks. */
57 if (nstruct
> first
) {
58 second
= (sdp
->sd_sb
.sb_bsize
-
59 sizeof(struct gfs2_meta_header
)) / ssize
;
60 blks
+= DIV_ROUND_UP(nstruct
- first
, second
);
67 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
68 * @mapping: The associated mapping (maybe NULL)
69 * @bd: The gfs2_bufdata to remove
71 * The ail lock _must_ be held when calling this function
/*
 * Unlinks bd from both AIL lists it can be on (bd_ail_st_list and
 * bd_ail_gl_list), re-initialising each list node, and decrements the
 * gl_ail_count of the glock that bd points to.
 * NOTE(review): lines are missing from this excerpt; a caller's comment
 * says this function also drops a reference on the buffer head — confirm.
 */
75 void gfs2_remove_from_ail(struct gfs2_bufdata
*bd
)
78 list_del_init(&bd
->bd_ail_st_list
);
79 list_del_init(&bd
->bd_ail_gl_list
);
80 atomic_dec(&bd
->bd_gl
->gl_ail_count
);
85 * gfs2_ail1_start_one - Start I/O on a part of the AIL
86 * @sdp: the filesystem
87 * @wbc: The writeback control structure
88 * @tr: The transaction whose AIL1 list is to be written back
/*
 * Walks tr->tr_ail1_list from the tail, asserting that every entry belongs
 * to tr. A buffer that is no longer busy is moved to tr->tr_ail2_list (an
 * I/O error is reported first if it is not up to date). Otherwise the entry
 * is moved to the front of tr_ail1_list and the mapping of its page is
 * written with generic_writepages().
 * The __releases/__acquires annotations declare that sd_ail_lock is dropped
 * and re-taken inside this function.
 * NOTE(review): the bh assignment, the continue/return statements and any
 * use of gl are missing from this excerpt, so the return values cannot be
 * documented from here.
 */
92 static int gfs2_ail1_start_one(struct gfs2_sbd
*sdp
,
93 struct writeback_control
*wbc
,
94 struct gfs2_trans
*tr
)
95 __releases(&sdp
->sd_ail_lock
)
96 __acquires(&sdp
->sd_ail_lock
)
98 struct gfs2_glock
*gl
= NULL
;
99 struct address_space
*mapping
;
100 struct gfs2_bufdata
*bd
, *s
;
101 struct buffer_head
*bh
;
103 list_for_each_entry_safe_reverse(bd
, s
, &tr
->tr_ail1_list
, bd_ail_st_list
) {
106 gfs2_assert(sdp
, bd
->bd_tr
== tr
);
/* I/O finished on this buffer: it can leave AIL1 for AIL2. */
108 if (!buffer_busy(bh
)) {
109 if (!buffer_uptodate(bh
))
110 gfs2_io_error_bh(sdp
, bh
);
111 list_move(&bd
->bd_ail_st_list
, &tr
->tr_ail2_list
);
115 if (!buffer_dirty(bh
))
120 list_move(&bd
->bd_ail_st_list
, &tr
->tr_ail1_list
);
121 mapping
= bh
->b_page
->mapping
;
/* generic_writepages() is called with sd_ail_lock released. */
124 spin_unlock(&sdp
->sd_ail_lock
);
125 generic_writepages(mapping
, wbc
);
126 spin_lock(&sdp
->sd_ail_lock
);
127 if (wbc
->nr_to_write
<= 0)
137 * gfs2_ail1_flush - start writeback of some ail1 entries
138 * @sdp: The super block
139 * @wbc: The writeback control structure
141 * Writes back some ail1 entries, according to the limits in the
142 * writeback control structure
/*
 * Brackets the work with trace_gfs2_ail_flush() (1 on entry, 0 on exit) and
 * a block plug. With sd_ail_lock held, the transactions on sd_ail1_list are
 * visited in reverse list order and handed to gfs2_ail1_start_one();
 * wbc->nr_to_write is checked before each one.
 * NOTE(review): the statements executed when nr_to_write <= 0, or when
 * gfs2_ail1_start_one() returns non-zero, are not visible in this excerpt.
 */
145 void gfs2_ail1_flush(struct gfs2_sbd
*sdp
, struct writeback_control
*wbc
)
147 struct list_head
*head
= &sdp
->sd_ail1_list
;
148 struct gfs2_trans
*tr
;
149 struct blk_plug plug
;
151 trace_gfs2_ail_flush(sdp
, wbc
, 1);
152 blk_start_plug(&plug
);
153 spin_lock(&sdp
->sd_ail_lock
);
155 list_for_each_entry_reverse(tr
, head
, tr_list
) {
156 if (wbc
->nr_to_write
<= 0)
158 if (gfs2_ail1_start_one(sdp
, wbc
, tr
))
161 spin_unlock(&sdp
->sd_ail_lock
);
162 blk_finish_plug(&plug
);
163 trace_gfs2_ail_flush(sdp
, wbc
, 0);
167 * gfs2_ail1_start - start writeback of all ail1 entries
168 * @sdp: The superblock
/*
 * Builds an on-stack writeback_control (WB_SYNC_NONE, nr_to_write =
 * LONG_MAX, range_end = LLONG_MAX) and passes it to gfs2_ail1_flush().
 * NOTE(review): initializer lines may be missing from this excerpt.
 */
171 static void gfs2_ail1_start(struct gfs2_sbd
*sdp
)
173 struct writeback_control wbc
= {
174 .sync_mode
= WB_SYNC_NONE
,
175 .nr_to_write
= LONG_MAX
,
177 .range_end
= LLONG_MAX
,
180 return gfs2_ail1_flush(sdp
, &wbc
);
184 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
185 * @sdp: the filesystem
/*
 * Walks tr->tr_ail1_list from the tail, asserting that each entry belongs
 * to tr. Entries that reach the list_move() are transferred to
 * tr->tr_ail2_list, with gfs2_io_error_bh() called first when the buffer is
 * not up to date.
 * NOTE(review): the bh assignment and the test that skips entries are not
 * visible in this excerpt — presumably still-busy buffers stay on AIL1;
 * confirm against the full source.
 */
190 static void gfs2_ail1_empty_one(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
192 struct gfs2_bufdata
*bd
, *s
;
193 struct buffer_head
*bh
;
195 list_for_each_entry_safe_reverse(bd
, s
, &tr
->tr_ail1_list
,
198 gfs2_assert(sdp
, bd
->bd_tr
== tr
);
201 if (!buffer_uptodate(bh
))
202 gfs2_io_error_bh(sdp
, bh
);
203 list_move(&bd
->bd_ail_st_list
, &tr
->tr_ail2_list
);
209 * gfs2_ail1_empty - Try to empty the ail1 lists
210 * @sdp: The superblock
212 * Tries to empty the ail1 lists, starting with the oldest first
/*
 * Under sd_ail_lock, runs gfs2_ail1_empty_one() on every transaction of
 * sd_ail1_list (reverse list order). A transaction whose tr_ail1_list is
 * now empty is moved to sd_ail2_list, but only while oldest_tr is non-zero.
 * ret records whether sd_ail1_list ended up empty.
 * NOTE(review): the declarations of ret and oldest_tr, the code that updates
 * oldest_tr and the return statement are missing from this excerpt.
 */
215 static int gfs2_ail1_empty(struct gfs2_sbd
*sdp
)
217 struct gfs2_trans
*tr
, *s
;
221 spin_lock(&sdp
->sd_ail_lock
);
222 list_for_each_entry_safe_reverse(tr
, s
, &sdp
->sd_ail1_list
, tr_list
) {
223 gfs2_ail1_empty_one(sdp
, tr
);
224 if (list_empty(&tr
->tr_ail1_list
) && oldest_tr
)
225 list_move(&tr
->tr_list
, &sdp
->sd_ail2_list
);
229 ret
= list_empty(&sdp
->sd_ail1_list
);
230 spin_unlock(&sdp
->sd_ail_lock
);
/*
 * Scans sd_ail1_list (reverse list order) and each transaction's
 * tr_ail1_list with sd_ail_lock held, testing buffer_locked() on the
 * entries. sd_ail_lock is released on two paths: one following the
 * buffer_locked() test and one at the end of the scan.
 * NOTE(review): the bh assignment and what is done with a locked buffer
 * after the first unlock are not visible here — presumably the function
 * waits on that buffer; confirm against the full source.
 */
235 static void gfs2_ail1_wait(struct gfs2_sbd
*sdp
)
237 struct gfs2_trans
*tr
;
238 struct gfs2_bufdata
*bd
;
239 struct buffer_head
*bh
;
241 spin_lock(&sdp
->sd_ail_lock
);
242 list_for_each_entry_reverse(tr
, &sdp
->sd_ail1_list
, tr_list
) {
243 list_for_each_entry(bd
, &tr
->tr_ail1_list
, bd_ail_st_list
) {
245 if (!buffer_locked(bh
))
248 spin_unlock(&sdp
->sd_ail_lock
);
254 spin_unlock(&sdp
->sd_ail_lock
);
258 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
259 * @sdp: the filesystem
/*
 * Drains tr->tr_ail2_list: while the list is non-empty, takes the entry at
 * head->prev, asserts that it belongs to tr and passes it to
 * gfs2_remove_from_ail(), which unlinks it from the list.
 * NOTE(review): the member argument of list_entry() is on a line missing
 * from this excerpt.
 */
264 static void gfs2_ail2_empty_one(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
266 struct list_head
*head
= &tr
->tr_ail2_list
;
267 struct gfs2_bufdata
*bd
;
269 while (!list_empty(head
)) {
270 bd
= list_entry(head
->prev
, struct gfs2_bufdata
,
272 gfs2_assert(sdp
, bd
->bd_tr
== tr
);
273 gfs2_remove_from_ail(bd
);
/*
 * Processes the transactions on sd_ail2_list under sd_ail_lock, given the
 * new log tail. old_tail is the current sd_log_tail and wrap is set when
 * new_tail is numerically below it. For each transaction:
 *   a  = old_tail <= tr_first
 *   b  = tr_first < new_tail
 *   rm = wrap ? (a || b) : (a && b)
 * i.e. rm is true when tr_first lies in [old_tail, new_tail), taking a
 * wrapped range into account. The visible removal path empties the
 * transaction's AIL2 list, unlinks it from sd_ail2_list and warns if either
 * of its AIL lists is still non-empty.
 * NOTE(review): the declarations of a/b/rm, the test on rm and anything that
 * follows the asserts are not visible in this excerpt.
 */
277 static void ail2_empty(struct gfs2_sbd
*sdp
, unsigned int new_tail
)
279 struct gfs2_trans
*tr
, *safe
;
280 unsigned int old_tail
= sdp
->sd_log_tail
;
281 int wrap
= (new_tail
< old_tail
);
284 spin_lock(&sdp
->sd_ail_lock
);
286 list_for_each_entry_safe(tr
, safe
, &sdp
->sd_ail2_list
, tr_list
) {
287 a
= (old_tail
<= tr
->tr_first
);
288 b
= (tr
->tr_first
< new_tail
);
289 rm
= (wrap
) ? (a
|| b
) : (a
&& b
);
293 gfs2_ail2_empty_one(sdp
, tr
);
294 list_del(&tr
->tr_list
);
295 gfs2_assert_warn(sdp
, list_empty(&tr
->tr_ail1_list
));
296 gfs2_assert_warn(sdp
, list_empty(&tr
->tr_ail2_list
));
300 spin_unlock(&sdp
->sd_ail_lock
);
304 * gfs2_log_release - Release a given number of log blocks
305 * @sdp: The GFS2 superblock
306 * @blks: The number of blocks
/*
 * Adds blks back to sd_log_blks_free, traces the change, asserts that the
 * free count does not exceed the journal size (jd_blocks) and then drops
 * the read side of sd_log_flush_lock.
 */
310 void gfs2_log_release(struct gfs2_sbd
*sdp
, unsigned int blks
)
313 atomic_add(blks
, &sdp
->sd_log_blks_free
);
314 trace_gfs2_log_blocks(sdp
, blks
);
315 gfs2_assert_withdraw(sdp
, atomic_read(&sdp
->sd_log_blks_free
) <=
316 sdp
->sd_jdesc
->jd_blocks
);
/* The caller must hold sd_log_flush_lock for reading; gfs2_log_reserve() takes it. */
317 up_read(&sdp
->sd_log_flush_lock
);
321 * gfs2_log_reserve - Make a log reservation
322 * @sdp: The GFS2 superblock
323 * @blks: The number of blocks to reserve
325 * Note that we never give out the last few blocks of the journal. That's
326 * due to the fact that there is a small number of header blocks
327 * associated with each log flush. The exact number can't be known until
328 * flush time, so we ensure that we have just enough free blocks at all
329 * times to avoid running out during a log flush.
331 * We no longer flush the log here, instead we wake up logd to do that
332 * for us. To avoid the thundering herd and to ensure that we deal fairly
333 * with queued waiters, we use an exclusive wait. This means that when we
334 * get woken with enough journal space to get our reservation, we need to
335 * wake the next waiter on the list.
/*
 * Reservation outline, as far as the visible lines show:
 *  - reserved_blks = 7 * (4096 / s_blocksize) is a margin kept back, and
 *    wanted = blks + reserved_blks is the threshold the free count must
 *    exceed.
 *  - blks must be non-zero and no larger than the journal (jd_blocks).
 *  - blks is added to sd_log_blks_needed while the reservation is pending
 *    and subtracted again once the free count has been updated.
 *  - If free_blocks <= wanted, the caller queues itself exclusively on
 *    sd_log_waitq (uninterruptible), wakes sd_logd_waitq and loops until
 *    the free count rises above wanted.
 *  - The reservation itself is an atomic_cmpxchg() of sd_log_blks_free from
 *    free_blocks to free_blocks - blks, bracketed by sd_reserving_log.
 *  - Finally sd_log_flush_lock is taken for reading; if SDF_JOURNAL_LIVE is
 *    not set the blocks are handed back through gfs2_log_release().
 * NOTE(review): the declarations of wait/did_wait/ret, the sleep inside the
 * wait loop, the retry after a failed cmpxchg and all return statements are
 * missing from this excerpt.
 */
340 int gfs2_log_reserve(struct gfs2_sbd
*sdp
, unsigned int blks
)
343 unsigned reserved_blks
= 7 * (4096 / sdp
->sd_vfs
->s_blocksize
);
344 unsigned wanted
= blks
+ reserved_blks
;
347 unsigned int free_blocks
;
349 if (gfs2_assert_warn(sdp
, blks
) ||
350 gfs2_assert_warn(sdp
, blks
<= sdp
->sd_jdesc
->jd_blocks
))
352 atomic_add(blks
, &sdp
->sd_log_blks_needed
);
354 free_blocks
= atomic_read(&sdp
->sd_log_blks_free
);
355 if (unlikely(free_blocks
<= wanted
)) {
357 prepare_to_wait_exclusive(&sdp
->sd_log_waitq
, &wait
,
358 TASK_UNINTERRUPTIBLE
);
359 wake_up(&sdp
->sd_logd_waitq
);
/* Re-check after queuing so a wake-up between the test and the sleep is not lost. */
361 if (atomic_read(&sdp
->sd_log_blks_free
) <= wanted
)
363 free_blocks
= atomic_read(&sdp
->sd_log_blks_free
);
364 } while(free_blocks
<= wanted
);
365 finish_wait(&sdp
->sd_log_waitq
, &wait
);
367 atomic_inc(&sdp
->sd_reserving_log
);
/* The cmpxchg fails if sd_log_blks_free changed since free_blocks was sampled. */
368 if (atomic_cmpxchg(&sdp
->sd_log_blks_free
, free_blocks
,
369 free_blocks
- blks
) != free_blocks
) {
370 if (atomic_dec_and_test(&sdp
->sd_reserving_log
))
371 wake_up(&sdp
->sd_reserving_log_wait
);
374 atomic_sub(blks
, &sdp
->sd_log_blks_needed
);
375 trace_gfs2_log_blocks(sdp
, -blks
);
378 * If we waited, then so might others, wake them up _after_ we get
379 * our share of the log.
381 if (unlikely(did_wait
))
382 wake_up(&sdp
->sd_log_waitq
);
384 down_read(&sdp
->sd_log_flush_lock
);
385 if (unlikely(!test_bit(SDF_JOURNAL_LIVE
, &sdp
->sd_flags
))) {
/* gfs2_log_release() also drops the read lock taken just above. */
386 gfs2_log_release(sdp
, blks
);
389 if (atomic_dec_and_test(&sdp
->sd_reserving_log
))
390 wake_up(&sdp
->sd_reserving_log_wait
);
395 * log_distance - Compute distance between two journal blocks
396 * @sdp: The GFS2 superblock
397 * @newer: The most recent journal block of the pair
398 * @older: The older journal block of the pair
400 * Compute the distance (in the journal direction) between two
401 * blocks in the journal
403 * Returns: the distance in blocks
/*
 * dist starts as newer - older; the journal size (jd_blocks) is then added
 * to it.
 * NOTE(review): the "older" parameter, the declaration of dist, the
 * condition guarding the addition (presumably a negative difference, i.e.
 * the range wraps past the end of the journal) and the return statement
 * are not visible in this excerpt.
 */
406 static inline unsigned int log_distance(struct gfs2_sbd
*sdp
, unsigned int newer
,
411 dist
= newer
- older
;
413 dist
+= sdp
->sd_jdesc
->jd_blocks
;
419 * calc_reserved - Calculate the number of blocks to reserve when
420 * refunding a transaction's unused buffers.
421 * @sdp: The GFS2 superblock
423 * This is complex. We need to reserve room for all our currently used
424 * metadata buffers (e.g. normal file I/O rewriting file time stamps) and
425 * all our journaled data buffers for journaled files (e.g. files in the
426 * meta_fs like rindex, or files for which chattr +j was done.)
427 * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
428 * will count it as free space (sd_log_blks_free) and corruption will follow.
430 * We can have metadata bufs and jdata bufs in the same journal. So each
431 * type gets its own log header, for which we need to reserve a block.
432 * In fact, each type has the potential for needing more than one header
433 * in cases where we have more buffers than will fit on a journal page.
434 * Metadata journal entries take up half the space of journaled buffer entries.
435 * Thus, metadata entries have buf_limit (502) and journaled buffers have
436 * databuf_limit (251) before they cause a wrap around.
438 * Also, we need to reserve blocks for revoke journal entries and one for an
439 * overall header for the lot.
441 * Returns: the number of blocks reserved
/*
 * From the pending transaction sdp->sd_log_tr:
 *   mbuf = tr_num_buf_new - tr_num_buf_rm          (metadata buffers)
 *   dbuf = tr_num_databuf_new - tr_num_databuf_rm  (journaled data buffers)
 * reserved = mbuf + dbuf, plus one header block per buf_limit() metadata
 * buffers and per databuf_limit() data buffers (both rounded up). When
 * sd_log_commited_revoke is positive, the blocks computed by
 * gfs2_struct2blk() for that many revokes are added as well.
 * NOTE(review): the declarations of mbuf/dbuf, any guard on tr, the size
 * argument passed to gfs2_struct2blk(), the step that accounts for the
 * overall header and the return statement are missing from this excerpt.
 */
443 static unsigned int calc_reserved(struct gfs2_sbd
*sdp
)
445 unsigned int reserved
= 0;
448 struct gfs2_trans
*tr
= sdp
->sd_log_tr
;
451 mbuf
= tr
->tr_num_buf_new
- tr
->tr_num_buf_rm
;
452 dbuf
= tr
->tr_num_databuf_new
- tr
->tr_num_databuf_rm
;
453 reserved
= mbuf
+ dbuf
;
454 /* Account for header blocks */
455 reserved
+= DIV_ROUND_UP(mbuf
, buf_limit(sdp
));
456 reserved
+= DIV_ROUND_UP(dbuf
, databuf_limit(sdp
));
459 if (sdp
->sd_log_commited_revoke
> 0)
460 reserved
+= gfs2_struct2blk(sdp
, sdp
->sd_log_commited_revoke
,
462 /* One for the overall header */
/*
 * Determines the current log tail under sd_ail_lock. With an empty
 * sd_ail1_list the tail is sd_log_head; otherwise the transaction at
 * sd_ail1_list.prev is looked up.
 * NOTE(review): the declaration of tail, the assignment taken from that
 * transaction (presumably its tr_first) and the return statement are not
 * visible in this excerpt.
 */
468 static unsigned int current_tail(struct gfs2_sbd
*sdp
)
470 struct gfs2_trans
*tr
;
473 spin_lock(&sdp
->sd_ail_lock
);
475 if (list_empty(&sdp
->sd_ail1_list
)) {
476 tail
= sdp
->sd_log_head
;
478 tr
= list_entry(sdp
->sd_ail1_list
.prev
, struct gfs2_trans
,
483 spin_unlock(&sdp
->sd_ail_lock
);
/*
 * Advances the log tail to new_tail. dist is the log_distance() from the
 * old sd_log_tail to new_tail; ail2_empty() is run for the new tail, dist is
 * added to sd_log_blks_free (traced, and asserted not to exceed jd_blocks),
 * and sd_log_tail is updated last.
 */
488 static void log_pull_tail(struct gfs2_sbd
*sdp
, unsigned int new_tail
)
/* dist is computed before sd_log_tail is overwritten at the end. */
490 unsigned int dist
= log_distance(sdp
, new_tail
, sdp
->sd_log_tail
);
492 ail2_empty(sdp
, new_tail
);
494 atomic_add(dist
, &sdp
->sd_log_blks_free
);
495 trace_gfs2_log_blocks(sdp
, dist
);
496 gfs2_assert_withdraw(sdp
, atomic_read(&sdp
->sd_log_blks_free
) <=
497 sdp
->sd_jdesc
->jd_blocks
);
499 sdp
->sd_log_tail
= new_tail
;
/*
 * If sd_log_in_flight is non-zero, waits on sd_log_flush_wait in
 * TASK_UNINTERRUPTIBLE state, looping until the counter reads zero, then
 * calls finish_wait().
 * NOTE(review): the declaration of wait, the opening of the do-loop and the
 * call that actually sleeps are missing from this excerpt.
 */
503 static void log_flush_wait(struct gfs2_sbd
*sdp
)
507 if (atomic_read(&sdp
->sd_log_in_flight
)) {
509 prepare_to_wait(&sdp
->sd_log_flush_wait
, &wait
,
510 TASK_UNINTERRUPTIBLE
);
/* Re-checked after queuing on the wait queue, before sleeping. */
511 if (atomic_read(&sdp
->sd_log_in_flight
))
513 } while(atomic_read(&sdp
->sd_log_in_flight
));
514 finish_wait(&sdp
->sd_log_flush_wait
, &wait
);
/*
 * Comparison callback with the list_sort() signature. a and b are the
 * i_ordered list nodes of two gfs2_inode structures, which are compared by
 * i_no_addr.
 * NOTE(review): the values returned for the less-than, greater-than and
 * equal cases are on lines missing from this excerpt.
 */
518 static int ip_cmp(void *priv
, struct list_head
*a
, struct list_head
*b
)
520 struct gfs2_inode
*ipa
, *ipb
;
522 ipa
= list_entry(a
, struct gfs2_inode
, i_ordered
);
523 ipb
= list_entry(b
, struct gfs2_inode
, i_ordered
);
525 if (ipa
->i_no_addr
< ipb
->i_no_addr
)
527 if (ipa
->i_no_addr
> ipb
->i_no_addr
)
/*
 * Starts writeback for every inode on sd_log_le_ordered. Under
 * sd_ordered_lock the list is first sorted with ip_cmp; each inode is then
 * taken from the front and moved onto the local "written" list, and
 * filemap_fdatawrite() is called on its mapping with the lock dropped. At
 * the end "written" is spliced back onto sd_log_le_ordered, so the inodes
 * remain on the ordered list.
 * NOTE(review): the declaration of written and the statement taken when
 * nrpages == 0 are not visible in this excerpt.
 */
532 static void gfs2_ordered_write(struct gfs2_sbd
*sdp
)
534 struct gfs2_inode
*ip
;
537 spin_lock(&sdp
->sd_ordered_lock
);
538 list_sort(NULL
, &sdp
->sd_log_le_ordered
, &ip_cmp
);
539 while (!list_empty(&sdp
->sd_log_le_ordered
)) {
540 ip
= list_entry(sdp
->sd_log_le_ordered
.next
, struct gfs2_inode
, i_ordered
);
541 list_move(&ip
->i_ordered
, &written
);
542 if (ip
->i_inode
.i_mapping
->nrpages
== 0)
/* filemap_fdatawrite() is called without the spinlock held. */
544 spin_unlock(&sdp
->sd_ordered_lock
);
545 filemap_fdatawrite(ip
->i_inode
.i_mapping
);
546 spin_lock(&sdp
->sd_ordered_lock
);
548 list_splice(&written
, &sdp
->sd_log_le_ordered
);
549 spin_unlock(&sdp
->sd_ordered_lock
);
/*
 * Empties sd_log_le_ordered. Each inode is taken from the front and
 * unlinked, its GIF_ORDERED flag is cleared (with a warning if it was not
 * set), and filemap_fdatawait() is called on its mapping with
 * sd_ordered_lock dropped.
 * NOTE(review): the statement taken when nrpages == 0 is not visible in
 * this excerpt.
 */
552 static void gfs2_ordered_wait(struct gfs2_sbd
*sdp
)
554 struct gfs2_inode
*ip
;
556 spin_lock(&sdp
->sd_ordered_lock
);
557 while (!list_empty(&sdp
->sd_log_le_ordered
)) {
558 ip
= list_entry(sdp
->sd_log_le_ordered
.next
, struct gfs2_inode
, i_ordered
);
559 list_del(&ip
->i_ordered
);
560 WARN_ON(!test_and_clear_bit(GIF_ORDERED
, &ip
->i_flags
));
561 if (ip
->i_inode
.i_mapping
->nrpages
== 0)
/* filemap_fdatawait() is called without the spinlock held. */
563 spin_unlock(&sdp
->sd_ordered_lock
);
564 filemap_fdatawait(ip
->i_inode
.i_mapping
);
565 spin_lock(&sdp
->sd_ordered_lock
);
567 spin_unlock(&sdp
->sd_ordered_lock
);
/*
 * Takes ip off the ordered-write list of its superblock. Under
 * sd_ordered_lock, GIF_ORDERED is tested and cleared in one step and the
 * inode is unlinked only if the flag was set, so calling this for an inode
 * that is not on the list is harmless.
 */
570 void gfs2_ordered_del_inode(struct gfs2_inode
*ip
)
572 struct gfs2_sbd
*sdp
= GFS2_SB(&ip
->i_inode
);
574 spin_lock(&sdp
->sd_ordered_lock
);
575 if (test_and_clear_bit(GIF_ORDERED
, &ip
->i_flags
))
576 list_del(&ip
->i_ordered
);
577 spin_unlock(&sdp
->sd_ordered_lock
);
/*
 * Turns bd into a revoke entry: detaches it from its buffer head (clears
 * b_private after saving the block number in bd_blkno), removes it from the
 * AIL lists, switches bd_ops to gfs2_revoke_lops, counts it in
 * sd_log_num_revoke and gl_revokes, sets GLF_LFLUSH on the glock and queues
 * bd on sd_log_le_revoke.
 * NOTE(review): the statement executed when gl_revokes goes from 0 to 1 is
 * on a line missing from this excerpt (gfs2_glock_remove_revoke() drops a
 * glock reference on the 1 -> 0 transition, so presumably one is taken
 * here — confirm).
 */
580 void gfs2_add_revoke(struct gfs2_sbd
*sdp
, struct gfs2_bufdata
*bd
)
582 struct buffer_head
*bh
= bd
->bd_bh
;
583 struct gfs2_glock
*gl
= bd
->bd_gl
;
585 bh
->b_private
= NULL
;
586 bd
->bd_blkno
= bh
->b_blocknr
;
587 gfs2_remove_from_ail(bd
); /* drops ref on bh */
589 bd
->bd_ops
= &gfs2_revoke_lops
;
590 sdp
->sd_log_num_revoke
++;
591 if (atomic_inc_return(&gl
->gl_revokes
) == 1)
593 set_bit(GLF_LFLUSH
, &gl
->gl_flags
);
594 list_add(&bd
->bd_list
, &sdp
->sd_log_le_revoke
);
/*
 * Drops one revoke from gl's gl_revokes count. When the count reaches zero,
 * GLF_LFLUSH is cleared and gfs2_glock_queue_put() is called on the glock.
 */
597 void gfs2_glock_remove_revoke(struct gfs2_glock
*gl
)
599 if (atomic_dec_return(&gl
->gl_revokes
) == 0) {
600 clear_bit(GLF_LFLUSH
, &gl
->gl_flags
);
601 gfs2_glock_queue_put(gl
);
/*
 * Adds revokes for AIL2 buffers while there is spare room in the revoke
 * blocks that will be written anyway.
 *  1. max_revokes starts as the number of u64 entries that fit in a block
 *     after a struct gfs2_log_descriptor. gfs2_ail1_empty() is run, then a
 *     first scan of the tr_ail2_list of every transaction on sd_ail1_list
 *     looks for a bd whose bd_list is empty.
 *  2. max_revokes is grown by the per-block capacity after a
 *     struct gfs2_meta_header until it covers sd_log_num_revoke, and
 *     sd_log_num_revoke is then subtracted, leaving the number of free
 *     slots.
 *  3. If there are no revokes yet, one block is taken from
 *     sd_log_blks_free (plus one more for the header when nothing is
 *     reserved).
 *  4. A second scan calls gfs2_add_revoke() on entries with an empty
 *     bd_list, checking max_revokes before each one.
 *  5. If sd_log_num_revoke is still zero afterwards, the blocks taken in
 *     step 3 are given back.
 * NOTE(review): the statements that set have_revokes, the early exit when
 * it is zero, the gfs2_log_lock() matching the visible gfs2_log_unlock(),
 * the max_revokes decrement and the loop exits are missing from this
 * excerpt.
 */
605 void gfs2_write_revokes(struct gfs2_sbd
*sdp
)
607 struct gfs2_trans
*tr
;
608 struct gfs2_bufdata
*bd
, *tmp
;
609 int have_revokes
= 0;
610 int max_revokes
= (sdp
->sd_sb
.sb_bsize
- sizeof(struct gfs2_log_descriptor
)) / sizeof(u64
);
612 gfs2_ail1_empty(sdp
);
613 spin_lock(&sdp
->sd_ail_lock
);
614 list_for_each_entry(tr
, &sdp
->sd_ail1_list
, tr_list
) {
615 list_for_each_entry(bd
, &tr
->tr_ail2_list
, bd_ail_st_list
) {
616 if (list_empty(&bd
->bd_list
)) {
623 spin_unlock(&sdp
->sd_ail_lock
);
624 if (have_revokes
== 0)
626 while (sdp
->sd_log_num_revoke
> max_revokes
)
627 max_revokes
+= (sdp
->sd_sb
.sb_bsize
- sizeof(struct gfs2_meta_header
)) / sizeof(u64
);
/* What remains is the spare capacity in the revoke blocks. */
628 max_revokes
-= sdp
->sd_log_num_revoke
;
629 if (!sdp
->sd_log_num_revoke
) {
630 atomic_dec(&sdp
->sd_log_blks_free
);
631 /* If no blocks have been reserved, we need to also
632 * reserve a block for the header */
633 if (!sdp
->sd_log_blks_reserved
)
634 atomic_dec(&sdp
->sd_log_blks_free
);
637 spin_lock(&sdp
->sd_ail_lock
);
638 list_for_each_entry(tr
, &sdp
->sd_ail1_list
, tr_list
) {
639 list_for_each_entry_safe(bd
, tmp
, &tr
->tr_ail2_list
, bd_ail_st_list
) {
640 if (max_revokes
== 0)
642 if (!list_empty(&bd
->bd_list
))
644 gfs2_add_revoke(sdp
, bd
);
649 spin_unlock(&sdp
->sd_ail_lock
);
650 gfs2_log_unlock(sdp
);
/* No revoke was added after all: undo the block accounting done earlier. */
652 if (!sdp
->sd_log_num_revoke
) {
653 atomic_inc(&sdp
->sd_log_blks_free
);
654 if (!sdp
->sd_log_blks_reserved
)
655 atomic_inc(&sdp
->sd_log_blks_free
);
660 * log_write_header - Get and initialize a journal header buffer
661 * @sdp: The GFS2 superblock
663 * Returns: the initialized log buffer descriptor
/*
 * Builds and writes one journal header block.
 *  - A page is taken from gfs2_page_pool (GFP_NOIO) and used as the
 *    struct gfs2_log_header.
 *  - The filesystem is asserted not to be in the SFS_FROZEN state.
 *  - Header fields are stored big-endian: magic, type, format, journal id,
 *    the sequence number (sd_log_sequence is post-incremented), the caller's
 *    flags, the tail from current_tail() and the block number
 *    sd_log_flush_head. lh_hash is gfs2_disk_hash() over the header.
 *  - By default the write uses REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC.
 *    With SDF_NOBARRIERS set, gfs2_ordered_wait() is called and the flags
 *    become REQ_SYNC | REQ_META | REQ_PRIO.
 *  - sd_log_idle is set when the tail equals sd_log_flush_head.
 *  - After the page is written and the bio flushed, log_pull_tail() is
 *    called if the tail differs from sd_log_tail.
 * NOTE(review): the declarations of tail and hash, and any lines between
 * the visible ones (e.g. zeroing of the page), are missing from this
 * excerpt.
 */
666 static void log_write_header(struct gfs2_sbd
*sdp
, u32 flags
)
668 struct gfs2_log_header
*lh
;
671 int op_flags
= REQ_PREFLUSH
| REQ_FUA
| REQ_META
| REQ_SYNC
;
672 struct page
*page
= mempool_alloc(gfs2_page_pool
, GFP_NOIO
);
673 enum gfs2_freeze_state state
= atomic_read(&sdp
->sd_freeze_state
);
674 lh
= page_address(page
);
677 gfs2_assert_withdraw(sdp
, (state
!= SFS_FROZEN
));
679 tail
= current_tail(sdp
);
681 lh
->lh_header
.mh_magic
= cpu_to_be32(GFS2_MAGIC
);
682 lh
->lh_header
.mh_type
= cpu_to_be32(GFS2_METATYPE_LH
);
683 lh
->lh_header
.__pad0
= cpu_to_be64(0);
684 lh
->lh_header
.mh_format
= cpu_to_be32(GFS2_FORMAT_LH
);
685 lh
->lh_header
.mh_jid
= cpu_to_be32(sdp
->sd_jdesc
->jd_jid
);
686 lh
->lh_sequence
= cpu_to_be64(sdp
->sd_log_sequence
++);
687 lh
->lh_flags
= cpu_to_be32(flags
);
688 lh
->lh_tail
= cpu_to_be32(tail
);
689 lh
->lh_blkno
= cpu_to_be32(sdp
->sd_log_flush_head
);
690 hash
= gfs2_disk_hash(page_address(page
), sizeof(struct gfs2_log_header
));
691 lh
->lh_hash
= cpu_to_be32(hash
);
/* Without barriers: wait for ordered data, then write without PREFLUSH/FUA. */
693 if (test_bit(SDF_NOBARRIERS
, &sdp
->sd_flags
)) {
694 gfs2_ordered_wait(sdp
);
696 op_flags
= REQ_SYNC
| REQ_META
| REQ_PRIO
;
699 sdp
->sd_log_idle
= (tail
== sdp
->sd_log_flush_head
);
700 gfs2_log_write_page(sdp
, page
);
701 gfs2_log_flush_bio(sdp
, REQ_OP_WRITE
, op_flags
);
704 if (sdp
->sd_log_tail
!= tail
)
705 log_pull_tail(sdp
, tail
);
709 * gfs2_log_flush - flush incore transaction(s)
710 * @sdp: the filesystem
711 * @gl: The glock structure to flush. If NULL, flush the whole incore log
/*
 * Flushes the in-core log while holding sd_log_flush_lock for writing.
 * Visible sequence:
 *  - If a glock was given and its GLF_LFLUSH bit is already clear, the lock
 *    is released (nothing left to flush for that glock).
 *  - SHUTDOWN_FLUSH clears SDF_JOURNAL_LIVE.
 *  - sd_log_flush_head starts at sd_log_head; the pending transaction is
 *    detached from sd_log_tr and its tr_first set to the flush head.
 *  - While the filesystem is SFS_FROZEN there must be no new buffers and no
 *    revokes; sd_log_num_revoke must equal sd_log_commited_revoke.
 *  - Ordered data is written, lops_before_commit() runs and the bio is
 *    flushed.
 *  - A header is written if the flush head moved, or if the tail changed and
 *    the log is not idle (that case takes one block from sd_log_blks_free
 *    for the otherwise unreserved header).
 *  - lops_after_commit() runs; sd_log_head, sd_log_blks_reserved and
 *    sd_log_commited_revoke are reset; a transaction with a non-empty
 *    tr_ail1_list is added to sd_ail1_list.
 *  - For anything other than NORMAL_FLUSH, a non-idle log gets its AIL1
 *    started and emptied, followed by another header. SHUTDOWN_FLUSH and
 *    FREEZE_FLUSH call gfs2_log_shutdown(); FREEZE_FLUSH then sets
 *    SFS_FROZEN.
 * NOTE(review): many lines are missing from this excerpt — the early
 * return, the assignment of tr, the gfs2_log_lock() matching the visible
 * gfs2_log_unlock(), waits between the AIL calls, and the handling of a
 * transaction that is not added to sd_ail1_list.
 */
715 void gfs2_log_flush(struct gfs2_sbd
*sdp
, struct gfs2_glock
*gl
,
716 enum gfs2_flush_type type
)
718 struct gfs2_trans
*tr
;
719 enum gfs2_freeze_state state
= atomic_read(&sdp
->sd_freeze_state
);
721 down_write(&sdp
->sd_log_flush_lock
);
723 /* Log might have been flushed while we waited for the flush lock */
724 if (gl
&& !test_bit(GLF_LFLUSH
, &gl
->gl_flags
)) {
725 up_write(&sdp
->sd_log_flush_lock
);
728 trace_gfs2_log_flush(sdp
, 1);
730 if (type
== SHUTDOWN_FLUSH
)
731 clear_bit(SDF_JOURNAL_LIVE
, &sdp
->sd_flags
);
733 sdp
->sd_log_flush_head
= sdp
->sd_log_head
;
736 sdp
->sd_log_tr
= NULL
;
737 tr
->tr_first
= sdp
->sd_log_flush_head
;
738 if (unlikely (state
== SFS_FROZEN
))
739 gfs2_assert_withdraw(sdp
, !tr
->tr_num_buf_new
&& !tr
->tr_num_databuf_new
);
742 if (unlikely(state
== SFS_FROZEN
))
743 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_revoke
);
744 gfs2_assert_withdraw(sdp
,
745 sdp
->sd_log_num_revoke
== sdp
->sd_log_commited_revoke
);
747 gfs2_ordered_write(sdp
);
748 lops_before_commit(sdp
, tr
);
749 gfs2_log_flush_bio(sdp
, REQ_OP_WRITE
, 0);
/* The flush head moved, so log blocks were written and need a header. */
751 if (sdp
->sd_log_head
!= sdp
->sd_log_flush_head
) {
753 log_write_header(sdp
, 0);
754 } else if (sdp
->sd_log_tail
!= current_tail(sdp
) && !sdp
->sd_log_idle
){
755 atomic_dec(&sdp
->sd_log_blks_free
); /* Adjust for unreserved buffer */
756 trace_gfs2_log_blocks(sdp
, -1);
757 log_write_header(sdp
, 0);
759 lops_after_commit(sdp
, tr
);
762 sdp
->sd_log_head
= sdp
->sd_log_flush_head
;
763 sdp
->sd_log_blks_reserved
= 0;
764 sdp
->sd_log_commited_revoke
= 0;
766 spin_lock(&sdp
->sd_ail_lock
);
767 if (tr
&& !list_empty(&tr
->tr_ail1_list
)) {
768 list_add(&tr
->tr_list
, &sdp
->sd_ail1_list
);
771 spin_unlock(&sdp
->sd_ail_lock
);
772 gfs2_log_unlock(sdp
);
774 if (type
!= NORMAL_FLUSH
) {
775 if (!sdp
->sd_log_idle
) {
777 gfs2_ail1_start(sdp
);
779 if (gfs2_ail1_empty(sdp
))
782 atomic_dec(&sdp
->sd_log_blks_free
); /* Adjust for unreserved buffer */
783 trace_gfs2_log_blocks(sdp
, -1);
784 log_write_header(sdp
, 0);
785 sdp
->sd_log_head
= sdp
->sd_log_flush_head
;
787 if (type
== SHUTDOWN_FLUSH
|| type
== FREEZE_FLUSH
)
788 gfs2_log_shutdown(sdp
);
789 if (type
== FREEZE_FLUSH
)
790 atomic_set(&sdp
->sd_freeze_state
, SFS_FROZEN
);
793 trace_gfs2_log_flush(sdp
, 0);
794 up_write(&sdp
->sd_log_flush_lock
);
800 * gfs2_merge_trans - Merge a new transaction into a cached transaction
801 * @sdp: The GFS2 superblock; sdp->sd_log_tr is the original (cached) transaction to be expanded
802 * @new: New transaction to be merged
/*
 * Folds the transaction "new" into the cached transaction sdp->sd_log_tr
 * ("old"). A one-time warning fires if old does not have TR_ATTACHED set.
 * All six counters (buffers/databufs new and removed, revokes added and
 * removed) are summed into old. new's tr_databuf and tr_buf lists are
 * appended to old's and left empty; its two AIL lists are appended the same
 * way under sd_ail_lock.
 */
805 static void gfs2_merge_trans(struct gfs2_sbd
*sdp
, struct gfs2_trans
*new)
807 struct gfs2_trans
*old
= sdp
->sd_log_tr
;
809 WARN_ON_ONCE(!test_bit(TR_ATTACHED
, &old
->tr_flags
));
811 old
->tr_num_buf_new
+= new->tr_num_buf_new
;
812 old
->tr_num_databuf_new
+= new->tr_num_databuf_new
;
813 old
->tr_num_buf_rm
+= new->tr_num_buf_rm
;
814 old
->tr_num_databuf_rm
+= new->tr_num_databuf_rm
;
815 old
->tr_num_revoke
+= new->tr_num_revoke
;
816 old
->tr_num_revoke_rm
+= new->tr_num_revoke_rm
;
818 list_splice_tail_init(&new->tr_databuf
, &old
->tr_databuf
);
819 list_splice_tail_init(&new->tr_buf
, &old
->tr_buf
);
/* Only the AIL list splices are done under sd_ail_lock. */
821 spin_lock(&sdp
->sd_ail_lock
);
822 list_splice_tail_init(&new->tr_ail1_list
, &old
->tr_ail1_list
);
823 list_splice_tail_init(&new->tr_ail2_list
, &old
->tr_ail2_list
);
824 spin_unlock(&sdp
->sd_ail_lock
);
/*
 * Returns the unused part of a transaction's log reservation.
 *  - If a transaction is already cached in sd_log_tr, tr is merged into it.
 *    Otherwise, when tr has new buffers or data buffers, it must have
 *    TR_ALLOCED set and is marked TR_ATTACHED.
 *  - sd_log_commited_revoke grows by tr's net revoke count.
 *  - reserved is recomputed with calc_reserved(); maxres is the previous
 *    reservation plus tr->tr_reserved and must not be smaller. The
 *    difference is added to sd_log_blks_free (traced, and asserted not to
 *    exceed jd_blocks), and sd_log_blks_reserved becomes reserved.
 * NOTE(review): the declarations of maxres/unused, the gfs2_log_lock()
 * matching the visible gfs2_log_unlock() and the assignment that installs
 * tr as sd_log_tr are missing from this excerpt.
 */
827 static void log_refund(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
829 unsigned int reserved
;
835 if (sdp
->sd_log_tr
) {
836 gfs2_merge_trans(sdp
, tr
);
837 } else if (tr
->tr_num_buf_new
|| tr
->tr_num_databuf_new
) {
838 gfs2_assert_withdraw(sdp
, test_bit(TR_ALLOCED
, &tr
->tr_flags
));
840 set_bit(TR_ATTACHED
, &tr
->tr_flags
);
843 sdp
->sd_log_commited_revoke
+= tr
->tr_num_revoke
- tr
->tr_num_revoke_rm
;
844 reserved
= calc_reserved(sdp
);
845 maxres
= sdp
->sd_log_blks_reserved
+ tr
->tr_reserved
;
846 gfs2_assert_withdraw(sdp
, maxres
>= reserved
);
/* Whatever was reserved beyond the recomputed need goes back to the free pool. */
847 unused
= maxres
- reserved
;
848 atomic_add(unused
, &sdp
->sd_log_blks_free
);
849 trace_gfs2_log_blocks(sdp
, unused
);
850 gfs2_assert_withdraw(sdp
, atomic_read(&sdp
->sd_log_blks_free
) <=
851 sdp
->sd_jdesc
->jd_blocks
);
852 sdp
->sd_log_blks_reserved
= reserved
;
854 gfs2_log_unlock(sdp
);
858 * gfs2_log_commit - Commit a transaction to the log
859 * @sdp: the filesystem
860 * @tr: the transaction
862 * We wake up gfs2_logd if the number of pinned blocks exceed thresh1
863 * or the total number of used blocks (pinned blocks plus AIL blocks)
864 * is greater than thresh2.
866 * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
/*
 * The visible part wakes sd_logd_waitq when either
 *   sd_log_pinned > sd_log_thresh1, or
 *   jd_blocks - sd_log_blks_free (blocks in use) > sd_log_thresh2.
 * NOTE(review): the statements before this check (presumably the call that
 * refunds the transaction's unused reservation) are missing from this
 * excerpt.
 */
872 void gfs2_log_commit(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
876 if (atomic_read(&sdp
->sd_log_pinned
) > atomic_read(&sdp
->sd_log_thresh1
) ||
877 ((sdp
->sd_jdesc
->jd_blocks
- atomic_read(&sdp
->sd_log_blks_free
)) >
878 atomic_read(&sdp
->sd_log_thresh2
)))
879 wake_up(&sdp
->sd_logd_waitq
);
883 * gfs2_log_shutdown - write a shutdown header into a journal
884 * @sdp: the filesystem
/*
 * Writes the unmount header. Preconditions asserted first: nothing
 * reserved, no pending revokes and an empty sd_ail1_list. The flush head is
 * set to sd_log_head, a header flagged GFS2_LOG_HEAD_UNMOUNT is written,
 * and the function warns if head and tail differ or sd_ail2_list is not
 * empty. Finally sd_log_head takes the flush head and sd_log_tail is set
 * equal to it.
 */
888 void gfs2_log_shutdown(struct gfs2_sbd
*sdp
)
890 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_blks_reserved
);
891 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_revoke
);
892 gfs2_assert_withdraw(sdp
, list_empty(&sdp
->sd_ail1_list
));
894 sdp
->sd_log_flush_head
= sdp
->sd_log_head
;
896 log_write_header(sdp
, GFS2_LOG_HEAD_UNMOUNT
);
898 gfs2_assert_warn(sdp
, sdp
->sd_log_head
== sdp
->sd_log_tail
);
899 gfs2_assert_warn(sdp
, list_empty(&sdp
->sd_ail2_list
));
901 sdp
->sd_log_head
= sdp
->sd_log_flush_head
;
902 sdp
->sd_log_tail
= sdp
->sd_log_head
;
/*
 * Returns non-zero when the pinned block count plus the blocks that waiting
 * reservations need (sd_log_blks_needed) has reached sd_log_thresh1.
 */
905 static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd
*sdp
)
907 return (atomic_read(&sdp
->sd_log_pinned
) +
908 atomic_read(&sdp
->sd_log_blks_needed
) >=
909 atomic_read(&sdp
->sd_log_thresh1
));
/*
 * used_blocks is the journal size minus the free block count. The
 * SDF_FORCE_AIL_FLUSH flag is tested and cleared first; otherwise the
 * result is whether used_blocks plus sd_log_blks_needed has reached
 * sd_log_thresh2.
 * NOTE(review): the statement executed when SDF_FORCE_AIL_FLUSH was set is
 * missing from this excerpt — presumably it returns non-zero.
 */
912 static inline int gfs2_ail_flush_reqd(struct gfs2_sbd
*sdp
)
914 unsigned int used_blocks
= sdp
->sd_jdesc
->jd_blocks
- atomic_read(&sdp
->sd_log_blks_free
);
916 if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH
, &sdp
->sd_flags
))
919 return used_blocks
+ atomic_read(&sdp
->sd_log_blks_needed
) >=
920 atomic_read(&sdp
->sd_log_thresh2
);
924 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
925 * @sdp: Pointer to GFS2 superblock
927 * Also, periodically check to make sure that we're using the most recent
931 int gfs2_logd(void *data
)
933 struct gfs2_sbd
*sdp
= data
;
938 while (!kthread_should_stop()) {
940 /* Check for errors writing to the journal */
941 if (sdp
->sd_log_error
) {
942 gfs2_lm_withdraw(sdp
,
943 "GFS2: fsid=%s: error %d: "
944 "withdrawing the file system to "
945 "prevent further damage.\n",
946 sdp
->sd_fsname
, sdp
->sd_log_error
);
950 if (gfs2_jrnl_flush_reqd(sdp
) || t
== 0) {
951 gfs2_ail1_empty(sdp
);
952 gfs2_log_flush(sdp
, NULL
, NORMAL_FLUSH
);
956 if (gfs2_ail_flush_reqd(sdp
)) {
957 gfs2_ail1_start(sdp
);
959 gfs2_ail1_empty(sdp
);
960 gfs2_log_flush(sdp
, NULL
, NORMAL_FLUSH
);
964 if (!gfs2_ail_flush_reqd(sdp
) || did_flush
)
965 wake_up(&sdp
->sd_log_waitq
);
967 t
= gfs2_tune_get(sdp
, gt_logd_secs
) * HZ
;
972 prepare_to_wait(&sdp
->sd_logd_waitq
, &wait
,
974 if (!gfs2_ail_flush_reqd(sdp
) &&
975 !gfs2_jrnl_flush_reqd(sdp
) &&
976 !kthread_should_stop())
977 t
= schedule_timeout(t
);
978 } while(t
&& !gfs2_ail_flush_reqd(sdp
) &&
979 !gfs2_jrnl_flush_reqd(sdp
) &&
980 !kthread_should_stop());
981 finish_wait(&sdp
->sd_logd_waitq
, &wait
);