/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/bio.h>
#include <linux/writeback.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "util.h"
#include "dir.h"
#include "trace_gfs2.h"

/* Value for log_write_header()'s @pull argument: the log tail is expected to move */
#define PULL 1
37 * gfs2_struct2blk - compute stuff
38 * @sdp: the filesystem
39 * @nstruct: the number of structures
40 * @ssize: the size of the structures
42 * Compute the number of log descriptor blocks needed to hold a certain number
43 * of structures of a certain size.
45 * Returns: the number of blocks needed (minimum is always 1)
48 unsigned int gfs2_struct2blk(struct gfs2_sbd
*sdp
, unsigned int nstruct
,
52 unsigned int first
, second
;
55 first
= (sdp
->sd_sb
.sb_bsize
- sizeof(struct gfs2_log_descriptor
)) / ssize
;
57 if (nstruct
> first
) {
58 second
= (sdp
->sd_sb
.sb_bsize
-
59 sizeof(struct gfs2_meta_header
)) / ssize
;
60 blks
+= DIV_ROUND_UP(nstruct
- first
, second
);
67 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
68 * @mapping: The associated mapping (maybe NULL)
69 * @bd: The gfs2_bufdata to remove
71 * The ail lock _must_ be held when calling this function
75 void gfs2_remove_from_ail(struct gfs2_bufdata
*bd
)
78 list_del_init(&bd
->bd_ail_st_list
);
79 list_del_init(&bd
->bd_ail_gl_list
);
80 atomic_dec(&bd
->bd_gl
->gl_ail_count
);
85 * gfs2_ail1_start_one - Start I/O on a part of the AIL
86 * @sdp: the filesystem
87 * @wbc: The writeback control structure
88 * @ai: The ail structure
92 static int gfs2_ail1_start_one(struct gfs2_sbd
*sdp
,
93 struct writeback_control
*wbc
,
95 __releases(&sdp
->sd_ail_lock
)
96 __acquires(&sdp
->sd_ail_lock
)
98 struct gfs2_glock
*gl
= NULL
;
99 struct address_space
*mapping
;
100 struct gfs2_bufdata
*bd
, *s
;
101 struct buffer_head
*bh
;
103 list_for_each_entry_safe_reverse(bd
, s
, &ai
->ai_ail1_list
, bd_ail_st_list
) {
106 gfs2_assert(sdp
, bd
->bd_ail
== ai
);
108 if (!buffer_busy(bh
)) {
109 if (!buffer_uptodate(bh
))
110 gfs2_io_error_bh(sdp
, bh
);
111 list_move(&bd
->bd_ail_st_list
, &ai
->ai_ail2_list
);
115 if (!buffer_dirty(bh
))
120 list_move(&bd
->bd_ail_st_list
, &ai
->ai_ail1_list
);
121 mapping
= bh
->b_page
->mapping
;
124 spin_unlock(&sdp
->sd_ail_lock
);
125 generic_writepages(mapping
, wbc
);
126 spin_lock(&sdp
->sd_ail_lock
);
127 if (wbc
->nr_to_write
<= 0)
137 * gfs2_ail1_flush - start writeback of some ail1 entries
138 * @sdp: The super block
139 * @wbc: The writeback control structure
141 * Writes back some ail1 entries, according to the limits in the
142 * writeback control structure
145 void gfs2_ail1_flush(struct gfs2_sbd
*sdp
, struct writeback_control
*wbc
)
147 struct list_head
*head
= &sdp
->sd_ail1_list
;
150 trace_gfs2_ail_flush(sdp
, wbc
, 1);
151 spin_lock(&sdp
->sd_ail_lock
);
153 list_for_each_entry_reverse(ai
, head
, ai_list
) {
154 if (wbc
->nr_to_write
<= 0)
156 if (gfs2_ail1_start_one(sdp
, wbc
, ai
))
159 spin_unlock(&sdp
->sd_ail_lock
);
160 trace_gfs2_ail_flush(sdp
, wbc
, 0);
164 * gfs2_ail1_start - start writeback of all ail1 entries
165 * @sdp: The superblock
168 static void gfs2_ail1_start(struct gfs2_sbd
*sdp
)
170 struct writeback_control wbc
= {
171 .sync_mode
= WB_SYNC_NONE
,
172 .nr_to_write
= LONG_MAX
,
174 .range_end
= LLONG_MAX
,
177 return gfs2_ail1_flush(sdp
, &wbc
);
181 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
182 * @sdp: the filesystem
187 static void gfs2_ail1_empty_one(struct gfs2_sbd
*sdp
, struct gfs2_ail
*ai
)
189 struct gfs2_bufdata
*bd
, *s
;
190 struct buffer_head
*bh
;
192 list_for_each_entry_safe_reverse(bd
, s
, &ai
->ai_ail1_list
,
195 gfs2_assert(sdp
, bd
->bd_ail
== ai
);
198 if (!buffer_uptodate(bh
))
199 gfs2_io_error_bh(sdp
, bh
);
200 list_move(&bd
->bd_ail_st_list
, &ai
->ai_ail2_list
);
206 * gfs2_ail1_empty - Try to empty the ail1 lists
207 * @sdp: The superblock
209 * Tries to empty the ail1 lists, starting with the oldest first
212 static int gfs2_ail1_empty(struct gfs2_sbd
*sdp
)
214 struct gfs2_ail
*ai
, *s
;
217 spin_lock(&sdp
->sd_ail_lock
);
218 list_for_each_entry_safe_reverse(ai
, s
, &sdp
->sd_ail1_list
, ai_list
) {
219 gfs2_ail1_empty_one(sdp
, ai
);
220 if (list_empty(&ai
->ai_ail1_list
))
221 list_move(&ai
->ai_list
, &sdp
->sd_ail2_list
);
225 ret
= list_empty(&sdp
->sd_ail1_list
);
226 spin_unlock(&sdp
->sd_ail_lock
);
231 static void gfs2_ail1_wait(struct gfs2_sbd
*sdp
)
234 struct gfs2_bufdata
*bd
;
235 struct buffer_head
*bh
;
237 spin_lock(&sdp
->sd_ail_lock
);
238 list_for_each_entry_reverse(ai
, &sdp
->sd_ail1_list
, ai_list
) {
239 list_for_each_entry(bd
, &ai
->ai_ail1_list
, bd_ail_st_list
) {
241 if (!buffer_locked(bh
))
244 spin_unlock(&sdp
->sd_ail_lock
);
250 spin_unlock(&sdp
->sd_ail_lock
);
254 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
255 * @sdp: the filesystem
260 static void gfs2_ail2_empty_one(struct gfs2_sbd
*sdp
, struct gfs2_ail
*ai
)
262 struct list_head
*head
= &ai
->ai_ail2_list
;
263 struct gfs2_bufdata
*bd
;
265 while (!list_empty(head
)) {
266 bd
= list_entry(head
->prev
, struct gfs2_bufdata
,
268 gfs2_assert(sdp
, bd
->bd_ail
== ai
);
269 gfs2_remove_from_ail(bd
);
273 static void ail2_empty(struct gfs2_sbd
*sdp
, unsigned int new_tail
)
275 struct gfs2_ail
*ai
, *safe
;
276 unsigned int old_tail
= sdp
->sd_log_tail
;
277 int wrap
= (new_tail
< old_tail
);
280 spin_lock(&sdp
->sd_ail_lock
);
282 list_for_each_entry_safe(ai
, safe
, &sdp
->sd_ail2_list
, ai_list
) {
283 a
= (old_tail
<= ai
->ai_first
);
284 b
= (ai
->ai_first
< new_tail
);
285 rm
= (wrap
) ? (a
|| b
) : (a
&& b
);
289 gfs2_ail2_empty_one(sdp
, ai
);
290 list_del(&ai
->ai_list
);
291 gfs2_assert_warn(sdp
, list_empty(&ai
->ai_ail1_list
));
292 gfs2_assert_warn(sdp
, list_empty(&ai
->ai_ail2_list
));
296 spin_unlock(&sdp
->sd_ail_lock
);
300 * gfs2_log_reserve - Make a log reservation
301 * @sdp: The GFS2 superblock
302 * @blks: The number of blocks to reserve
304 * Note that we never give out the last few blocks of the journal. Thats
305 * due to the fact that there is a small number of header blocks
306 * associated with each log flush. The exact number can't be known until
307 * flush time, so we ensure that we have just enough free blocks at all
308 * times to avoid running out during a log flush.
310 * We no longer flush the log here, instead we wake up logd to do that
311 * for us. To avoid the thundering herd and to ensure that we deal fairly
312 * with queued waiters, we use an exclusive wait. This means that when we
313 * get woken with enough journal space to get our reservation, we need to
314 * wake the next waiter on the list.
319 int gfs2_log_reserve(struct gfs2_sbd
*sdp
, unsigned int blks
)
321 unsigned reserved_blks
= 6 * (4096 / sdp
->sd_vfs
->s_blocksize
);
322 unsigned wanted
= blks
+ reserved_blks
;
325 unsigned int free_blocks
;
327 if (gfs2_assert_warn(sdp
, blks
) ||
328 gfs2_assert_warn(sdp
, blks
<= sdp
->sd_jdesc
->jd_blocks
))
331 free_blocks
= atomic_read(&sdp
->sd_log_blks_free
);
332 if (unlikely(free_blocks
<= wanted
)) {
334 prepare_to_wait_exclusive(&sdp
->sd_log_waitq
, &wait
,
335 TASK_UNINTERRUPTIBLE
);
336 wake_up(&sdp
->sd_logd_waitq
);
338 if (atomic_read(&sdp
->sd_log_blks_free
) <= wanted
)
340 free_blocks
= atomic_read(&sdp
->sd_log_blks_free
);
341 } while(free_blocks
<= wanted
);
342 finish_wait(&sdp
->sd_log_waitq
, &wait
);
344 if (atomic_cmpxchg(&sdp
->sd_log_blks_free
, free_blocks
,
345 free_blocks
- blks
) != free_blocks
)
347 trace_gfs2_log_blocks(sdp
, -blks
);
350 * If we waited, then so might others, wake them up _after_ we get
351 * our share of the log.
353 if (unlikely(did_wait
))
354 wake_up(&sdp
->sd_log_waitq
);
356 down_read(&sdp
->sd_log_flush_lock
);
361 static u64
log_bmap(struct gfs2_sbd
*sdp
, unsigned int lbn
)
363 struct gfs2_journal_extent
*je
;
365 list_for_each_entry(je
, &sdp
->sd_jdesc
->extent_list
, extent_list
) {
366 if (lbn
>= je
->lblock
&& lbn
< je
->lblock
+ je
->blocks
)
367 return je
->dblock
+ lbn
- je
->lblock
;
374 * log_distance - Compute distance between two journal blocks
375 * @sdp: The GFS2 superblock
376 * @newer: The most recent journal block of the pair
377 * @older: The older journal block of the pair
379 * Compute the distance (in the journal direction) between two
380 * blocks in the journal
382 * Returns: the distance in blocks
385 static inline unsigned int log_distance(struct gfs2_sbd
*sdp
, unsigned int newer
,
390 dist
= newer
- older
;
392 dist
+= sdp
->sd_jdesc
->jd_blocks
;
398 * calc_reserved - Calculate the number of blocks to reserve when
399 * refunding a transaction's unused buffers.
400 * @sdp: The GFS2 superblock
402 * This is complex. We need to reserve room for all our currently used
403 * metadata buffers (e.g. normal file I/O rewriting file time stamps) and
404 * all our journaled data buffers for journaled files (e.g. files in the
405 * meta_fs like rindex, or files for which chattr +j was done.)
406 * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
407 * will count it as free space (sd_log_blks_free) and corruption will follow.
409 * We can have metadata bufs and jdata bufs in the same journal. So each
410 * type gets its own log header, for which we need to reserve a block.
411 * In fact, each type has the potential for needing more than one header
412 * in cases where we have more buffers than will fit on a journal page.
413 * Metadata journal entries take up half the space of journaled buffer entries.
414 * Thus, metadata entries have buf_limit (502) and journaled buffers have
415 * databuf_limit (251) before they cause a wrap around.
417 * Also, we need to reserve blocks for revoke journal entries and one for an
418 * overall header for the lot.
420 * Returns: the number of blocks reserved
422 static unsigned int calc_reserved(struct gfs2_sbd
*sdp
)
424 unsigned int reserved
= 0;
425 unsigned int mbuf_limit
, metabufhdrs_needed
;
426 unsigned int dbuf_limit
, databufhdrs_needed
;
427 unsigned int revokes
= 0;
429 mbuf_limit
= buf_limit(sdp
);
430 metabufhdrs_needed
= (sdp
->sd_log_commited_buf
+
431 (mbuf_limit
- 1)) / mbuf_limit
;
432 dbuf_limit
= databuf_limit(sdp
);
433 databufhdrs_needed
= (sdp
->sd_log_commited_databuf
+
434 (dbuf_limit
- 1)) / dbuf_limit
;
436 if (sdp
->sd_log_commited_revoke
> 0)
437 revokes
= gfs2_struct2blk(sdp
, sdp
->sd_log_commited_revoke
,
440 reserved
= sdp
->sd_log_commited_buf
+ metabufhdrs_needed
+
441 sdp
->sd_log_commited_databuf
+ databufhdrs_needed
+
443 /* One for the overall header */
449 static unsigned int current_tail(struct gfs2_sbd
*sdp
)
454 spin_lock(&sdp
->sd_ail_lock
);
456 if (list_empty(&sdp
->sd_ail1_list
)) {
457 tail
= sdp
->sd_log_head
;
459 ai
= list_entry(sdp
->sd_ail1_list
.prev
, struct gfs2_ail
, ai_list
);
463 spin_unlock(&sdp
->sd_ail_lock
);
468 void gfs2_log_incr_head(struct gfs2_sbd
*sdp
)
470 if (sdp
->sd_log_flush_head
== sdp
->sd_log_tail
)
471 BUG_ON(sdp
->sd_log_flush_head
!= sdp
->sd_log_head
);
473 if (++sdp
->sd_log_flush_head
== sdp
->sd_jdesc
->jd_blocks
) {
474 sdp
->sd_log_flush_head
= 0;
475 sdp
->sd_log_flush_wrapped
= 1;
480 * gfs2_log_write_endio - End of I/O for a log buffer
481 * @bh: The buffer head
482 * @uptodate: I/O Status
486 static void gfs2_log_write_endio(struct buffer_head
*bh
, int uptodate
)
488 struct gfs2_sbd
*sdp
= bh
->b_private
;
489 bh
->b_private
= NULL
;
491 end_buffer_write_sync(bh
, uptodate
);
492 if (atomic_dec_and_test(&sdp
->sd_log_in_flight
))
493 wake_up(&sdp
->sd_log_flush_wait
);
497 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
498 * @sdp: The GFS2 superblock
500 * Returns: the buffer_head
503 struct buffer_head
*gfs2_log_get_buf(struct gfs2_sbd
*sdp
)
505 u64 blkno
= log_bmap(sdp
, sdp
->sd_log_flush_head
);
506 struct buffer_head
*bh
;
508 bh
= sb_getblk(sdp
->sd_vfs
, blkno
);
510 memset(bh
->b_data
, 0, bh
->b_size
);
511 set_buffer_uptodate(bh
);
512 clear_buffer_dirty(bh
);
513 gfs2_log_incr_head(sdp
);
514 atomic_inc(&sdp
->sd_log_in_flight
);
516 bh
->b_end_io
= gfs2_log_write_endio
;
522 * gfs2_fake_write_endio -
523 * @bh: The buffer head
524 * @uptodate: The I/O Status
528 static void gfs2_fake_write_endio(struct buffer_head
*bh
, int uptodate
)
530 struct buffer_head
*real_bh
= bh
->b_private
;
531 struct gfs2_bufdata
*bd
= real_bh
->b_private
;
532 struct gfs2_sbd
*sdp
= bd
->bd_gl
->gl_sbd
;
534 end_buffer_write_sync(bh
, uptodate
);
535 free_buffer_head(bh
);
536 unlock_buffer(real_bh
);
538 if (atomic_dec_and_test(&sdp
->sd_log_in_flight
))
539 wake_up(&sdp
->sd_log_flush_wait
);
543 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
544 * @sdp: the filesystem
545 * @data: the data the buffer_head should point to
547 * Returns: the log buffer descriptor
550 struct buffer_head
*gfs2_log_fake_buf(struct gfs2_sbd
*sdp
,
551 struct buffer_head
*real
)
553 u64 blkno
= log_bmap(sdp
, sdp
->sd_log_flush_head
);
554 struct buffer_head
*bh
;
556 bh
= alloc_buffer_head(GFP_NOFS
| __GFP_NOFAIL
);
557 atomic_set(&bh
->b_count
, 1);
558 bh
->b_state
= (1 << BH_Mapped
) | (1 << BH_Uptodate
) | (1 << BH_Lock
);
559 set_bh_page(bh
, real
->b_page
, bh_offset(real
));
560 bh
->b_blocknr
= blkno
;
561 bh
->b_size
= sdp
->sd_sb
.sb_bsize
;
562 bh
->b_bdev
= sdp
->sd_vfs
->s_bdev
;
563 bh
->b_private
= real
;
564 bh
->b_end_io
= gfs2_fake_write_endio
;
566 gfs2_log_incr_head(sdp
);
567 atomic_inc(&sdp
->sd_log_in_flight
);
572 static void log_pull_tail(struct gfs2_sbd
*sdp
, unsigned int new_tail
)
574 unsigned int dist
= log_distance(sdp
, new_tail
, sdp
->sd_log_tail
);
576 ail2_empty(sdp
, new_tail
);
578 atomic_add(dist
, &sdp
->sd_log_blks_free
);
579 trace_gfs2_log_blocks(sdp
, dist
);
580 gfs2_assert_withdraw(sdp
, atomic_read(&sdp
->sd_log_blks_free
) <=
581 sdp
->sd_jdesc
->jd_blocks
);
583 sdp
->sd_log_tail
= new_tail
;
587 * log_write_header - Get and initialize a journal header buffer
588 * @sdp: The GFS2 superblock
590 * Returns: the initialized log buffer descriptor
593 static void log_write_header(struct gfs2_sbd
*sdp
, u32 flags
, int pull
)
595 u64 blkno
= log_bmap(sdp
, sdp
->sd_log_flush_head
);
596 struct buffer_head
*bh
;
597 struct gfs2_log_header
*lh
;
601 bh
= sb_getblk(sdp
->sd_vfs
, blkno
);
603 memset(bh
->b_data
, 0, bh
->b_size
);
604 set_buffer_uptodate(bh
);
605 clear_buffer_dirty(bh
);
607 gfs2_ail1_empty(sdp
);
608 tail
= current_tail(sdp
);
610 lh
= (struct gfs2_log_header
*)bh
->b_data
;
611 memset(lh
, 0, sizeof(struct gfs2_log_header
));
612 lh
->lh_header
.mh_magic
= cpu_to_be32(GFS2_MAGIC
);
613 lh
->lh_header
.mh_type
= cpu_to_be32(GFS2_METATYPE_LH
);
614 lh
->lh_header
.__pad0
= cpu_to_be64(0);
615 lh
->lh_header
.mh_format
= cpu_to_be32(GFS2_FORMAT_LH
);
616 lh
->lh_header
.mh_jid
= cpu_to_be32(sdp
->sd_jdesc
->jd_jid
);
617 lh
->lh_sequence
= cpu_to_be64(sdp
->sd_log_sequence
++);
618 lh
->lh_flags
= cpu_to_be32(flags
);
619 lh
->lh_tail
= cpu_to_be32(tail
);
620 lh
->lh_blkno
= cpu_to_be32(sdp
->sd_log_flush_head
);
621 hash
= gfs2_disk_hash(bh
->b_data
, sizeof(struct gfs2_log_header
));
622 lh
->lh_hash
= cpu_to_be32(hash
);
624 bh
->b_end_io
= end_buffer_write_sync
;
626 if (test_bit(SDF_NOBARRIERS
, &sdp
->sd_flags
))
627 submit_bh(WRITE_SYNC
| REQ_META
| REQ_PRIO
, bh
);
629 submit_bh(WRITE_FLUSH_FUA
| REQ_META
, bh
);
632 if (!buffer_uptodate(bh
))
633 gfs2_io_error_bh(sdp
, bh
);
636 if (sdp
->sd_log_tail
!= tail
)
637 log_pull_tail(sdp
, tail
);
639 gfs2_assert_withdraw(sdp
, !pull
);
641 sdp
->sd_log_idle
= (tail
== sdp
->sd_log_flush_head
);
642 gfs2_log_incr_head(sdp
);
645 static void log_flush_commit(struct gfs2_sbd
*sdp
)
649 if (atomic_read(&sdp
->sd_log_in_flight
)) {
651 prepare_to_wait(&sdp
->sd_log_flush_wait
, &wait
,
652 TASK_UNINTERRUPTIBLE
);
653 if (atomic_read(&sdp
->sd_log_in_flight
))
655 } while(atomic_read(&sdp
->sd_log_in_flight
));
656 finish_wait(&sdp
->sd_log_flush_wait
, &wait
);
659 log_write_header(sdp
, 0, 0);
662 static void gfs2_ordered_write(struct gfs2_sbd
*sdp
)
664 struct gfs2_bufdata
*bd
;
665 struct buffer_head
*bh
;
669 while (!list_empty(&sdp
->sd_log_le_ordered
)) {
670 bd
= list_entry(sdp
->sd_log_le_ordered
.next
, struct gfs2_bufdata
, bd_le
.le_list
);
671 list_move(&bd
->bd_le
.le_list
, &written
);
673 if (!buffer_dirty(bh
))
676 gfs2_log_unlock(sdp
);
678 if (buffer_mapped(bh
) && test_clear_buffer_dirty(bh
)) {
679 bh
->b_end_io
= end_buffer_write_sync
;
680 submit_bh(WRITE_SYNC
, bh
);
687 list_splice(&written
, &sdp
->sd_log_le_ordered
);
688 gfs2_log_unlock(sdp
);
691 static void gfs2_ordered_wait(struct gfs2_sbd
*sdp
)
693 struct gfs2_bufdata
*bd
;
694 struct buffer_head
*bh
;
697 while (!list_empty(&sdp
->sd_log_le_ordered
)) {
698 bd
= list_entry(sdp
->sd_log_le_ordered
.prev
, struct gfs2_bufdata
, bd_le
.le_list
);
700 if (buffer_locked(bh
)) {
702 gfs2_log_unlock(sdp
);
708 list_del_init(&bd
->bd_le
.le_list
);
710 gfs2_log_unlock(sdp
);
714 * gfs2_log_flush - flush incore transaction(s)
715 * @sdp: the filesystem
716 * @gl: The glock structure to flush. If NULL, flush the whole incore log
720 void gfs2_log_flush(struct gfs2_sbd
*sdp
, struct gfs2_glock
*gl
)
724 down_write(&sdp
->sd_log_flush_lock
);
726 /* Log might have been flushed while we waited for the flush lock */
727 if (gl
&& !test_bit(GLF_LFLUSH
, &gl
->gl_flags
)) {
728 up_write(&sdp
->sd_log_flush_lock
);
731 trace_gfs2_log_flush(sdp
, 1);
733 ai
= kzalloc(sizeof(struct gfs2_ail
), GFP_NOFS
| __GFP_NOFAIL
);
734 INIT_LIST_HEAD(&ai
->ai_ail1_list
);
735 INIT_LIST_HEAD(&ai
->ai_ail2_list
);
737 if (sdp
->sd_log_num_buf
!= sdp
->sd_log_commited_buf
) {
738 printk(KERN_INFO
"GFS2: log buf %u %u\n", sdp
->sd_log_num_buf
,
739 sdp
->sd_log_commited_buf
);
740 gfs2_assert_withdraw(sdp
, 0);
742 if (sdp
->sd_log_num_databuf
!= sdp
->sd_log_commited_databuf
) {
743 printk(KERN_INFO
"GFS2: log databuf %u %u\n",
744 sdp
->sd_log_num_databuf
, sdp
->sd_log_commited_databuf
);
745 gfs2_assert_withdraw(sdp
, 0);
747 gfs2_assert_withdraw(sdp
,
748 sdp
->sd_log_num_revoke
== sdp
->sd_log_commited_revoke
);
750 sdp
->sd_log_flush_head
= sdp
->sd_log_head
;
751 sdp
->sd_log_flush_wrapped
= 0;
752 ai
->ai_first
= sdp
->sd_log_flush_head
;
754 gfs2_ordered_write(sdp
);
755 lops_before_commit(sdp
);
756 gfs2_ordered_wait(sdp
);
758 if (sdp
->sd_log_head
!= sdp
->sd_log_flush_head
)
759 log_flush_commit(sdp
);
760 else if (sdp
->sd_log_tail
!= current_tail(sdp
) && !sdp
->sd_log_idle
){
762 atomic_dec(&sdp
->sd_log_blks_free
); /* Adjust for unreserved buffer */
763 trace_gfs2_log_blocks(sdp
, -1);
764 gfs2_log_unlock(sdp
);
765 log_write_header(sdp
, 0, PULL
);
767 lops_after_commit(sdp
, ai
);
770 sdp
->sd_log_head
= sdp
->sd_log_flush_head
;
771 sdp
->sd_log_blks_reserved
= 0;
772 sdp
->sd_log_commited_buf
= 0;
773 sdp
->sd_log_commited_databuf
= 0;
774 sdp
->sd_log_commited_revoke
= 0;
776 spin_lock(&sdp
->sd_ail_lock
);
777 if (!list_empty(&ai
->ai_ail1_list
)) {
778 list_add(&ai
->ai_list
, &sdp
->sd_ail1_list
);
781 spin_unlock(&sdp
->sd_ail_lock
);
782 gfs2_log_unlock(sdp
);
783 trace_gfs2_log_flush(sdp
, 0);
784 up_write(&sdp
->sd_log_flush_lock
);
789 static void log_refund(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
791 unsigned int reserved
;
796 sdp
->sd_log_commited_buf
+= tr
->tr_num_buf_new
- tr
->tr_num_buf_rm
;
797 sdp
->sd_log_commited_databuf
+= tr
->tr_num_databuf_new
-
798 tr
->tr_num_databuf_rm
;
799 gfs2_assert_withdraw(sdp
, (((int)sdp
->sd_log_commited_buf
) >= 0) ||
800 (((int)sdp
->sd_log_commited_databuf
) >= 0));
801 sdp
->sd_log_commited_revoke
+= tr
->tr_num_revoke
- tr
->tr_num_revoke_rm
;
802 reserved
= calc_reserved(sdp
);
803 gfs2_assert_withdraw(sdp
, sdp
->sd_log_blks_reserved
+ tr
->tr_reserved
>= reserved
);
804 unused
= sdp
->sd_log_blks_reserved
- reserved
+ tr
->tr_reserved
;
805 atomic_add(unused
, &sdp
->sd_log_blks_free
);
806 trace_gfs2_log_blocks(sdp
, unused
);
807 gfs2_assert_withdraw(sdp
, atomic_read(&sdp
->sd_log_blks_free
) <=
808 sdp
->sd_jdesc
->jd_blocks
);
809 sdp
->sd_log_blks_reserved
= reserved
;
811 gfs2_log_unlock(sdp
);
814 static void buf_lo_incore_commit(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
816 struct list_head
*head
= &tr
->tr_list_buf
;
817 struct gfs2_bufdata
*bd
;
820 while (!list_empty(head
)) {
821 bd
= list_entry(head
->next
, struct gfs2_bufdata
, bd_list_tr
);
822 list_del_init(&bd
->bd_list_tr
);
825 gfs2_log_unlock(sdp
);
826 gfs2_assert_warn(sdp
, !tr
->tr_num_buf
);
830 * gfs2_log_commit - Commit a transaction to the log
831 * @sdp: the filesystem
832 * @tr: the transaction
834 * We wake up gfs2_logd if the number of pinned blocks exceed thresh1
835 * or the total number of used blocks (pinned blocks plus AIL blocks)
836 * is greater than thresh2.
838 * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
844 void gfs2_log_commit(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
847 buf_lo_incore_commit(sdp
, tr
);
849 up_read(&sdp
->sd_log_flush_lock
);
851 if (atomic_read(&sdp
->sd_log_pinned
) > atomic_read(&sdp
->sd_log_thresh1
) ||
852 ((sdp
->sd_jdesc
->jd_blocks
- atomic_read(&sdp
->sd_log_blks_free
)) >
853 atomic_read(&sdp
->sd_log_thresh2
)))
854 wake_up(&sdp
->sd_logd_waitq
);
858 * gfs2_log_shutdown - write a shutdown header into a journal
859 * @sdp: the filesystem
863 void gfs2_log_shutdown(struct gfs2_sbd
*sdp
)
865 down_write(&sdp
->sd_log_flush_lock
);
867 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_blks_reserved
);
868 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_buf
);
869 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_revoke
);
870 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_rg
);
871 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_databuf
);
872 gfs2_assert_withdraw(sdp
, list_empty(&sdp
->sd_ail1_list
));
874 sdp
->sd_log_flush_head
= sdp
->sd_log_head
;
875 sdp
->sd_log_flush_wrapped
= 0;
877 log_write_header(sdp
, GFS2_LOG_HEAD_UNMOUNT
,
878 (sdp
->sd_log_tail
== current_tail(sdp
)) ? 0 : PULL
);
880 gfs2_assert_warn(sdp
, atomic_read(&sdp
->sd_log_blks_free
) == sdp
->sd_jdesc
->jd_blocks
);
881 gfs2_assert_warn(sdp
, sdp
->sd_log_head
== sdp
->sd_log_tail
);
882 gfs2_assert_warn(sdp
, list_empty(&sdp
->sd_ail2_list
));
884 sdp
->sd_log_head
= sdp
->sd_log_flush_head
;
885 sdp
->sd_log_tail
= sdp
->sd_log_head
;
887 up_write(&sdp
->sd_log_flush_lock
);
892 * gfs2_meta_syncfs - sync all the buffers in a filesystem
893 * @sdp: the filesystem
897 void gfs2_meta_syncfs(struct gfs2_sbd
*sdp
)
899 gfs2_log_flush(sdp
, NULL
);
901 gfs2_ail1_start(sdp
);
903 if (gfs2_ail1_empty(sdp
))
906 gfs2_log_flush(sdp
, NULL
);
909 static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd
*sdp
)
911 return (atomic_read(&sdp
->sd_log_pinned
) >= atomic_read(&sdp
->sd_log_thresh1
));
914 static inline int gfs2_ail_flush_reqd(struct gfs2_sbd
*sdp
)
916 unsigned int used_blocks
= sdp
->sd_jdesc
->jd_blocks
- atomic_read(&sdp
->sd_log_blks_free
);
917 return used_blocks
>= atomic_read(&sdp
->sd_log_thresh2
);
921 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
922 * @sdp: Pointer to GFS2 superblock
924 * Also, periodically check to make sure that we're using the most recent
928 int gfs2_logd(void *data
)
930 struct gfs2_sbd
*sdp
= data
;
935 while (!kthread_should_stop()) {
937 preflush
= atomic_read(&sdp
->sd_log_pinned
);
938 if (gfs2_jrnl_flush_reqd(sdp
) || t
== 0) {
939 gfs2_ail1_empty(sdp
);
940 gfs2_log_flush(sdp
, NULL
);
943 if (gfs2_ail_flush_reqd(sdp
)) {
944 gfs2_ail1_start(sdp
);
946 gfs2_ail1_empty(sdp
);
947 gfs2_log_flush(sdp
, NULL
);
950 if (!gfs2_ail_flush_reqd(sdp
))
951 wake_up(&sdp
->sd_log_waitq
);
953 t
= gfs2_tune_get(sdp
, gt_logd_secs
) * HZ
;
958 prepare_to_wait(&sdp
->sd_logd_waitq
, &wait
,
960 if (!gfs2_ail_flush_reqd(sdp
) &&
961 !gfs2_jrnl_flush_reqd(sdp
) &&
962 !kthread_should_stop())
963 t
= schedule_timeout(t
);
964 } while(t
&& !gfs2_ail_flush_reqd(sdp
) &&
965 !gfs2_jrnl_flush_reqd(sdp
) &&
966 !kthread_should_stop());
967 finish_wait(&sdp
->sd_logd_waitq
, &wait
);