2 * Copyright (C) International Business Machines Corp., 2000-2005
3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <linux/module.h>
23 #include <linux/bio.h>
24 #include <linux/slab.h>
25 #include <linux/init.h>
26 #include <linux/buffer_head.h>
27 #include <linux/mempool.h>
28 #include <linux/seq_file.h>
29 #include "jfs_incore.h"
30 #include "jfs_superblock.h"
31 #include "jfs_filsys.h"
32 #include "jfs_metapage.h"
33 #include "jfs_txnmgr.h"
34 #include "jfs_debug.h"
36 #ifdef CONFIG_JFS_STATISTICS
38 uint pagealloc
; /* # of page allocations */
39 uint pagefree
; /* # of page frees */
40 uint lockwait
; /* # of sleeping lock_metapage() calls */
44 #define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
45 #define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag)
47 static inline void unlock_metapage(struct metapage
*mp
)
49 clear_bit_unlock(META_locked
, &mp
->flag
);
53 static inline void __lock_metapage(struct metapage
*mp
)
55 DECLARE_WAITQUEUE(wait
, current
);
56 INCREMENT(mpStat
.lockwait
);
57 add_wait_queue_exclusive(&mp
->wait
, &wait
);
59 set_current_state(TASK_UNINTERRUPTIBLE
);
60 if (metapage_locked(mp
)) {
61 unlock_page(mp
->page
);
65 } while (trylock_metapage(mp
));
66 __set_current_state(TASK_RUNNING
);
67 remove_wait_queue(&mp
->wait
, &wait
);
71 * Must have mp->page locked
73 static inline void lock_metapage(struct metapage
*mp
)
75 if (trylock_metapage(mp
))
79 #define METAPOOL_MIN_PAGES 32
80 static struct kmem_cache
*metapage_cache
;
81 static mempool_t
*metapage_mempool
;
83 #define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
90 struct metapage
*mp
[MPS_PER_PAGE
];
92 #define mp_anchor(page) ((struct meta_anchor *)page_private(page))
94 static inline struct metapage
*page_to_mp(struct page
*page
, int offset
)
96 if (!PagePrivate(page
))
98 return mp_anchor(page
)->mp
[offset
>> L2PSIZE
];
101 static inline int insert_metapage(struct page
*page
, struct metapage
*mp
)
103 struct meta_anchor
*a
;
105 int l2mp_blocks
; /* log2 blocks per metapage */
107 if (PagePrivate(page
))
110 a
= kzalloc(sizeof(struct meta_anchor
), GFP_NOFS
);
113 set_page_private(page
, (unsigned long)a
);
114 SetPagePrivate(page
);
119 l2mp_blocks
= L2PSIZE
- page
->mapping
->host
->i_blkbits
;
120 index
= (mp
->index
>> l2mp_blocks
) & (MPS_PER_PAGE
- 1);
128 static inline void remove_metapage(struct page
*page
, struct metapage
*mp
)
130 struct meta_anchor
*a
= mp_anchor(page
);
131 int l2mp_blocks
= L2PSIZE
- page
->mapping
->host
->i_blkbits
;
134 index
= (mp
->index
>> l2mp_blocks
) & (MPS_PER_PAGE
- 1);
136 BUG_ON(a
->mp
[index
] != mp
);
139 if (--a
->mp_count
== 0) {
141 set_page_private(page
, 0);
142 ClearPagePrivate(page
);
147 static inline void inc_io(struct page
*page
)
149 atomic_inc(&mp_anchor(page
)->io_count
);
152 static inline void dec_io(struct page
*page
, void (*handler
) (struct page
*))
154 if (atomic_dec_and_test(&mp_anchor(page
)->io_count
))
159 static inline struct metapage
*page_to_mp(struct page
*page
, int offset
)
161 return PagePrivate(page
) ? (struct metapage
*)page_private(page
) : NULL
;
164 static inline int insert_metapage(struct page
*page
, struct metapage
*mp
)
167 set_page_private(page
, (unsigned long)mp
);
168 SetPagePrivate(page
);
174 static inline void remove_metapage(struct page
*page
, struct metapage
*mp
)
176 set_page_private(page
, 0);
177 ClearPagePrivate(page
);
181 #define inc_io(page) do {} while(0)
182 #define dec_io(page, handler) handler(page)
186 static void init_once(void *foo
)
188 struct metapage
*mp
= (struct metapage
*)foo
;
196 set_bit(META_free
, &mp
->flag
);
197 init_waitqueue_head(&mp
->wait
);
200 static inline struct metapage
*alloc_metapage(gfp_t gfp_mask
)
202 return mempool_alloc(metapage_mempool
, gfp_mask
);
205 static inline void free_metapage(struct metapage
*mp
)
208 set_bit(META_free
, &mp
->flag
);
210 mempool_free(mp
, metapage_mempool
);
213 int __init
metapage_init(void)
216 * Allocate the metapage structures
218 metapage_cache
= kmem_cache_create("jfs_mp", sizeof(struct metapage
),
220 if (metapage_cache
== NULL
)
223 metapage_mempool
= mempool_create_slab_pool(METAPOOL_MIN_PAGES
,
226 if (metapage_mempool
== NULL
) {
227 kmem_cache_destroy(metapage_cache
);
234 void metapage_exit(void)
236 mempool_destroy(metapage_mempool
);
237 kmem_cache_destroy(metapage_cache
);
240 static inline void drop_metapage(struct page
*page
, struct metapage
*mp
)
242 if (mp
->count
|| mp
->nohomeok
|| test_bit(META_dirty
, &mp
->flag
) ||
243 test_bit(META_io
, &mp
->flag
))
245 remove_metapage(page
, mp
);
246 INCREMENT(mpStat
.pagefree
);
251 * Metapage address space operations
254 static sector_t
metapage_get_blocks(struct inode
*inode
, sector_t lblock
,
260 sector_t file_blocks
= (inode
->i_size
+ inode
->i_sb
->s_blocksize
- 1) >>
263 if (lblock
>= file_blocks
)
265 if (lblock
+ *len
> file_blocks
)
266 *len
= file_blocks
- lblock
;
269 rc
= xtLookup(inode
, (s64
)lblock
, *len
, &xflag
, &xaddr
, len
, 0);
270 if ((rc
== 0) && *len
)
271 lblock
= (sector_t
)xaddr
;
274 } /* else no mapping */
279 static void last_read_complete(struct page
*page
)
281 if (!PageError(page
))
282 SetPageUptodate(page
);
286 static void metapage_read_end_io(struct bio
*bio
, int err
)
288 struct page
*page
= bio
->bi_private
;
290 if (!test_bit(BIO_UPTODATE
, &bio
->bi_flags
)) {
291 printk(KERN_ERR
"metapage_read_end_io: I/O error\n");
295 dec_io(page
, last_read_complete
);
299 static void remove_from_logsync(struct metapage
*mp
)
301 struct jfs_log
*log
= mp
->log
;
304 * This can race. Recheck that log hasn't been set to null, and after
305 * acquiring logsync lock, recheck lsn
310 LOGSYNC_LOCK(log
, flags
);
316 list_del(&mp
->synclist
);
318 LOGSYNC_UNLOCK(log
, flags
);
321 static void last_write_complete(struct page
*page
)
326 for (offset
= 0; offset
< PAGE_CACHE_SIZE
; offset
+= PSIZE
) {
327 mp
= page_to_mp(page
, offset
);
328 if (mp
&& test_bit(META_io
, &mp
->flag
)) {
330 remove_from_logsync(mp
);
331 clear_bit(META_io
, &mp
->flag
);
334 * I'd like to call drop_metapage here, but I don't think it's
335 * safe unless I have the page locked
338 end_page_writeback(page
);
341 static void metapage_write_end_io(struct bio
*bio
, int err
)
343 struct page
*page
= bio
->bi_private
;
345 BUG_ON(!PagePrivate(page
));
347 if (! test_bit(BIO_UPTODATE
, &bio
->bi_flags
)) {
348 printk(KERN_ERR
"metapage_write_end_io: I/O error\n");
351 dec_io(page
, last_write_complete
);
355 static int metapage_writepage(struct page
*page
, struct writeback_control
*wbc
)
357 struct bio
*bio
= NULL
;
358 int block_offset
; /* block offset of mp within page */
359 struct inode
*inode
= page
->mapping
->host
;
360 int blocks_per_mp
= JFS_SBI(inode
->i_sb
)->nbperpage
;
368 sector_t next_block
= 0;
370 unsigned long bio_bytes
= 0;
371 unsigned long bio_offset
= 0;
375 page_start
= (sector_t
)page
->index
<<
376 (PAGE_CACHE_SHIFT
- inode
->i_blkbits
);
377 BUG_ON(!PageLocked(page
));
378 BUG_ON(PageWriteback(page
));
379 set_page_writeback(page
);
381 for (offset
= 0; offset
< PAGE_CACHE_SIZE
; offset
+= PSIZE
) {
382 mp
= page_to_mp(page
, offset
);
384 if (!mp
|| !test_bit(META_dirty
, &mp
->flag
))
387 if (mp
->nohomeok
&& !test_bit(META_forcewrite
, &mp
->flag
)) {
390 * Make sure this page isn't blocked indefinitely.
391 * If the journal isn't undergoing I/O, push it
393 if (mp
->log
&& !(mp
->log
->cflag
& logGC_PAGEOUT
))
394 jfs_flush_journal(mp
->log
, 0);
398 clear_bit(META_dirty
, &mp
->flag
);
399 set_bit(META_io
, &mp
->flag
);
400 block_offset
= offset
>> inode
->i_blkbits
;
401 lblock
= page_start
+ block_offset
;
403 if (xlen
&& lblock
== next_block
) {
404 /* Contiguous, in memory & on disk */
405 len
= min(xlen
, blocks_per_mp
);
407 bio_bytes
+= len
<< inode
->i_blkbits
;
411 if (bio_add_page(bio
, page
, bio_bytes
, bio_offset
) <
415 * Increment counter before submitting i/o to keep
416 * count from hitting zero before we're through
421 submit_bio(WRITE
, bio
);
426 xlen
= (PAGE_CACHE_SIZE
- offset
) >> inode
->i_blkbits
;
427 pblock
= metapage_get_blocks(inode
, lblock
, &xlen
);
429 printk(KERN_ERR
"JFS: metapage_get_blocks failed\n");
431 * We already called inc_io(), but can't cancel it
432 * with dec_io() until we're done with the page
437 len
= min(xlen
, (int)JFS_SBI(inode
->i_sb
)->nbperpage
);
439 bio
= bio_alloc(GFP_NOFS
, 1);
440 bio
->bi_bdev
= inode
->i_sb
->s_bdev
;
441 bio
->bi_sector
= pblock
<< (inode
->i_blkbits
- 9);
442 bio
->bi_end_io
= metapage_write_end_io
;
443 bio
->bi_private
= page
;
445 /* Don't call bio_add_page yet, we may add to this vec */
447 bio_bytes
= len
<< inode
->i_blkbits
;
450 next_block
= lblock
+ len
;
453 if (bio_add_page(bio
, page
, bio_bytes
, bio_offset
) < bio_bytes
)
458 submit_bio(WRITE
, bio
);
462 redirty_page_for_writepage(wbc
, page
);
469 if (nr_underway
== 0)
470 end_page_writeback(page
);
474 /* We should never reach here, since we're only adding one vec */
475 printk(KERN_ERR
"JFS: bio_add_page failed unexpectedly\n");
478 print_hex_dump(KERN_ERR
, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS
, 16,
479 4, bio
, sizeof(*bio
), 0);
483 dec_io(page
, last_write_complete
);
486 dec_io(page
, last_write_complete
);
490 static int metapage_readpage(struct file
*fp
, struct page
*page
)
492 struct inode
*inode
= page
->mapping
->host
;
493 struct bio
*bio
= NULL
;
495 int blocks_per_page
= PAGE_CACHE_SIZE
>> inode
->i_blkbits
;
496 sector_t page_start
; /* address of page in fs blocks */
502 BUG_ON(!PageLocked(page
));
503 page_start
= (sector_t
)page
->index
<<
504 (PAGE_CACHE_SHIFT
- inode
->i_blkbits
);
507 while (block_offset
< blocks_per_page
) {
508 xlen
= blocks_per_page
- block_offset
;
509 pblock
= metapage_get_blocks(inode
, page_start
+ block_offset
,
512 if (!PagePrivate(page
))
513 insert_metapage(page
, NULL
);
516 submit_bio(READ
, bio
);
518 bio
= bio_alloc(GFP_NOFS
, 1);
519 bio
->bi_bdev
= inode
->i_sb
->s_bdev
;
520 bio
->bi_sector
= pblock
<< (inode
->i_blkbits
- 9);
521 bio
->bi_end_io
= metapage_read_end_io
;
522 bio
->bi_private
= page
;
523 len
= xlen
<< inode
->i_blkbits
;
524 offset
= block_offset
<< inode
->i_blkbits
;
525 if (bio_add_page(bio
, page
, len
, offset
) < len
)
527 block_offset
+= xlen
;
532 submit_bio(READ
, bio
);
539 printk(KERN_ERR
"JFS: bio_add_page failed unexpectedly\n");
541 dec_io(page
, last_read_complete
);
545 static int metapage_releasepage(struct page
*page
, gfp_t gfp_mask
)
551 for (offset
= 0; offset
< PAGE_CACHE_SIZE
; offset
+= PSIZE
) {
552 mp
= page_to_mp(page
, offset
);
557 jfs_info("metapage_releasepage: mp = 0x%p", mp
);
558 if (mp
->count
|| mp
->nohomeok
||
559 test_bit(META_dirty
, &mp
->flag
)) {
560 jfs_info("count = %ld, nohomeok = %d", mp
->count
,
566 remove_from_logsync(mp
);
567 remove_metapage(page
, mp
);
568 INCREMENT(mpStat
.pagefree
);
574 static void metapage_invalidatepage(struct page
*page
, unsigned int offset
,
577 BUG_ON(offset
|| length
< PAGE_CACHE_SIZE
);
579 BUG_ON(PageWriteback(page
));
581 metapage_releasepage(page
, 0);
584 const struct address_space_operations jfs_metapage_aops
= {
585 .readpage
= metapage_readpage
,
586 .writepage
= metapage_writepage
,
587 .releasepage
= metapage_releasepage
,
588 .invalidatepage
= metapage_invalidatepage
,
589 .set_page_dirty
= __set_page_dirty_nobuffers
,
592 struct metapage
*__get_metapage(struct inode
*inode
, unsigned long lblock
,
593 unsigned int size
, int absolute
,
598 struct address_space
*mapping
;
599 struct metapage
*mp
= NULL
;
601 unsigned long page_index
;
602 unsigned long page_offset
;
604 jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
605 inode
->i_ino
, lblock
, absolute
);
607 l2bsize
= inode
->i_blkbits
;
608 l2BlocksPerPage
= PAGE_CACHE_SHIFT
- l2bsize
;
609 page_index
= lblock
>> l2BlocksPerPage
;
610 page_offset
= (lblock
- (page_index
<< l2BlocksPerPage
)) << l2bsize
;
611 if ((page_offset
+ size
) > PAGE_CACHE_SIZE
) {
612 jfs_err("MetaData crosses page boundary!!");
613 jfs_err("lblock = %lx, size = %d", lblock
, size
);
618 mapping
= JFS_SBI(inode
->i_sb
)->direct_inode
->i_mapping
;
621 * If an nfs client tries to read an inode that is larger
622 * than any existing inodes, we may try to read past the
623 * end of the inode map
625 if ((lblock
<< inode
->i_blkbits
) >= inode
->i_size
)
627 mapping
= inode
->i_mapping
;
630 if (new && (PSIZE
== PAGE_CACHE_SIZE
)) {
631 page
= grab_cache_page(mapping
, page_index
);
633 jfs_err("grab_cache_page failed!");
636 SetPageUptodate(page
);
638 page
= read_mapping_page(mapping
, page_index
, NULL
);
639 if (IS_ERR(page
) || !PageUptodate(page
)) {
640 jfs_err("read_mapping_page failed!");
646 mp
= page_to_mp(page
, page_offset
);
648 if (mp
->logical_size
!= size
) {
649 jfs_error(inode
->i_sb
,
650 "get_mp->logical_size != size\n");
651 jfs_err("logical_size = %d, size = %d",
652 mp
->logical_size
, size
);
658 if (test_bit(META_discard
, &mp
->flag
)) {
660 jfs_error(inode
->i_sb
,
661 "using a discarded metapage\n");
662 discard_metapage(mp
);
665 clear_bit(META_discard
, &mp
->flag
);
668 INCREMENT(mpStat
.pagealloc
);
669 mp
= alloc_metapage(GFP_NOFS
);
672 mp
->xflag
= COMMIT_PAGE
;
675 mp
->logical_size
= size
;
676 mp
->data
= page_address(page
) + page_offset
;
678 if (unlikely(insert_metapage(page
, mp
))) {
686 jfs_info("zeroing mp = 0x%p", mp
);
687 memset(mp
->data
, 0, PSIZE
);
691 jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp
, mp
->data
);
699 void grab_metapage(struct metapage
* mp
)
701 jfs_info("grab_metapage: mp = 0x%p", mp
);
702 page_cache_get(mp
->page
);
706 unlock_page(mp
->page
);
709 void force_metapage(struct metapage
*mp
)
711 struct page
*page
= mp
->page
;
712 jfs_info("force_metapage: mp = 0x%p", mp
);
713 set_bit(META_forcewrite
, &mp
->flag
);
714 clear_bit(META_sync
, &mp
->flag
);
715 page_cache_get(page
);
717 set_page_dirty(page
);
718 write_one_page(page
, 1);
719 clear_bit(META_forcewrite
, &mp
->flag
);
720 page_cache_release(page
);
723 void hold_metapage(struct metapage
*mp
)
728 void put_metapage(struct metapage
*mp
)
730 if (mp
->count
|| mp
->nohomeok
) {
731 /* Someone else will release this */
732 unlock_page(mp
->page
);
735 page_cache_get(mp
->page
);
738 unlock_page(mp
->page
);
739 release_metapage(mp
);
742 void release_metapage(struct metapage
* mp
)
744 struct page
*page
= mp
->page
;
745 jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp
, mp
->flag
);
753 if (--mp
->count
|| mp
->nohomeok
) {
755 page_cache_release(page
);
759 if (test_bit(META_dirty
, &mp
->flag
)) {
760 set_page_dirty(page
);
761 if (test_bit(META_sync
, &mp
->flag
)) {
762 clear_bit(META_sync
, &mp
->flag
);
763 write_one_page(page
, 1);
764 lock_page(page
); /* write_one_page unlocks the page */
766 } else if (mp
->lsn
) /* discard_metapage doesn't remove it */
767 remove_from_logsync(mp
);
769 /* Try to keep metapages from using up too much memory */
770 drop_metapage(page
, mp
);
773 page_cache_release(page
);
776 void __invalidate_metapages(struct inode
*ip
, s64 addr
, int len
)
779 int l2BlocksPerPage
= PAGE_CACHE_SHIFT
- ip
->i_blkbits
;
780 int BlocksPerPage
= 1 << l2BlocksPerPage
;
781 /* All callers are interested in block device's mapping */
782 struct address_space
*mapping
=
783 JFS_SBI(ip
->i_sb
)->direct_inode
->i_mapping
;
789 * Mark metapages to discard. They will eventually be
790 * released, but should not be written.
792 for (lblock
= addr
& ~(BlocksPerPage
- 1); lblock
< addr
+ len
;
793 lblock
+= BlocksPerPage
) {
794 page
= find_lock_page(mapping
, lblock
>> l2BlocksPerPage
);
797 for (offset
= 0; offset
< PAGE_CACHE_SIZE
; offset
+= PSIZE
) {
798 mp
= page_to_mp(page
, offset
);
801 if (mp
->index
< addr
)
803 if (mp
->index
>= addr
+ len
)
806 clear_bit(META_dirty
, &mp
->flag
);
807 set_bit(META_discard
, &mp
->flag
);
809 remove_from_logsync(mp
);
812 page_cache_release(page
);
816 #ifdef CONFIG_JFS_STATISTICS
817 static int jfs_mpstat_proc_show(struct seq_file
*m
, void *v
)
820 "JFS Metapage statistics\n"
821 "=======================\n"
822 "page allocations = %d\n"
831 static int jfs_mpstat_proc_open(struct inode
*inode
, struct file
*file
)
833 return single_open(file
, jfs_mpstat_proc_show
, NULL
);
836 const struct file_operations jfs_mpstat_proc_fops
= {
837 .owner
= THIS_MODULE
,
838 .open
= jfs_mpstat_proc_open
,
841 .release
= single_release
,