/*
 *   Copyright (C) International Business Machines Corp., 2000-2003
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
} mpStat;
#endif

#define HASH_BITS 10		/* This makes hash_table 1 4K page */
#define HASH_SIZE (1 << HASH_BITS)
static struct metapage **hash_table = NULL;
static unsigned long hash_order;

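/*
 * The hash chains, reference counts and flag bits of a metapage are
 * protected by meta_lock; META_locked itself acts as a per-metapage
 * sleeping lock built on top of that spinlock (see lock_metapage() and
 * __lock_metapage() below).
 */
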
static inline int metapage_locked(struct metapage *mp)
{
	return test_bit(META_locked, &mp->flag);
}

static inline int trylock_metapage(struct metapage *mp)
{
	return test_and_set_bit(META_locked, &mp->flag);
}

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit(META_locked, &mp->flag);
	/* wake anyone sleeping in __lock_metapage() */
	wake_up(&mp->wait);
}

static void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);

	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			/* drop meta_lock while sleeping on mp->wait */
			spin_unlock(&meta_lock);
			schedule();
			spin_lock(&meta_lock);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

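/*
 * Called with meta_lock held.  The fast path is a simple test_and_set of
 * META_locked; if the bit is already set we fall back to the sleeping slow
 * path above.
 */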
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}

#define METAPOOL_MIN_PAGES 32
static kmem_cache_t *metapage_cache;
static mempool_t *metapage_mempool;

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct metapage *mp = (struct metapage *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		/* start from a clean flag word before setting META_free */
		mp->flag = 0;
		set_bit(META_free, &mp->flag);
		init_waitqueue_head(&mp->wait);
	}
}

static inline struct metapage *alloc_metapage(int no_wait)
{
	return mempool_alloc(metapage_mempool, no_wait ? GFP_ATOMIC : GFP_NOFS);
}

static inline void free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	mempool_free(mp, metapage_mempool);
}

int __init metapage_init(void)
{
	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once, NULL);
	if (metapage_cache == NULL)
		return -ENOMEM;

	metapage_mempool = mempool_create(METAPOOL_MIN_PAGES,
					  mempool_alloc_slab,
					  mempool_free_slab, metapage_cache);

	if (metapage_mempool == NULL) {
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}

	/*
	 * Now the hash list
	 */
	for (hash_order = 0;
	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
	     hash_order++);
	hash_table =
	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
	assert(hash_table);
	memset(hash_table, 0, PAGE_SIZE << hash_order);

	return 0;
}

void metapage_exit(void)
{
	mempool_destroy(metapage_mempool);
	kmem_cache_destroy(metapage_cache);
}

/*
 * Basically same hash as in pagemap.h, but using our hash table
 */
static struct metapage **meta_hash(struct address_space *mapping,
				   unsigned long index)
{
#define i (((unsigned long)mapping)/ \
	   (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
#define s(x) ((x) + ((x) >> HASH_BITS))
	return hash_table + (s(i + index) & (HASH_SIZE - 1));
#undef i
#undef s
}

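/*
 * The hash chain helpers below (search_hash, add_to_hash, remove_from_hash)
 * manipulate the per-bucket doubly-linked chains and must be called with
 * meta_lock held.
 */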
static struct metapage *search_hash(struct metapage ** hash_ptr,
				    struct address_space *mapping,
				    unsigned long index)
{
	struct metapage *ptr;

	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
		if ((ptr->mapping == mapping) && (ptr->index == index))
			return ptr;
	}

	return NULL;
}

static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (*hash_ptr)
		(*hash_ptr)->hash_prev = mp;

	mp->hash_prev = NULL;
	mp->hash_next = *hash_ptr;
	*hash_ptr = mp;
}

static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (mp->hash_prev)
		mp->hash_prev->hash_next = mp->hash_next;
	else {
		assert(*hash_ptr == mp);
		*hash_ptr = mp->hash_next;
	}

	if (mp->hash_next)
		mp->hash_next->hash_prev = mp->hash_prev;
}

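/*
 * __get_metapage() looks a metapage up in the hash (or allocates a new one),
 * takes a reference on it and returns it locked.  'absolute' selects the
 * block device's mapping instead of the inode's own mapping; 'new' means the
 * caller will initialize the data, so the backing page is grabbed rather
 * than read from disk.
 */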
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	struct metapage **hash_ptr;
	int l2BlocksPerPage;
	int l2bsize;
	struct address_space *mapping;
	struct metapage *mp;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);

	if (absolute)
		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	hash_ptr = meta_hash(mapping, lblock);
again:
	spin_lock(&meta_lock);
	mp = search_hash(hash_ptr, mapping, lblock);
	if (mp) {
	      page_found:
		mp->count++;
		lock_metapage(mp);
		spin_unlock(&meta_lock);
		if (test_bit(META_stale, &mp->flag)) {
			release_metapage(mp);
			yield();	/* Let other waiters release it, too */
			goto again;
		}

		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				release_metapage(mp);
				return NULL;
			}
			clear_bit(META_discard, &mp->flag);
		}

		jfs_info("__get_metapage: found 0x%p, in hash", mp);
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			release_metapage(mp);
			return NULL;
		}
	} else {
		l2bsize = inode->i_blkbits;
		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
		page_index = lblock >> l2BlocksPerPage;
		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
		    l2bsize;
		if ((page_offset + size) > PAGE_CACHE_SIZE) {
			spin_unlock(&meta_lock);
			jfs_err("MetaData crosses page boundary!!");
			return NULL;
		}

		/*
		 * Locks held on aggregate inode pages are usually
		 * not held long, and they are taken in critical code
		 * paths (committing dirty inodes, txCommit thread)
		 *
		 * Attempt to get metapage without blocking, tapping into
		 * reserves if necessary.
		 */
		mp = NULL;
		if (JFS_IP(inode)->fileset == AGGREGATE_I) {
			mp = mempool_alloc(metapage_mempool, GFP_ATOMIC);
			if (!mp) {
				/*
				 * mempool is supposed to protect us from
				 * failing here.  We will try a blocking
				 * call, but a deadlock is possible here
				 */
				printk(KERN_WARNING
				       "__get_metapage: atomic call to mempool_alloc failed.\n");
				printk(KERN_WARNING
				       "Will attempt blocking call\n");
			}
		}
		if (!mp) {
			struct metapage *mp2;

			spin_unlock(&meta_lock);
			mp = mempool_alloc(metapage_mempool, GFP_NOFS);
			spin_lock(&meta_lock);

			/* we dropped the meta_lock, we need to search the
			 * hash again.
			 */
			mp2 = search_hash(hash_ptr, mapping, lblock);
			if (mp2) {
				free_metapage(mp);
				mp = mp2;
				goto page_found;
			}
		}
		mp->flag = 0;
		lock_metapage(mp);
		if (absolute)
			set_bit(META_absolute, &mp->flag);
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		atomic_set(&mp->nohomeok, 0);
		mp->mapping = mapping;
		mp->index = lblock;
		mp->page = NULL;
		mp->logical_size = size;
		add_to_hash(mp, hash_ptr);
		spin_unlock(&meta_lock);

		if (new) {
			jfs_info("__get_metapage: Calling grab_cache_page");
			mp->page = grab_cache_page(mapping, page_index);
			if (!mp->page) {
				jfs_err("grab_cache_page failed!");
				goto freeit;
			} else {
				INCREMENT(mpStat.pagealloc);
				unlock_page(mp->page);
			}
		} else {
			jfs_info("__get_metapage: Calling read_cache_page");
			mp->page = read_cache_page(mapping, lblock,
				   (filler_t *)mapping->a_ops->readpage, NULL);
			if (IS_ERR(mp->page)) {
				jfs_err("read_cache_page failed!");
				goto freeit;
			} else
				INCREMENT(mpStat.pagealloc);
		}
		mp->data = kmap(mp->page) + page_offset;
	}

	if (new)
		memset(mp->data, 0, PSIZE);

	jfs_info("__get_metapage: returning = 0x%p", mp);
	return mp;

freeit:
	spin_lock(&meta_lock);
	remove_from_hash(mp, hash_ptr);
	free_metapage(mp);
	spin_unlock(&meta_lock);
	return NULL;
}

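/*
 * hold_metapage() takes an extra reference on an already-hashed metapage.
 * With 'force' set the caller refuses to sleep: if someone else holds the
 * metapage lock, META_forced is recorded instead of waiting for it.
 */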
void hold_metapage(struct metapage * mp, int force)
{
	spin_lock(&meta_lock);

	mp->count++;

	if (force) {
		ASSERT (!(test_bit(META_forced, &mp->flag)));
		if (trylock_metapage(mp))
			set_bit(META_forced, &mp->flag);
	} else
		lock_metapage(mp);

	spin_unlock(&meta_lock);
}

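/*
 * __write_metapage() pushes a dirty metapage to its backing page cache page
 * through the mapping's prepare_write/commit_write address_space operations.
 * Discarded metapages are simply marked clean and skipped.
 */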
static void __write_metapage(struct metapage * mp)
{
	int l2bsize = mp->mapping->host->i_blkbits;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	unsigned long page_index;
	unsigned long page_offset;
	int rc;

	jfs_info("__write_metapage: mp = 0x%p", mp);

	if (test_bit(META_discard, &mp->flag)) {
		/*
		 * This metadata is no longer valid
		 */
		clear_bit(META_dirty, &mp->flag);
		return;
	}

	page_index = mp->page->index;
	page_offset =
	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

	lock_page(mp->page);
	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
					       page_offset + mp->logical_size);
	if (rc) {
		jfs_err("prepare_write return %d!", rc);
		ClearPageUptodate(mp->page);
		unlock_page(mp->page);
		clear_bit(META_dirty, &mp->flag);
		return;
	}
	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
					      page_offset + mp->logical_size);
	if (rc)
		jfs_err("commit_write returned %d", rc);

	unlock_page(mp->page);
	clear_bit(META_dirty, &mp->flag);

	jfs_info("__write_metapage done");
}

static inline void sync_metapage(struct metapage *mp)
{
	struct page *page = mp->page;

	page_cache_get(page);
	lock_page(page);

	/* we're done with this page - no need to check for errors */
	if (page_has_buffers(page))
		write_one_page(page, 1);
	else
		unlock_page(page);
	page_cache_release(page);
}

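/*
 * release_metapage() drops one reference.  When the last reference goes
 * away the metapage is written back if dirty (and synced if requested),
 * removed from the log sync list and the hash, and returned to the mempool.
 */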
void release_metapage(struct metapage * mp)
{
	struct jfs_log *log;

	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	spin_lock(&meta_lock);
	if (test_bit(META_forced, &mp->flag)) {
		clear_bit(META_forced, &mp->flag);
		mp->count--;
		spin_unlock(&meta_lock);
		return;
	}

	assert(mp->count);
	if (--mp->count || atomic_read(&mp->nohomeok)) {
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}

	if (mp->page) {
		/* Releasing spinlock, we have to check mp->count later */
		set_bit(META_stale, &mp->flag);
		spin_unlock(&meta_lock);
		kunmap(mp->page);
		mp->data = NULL;
		if (test_bit(META_dirty, &mp->flag))
			__write_metapage(mp);
		if (test_bit(META_sync, &mp->flag)) {
			sync_metapage(mp);
			clear_bit(META_sync, &mp->flag);
		}

		if (test_bit(META_discard, &mp->flag)) {
			lock_page(mp->page);
			block_invalidatepage(mp->page, 0);
			unlock_page(mp->page);
		}

		page_cache_release(mp->page);
		mp->page = NULL;
		INCREMENT(mpStat.pagefree);
		spin_lock(&meta_lock);
	}

	if (mp->lsn) {
		/*
		 * Remove metapage from logsynclist.
		 */
		log = mp->log;
		LOGSYNC_LOCK(log);
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
		LOGSYNC_UNLOCK(log);
	}
	if (mp->count) {
		/* Someone else is trying to get this metapage */
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}
	remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
	spin_unlock(&meta_lock);

	free_metapage(mp);
}

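/*
 * __invalidate_metapages() marks every cached metapage in the block range
 * [addr, addr + len) of the block device mapping as META_discard so the
 * stale metadata is never written back; pages in the range with no cached
 * metapage are invalidated directly.
 */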
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	struct metapage **hash_ptr;
	unsigned long lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
	struct metapage *mp;
	struct page *page;

	/*
	 * First, mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr; lblock < addr + len;
	     lblock += 1 << l2BlocksPerPage) {
		hash_ptr = meta_hash(mapping, lblock);
again:
		spin_lock(&meta_lock);
		mp = search_hash(hash_ptr, mapping, lblock);
		if (mp) {
			if (test_bit(META_stale, &mp->flag)) {
				/* Racing with release_metapage */
				mp->count++;
				lock_metapage(mp);
				spin_unlock(&meta_lock);
				/* racing release_metapage should be done now */
				release_metapage(mp);
				goto again;
			}

			set_bit(META_discard, &mp->flag);
			spin_unlock(&meta_lock);
		} else {
			spin_unlock(&meta_lock);
			page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
			if (page) {
				block_invalidatepage(page, 0);
				unlock_page(page);
				page_cache_release(page);
			}
		}
	}
}

#ifdef CONFIG_JFS_STATISTICS
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait);

	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
#endif