fs/jfs/jfs_metapage.c (MOXA linux-2.6.9-moxart, from sdlinux-moxaart.tgz)
/*
 *   Copyright (C) International Business Machines Corp., 2000-2003
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;

#ifdef CONFIG_JFS_STATISTICS
struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
} mpStat;
#endif

#define HASH_BITS 10		/* This makes hash_table 1 4K page */
#define HASH_SIZE (1 << HASH_BITS)
static struct metapage **hash_table = NULL;
static unsigned long hash_order;
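
/*
 * With HASH_BITS == 10, HASH_SIZE is 1024 bucket heads; at four bytes
 * per pointer on a 32-bit target that is the one 4K page the comment
 * above is counting.  metapage_init() computes hash_order as the
 * smallest page order whose pages can hold HASH_SIZE pointers.
 */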

static inline int metapage_locked(struct metapage *mp)
{
	return test_bit(META_locked, &mp->flag);
}

static inline int trylock_metapage(struct metapage *mp)
{
	return test_and_set_bit(META_locked, &mp->flag);
}

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit(META_locked, &mp->flag);
	wake_up(&mp->wait);
}
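
/*
 * trylock_metapage() returns the previous value of META_locked, so zero
 * means the caller has just taken the lock and nonzero means another
 * task already holds it.  lock_metapage() below relies on this to take
 * the sleeping slow path only on contention.
 */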

static void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);

	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			spin_unlock(&meta_lock);
			schedule();
			spin_lock(&meta_lock);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

/* needs meta_lock */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}

#define METAPOOL_MIN_PAGES 32
static kmem_cache_t *metapage_cache;
static mempool_t *metapage_mempool;
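
/*
 * metapage structures come from a slab cache backed by a mempool that
 * keeps METAPOOL_MIN_PAGES objects in reserve:  a GFP_NOFS allocation
 * can always make progress, while a GFP_ATOMIC (no_wait) allocation may
 * return NULL once the reserve is exhausted.
 */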

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct metapage *mp = (struct metapage *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		mp->lid = 0;
		mp->lsn = 0;
		mp->flag = 0;
		mp->data = NULL;
		mp->clsn = 0;
		mp->log = NULL;
		set_bit(META_free, &mp->flag);
		init_waitqueue_head(&mp->wait);
	}
}

static inline struct metapage *alloc_metapage(int no_wait)
{
	return mempool_alloc(metapage_mempool, no_wait ? GFP_ATOMIC : GFP_NOFS);
}

static inline void free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	mempool_free(mp, metapage_mempool);
}

int __init metapage_init(void)
{
	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once, NULL);
	if (metapage_cache == NULL)
		return -ENOMEM;

	metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mempool_alloc_slab,
					  mempool_free_slab, metapage_cache);

	if (metapage_mempool == NULL) {
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}
	/*
	 * Now the hash list
	 */
	for (hash_order = 0;
	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
	     hash_order++);
	hash_table =
	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
	assert(hash_table);
	memset(hash_table, 0, PAGE_SIZE << hash_order);

	return 0;
}

void metapage_exit(void)
{
	mempool_destroy(metapage_mempool);
	kmem_cache_destroy(metapage_cache);
}

/*
 * Basically same hash as in pagemap.h, but using our hash table
 */
static struct metapage **meta_hash(struct address_space *mapping,
				   unsigned long index)
{
#define i (((unsigned long)mapping)/ \
	   (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
#define s(x) ((x) + ((x) >> HASH_BITS))
	return hash_table + (s(i + index) & (HASH_SIZE - 1));
#undef i
#undef s
}
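
/*
 * The "i" macro divides the mapping pointer by the largest power of two
 * that divides sizeof(struct inode) (x & ~(x - 1) isolates the lowest
 * set bit), discarding address bits that are the same for every inode;
 * s() then folds the bits above HASH_BITS back into the bucket index.
 */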

static struct metapage *search_hash(struct metapage ** hash_ptr,
				    struct address_space *mapping,
				    unsigned long index)
{
	struct metapage *ptr;

	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
		if ((ptr->mapping == mapping) && (ptr->index == index))
			return ptr;
	}

	return NULL;
}

static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (*hash_ptr)
		(*hash_ptr)->hash_prev = mp;

	mp->hash_prev = NULL;
	mp->hash_next = *hash_ptr;
	*hash_ptr = mp;
}

static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (mp->hash_prev)
		mp->hash_prev->hash_next = mp->hash_next;
	else {
		assert(*hash_ptr == mp);
		*hash_ptr = mp->hash_next;
	}

	if (mp->hash_next)
		mp->hash_next->hash_prev = mp->hash_prev;
}
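
/*
 * The bucket chains are doubly linked with new entries at the head.
 * None of these helpers take a lock themselves; every caller in this
 * file holds meta_lock around them.
 */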

struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	struct metapage **hash_ptr;
	int l2BlocksPerPage;
	int l2bsize;
	struct address_space *mapping;
	struct metapage *mp;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);

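	/*
	 * "absolute" metapages address aggregate (raw device) blocks and
	 * live in the block device's page cache; fileset metapages live
	 * in the owning inode's mapping, where lblock is relative to
	 * that inode.
	 */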
	if (absolute)
		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	hash_ptr = meta_hash(mapping, lblock);
      again:
	spin_lock(&meta_lock);
	mp = search_hash(hash_ptr, mapping, lblock);
	if (mp) {
	      page_found:
		mp->count++;
		lock_metapage(mp);
		spin_unlock(&meta_lock);
		if (test_bit(META_stale, &mp->flag)) {
			release_metapage(mp);
			yield();	/* Let other waiters release it, too */
			goto again;
		}
		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				release_metapage(mp);
				return NULL;
			}
			clear_bit(META_discard, &mp->flag);
		}
		jfs_info("__get_metapage: found 0x%p, in hash", mp);
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			release_metapage(mp);
			return NULL;
		}
	} else {
		l2bsize = inode->i_blkbits;
		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
		page_index = lblock >> l2BlocksPerPage;
		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
		    l2bsize;
		if ((page_offset + size) > PAGE_CACHE_SIZE) {
			spin_unlock(&meta_lock);
			jfs_err("MetaData crosses page boundary!!");
			return NULL;
		}

		/*
		 * Locks held on aggregate inode pages are usually
		 * not held long, and they are taken in critical code
		 * paths (committing dirty inodes, txCommit thread)
		 *
		 * Attempt to get metapage without blocking, tapping into
		 * reserves if necessary.
		 */
		mp = NULL;
		if (JFS_IP(inode)->fileset == AGGREGATE_I) {
			mp = mempool_alloc(metapage_mempool, GFP_ATOMIC);
			if (!mp) {
				/*
				 * mempool is supposed to protect us from
				 * failing here.  We will try a blocking
				 * call, but a deadlock is possible here
				 */
				printk(KERN_WARNING
				       "__get_metapage: atomic call to mempool_alloc failed.\n");
				printk(KERN_WARNING
				       "Will attempt blocking call\n");
			}
		}
		if (!mp) {
			struct metapage *mp2;

			spin_unlock(&meta_lock);
			mp = mempool_alloc(metapage_mempool, GFP_NOFS);
			spin_lock(&meta_lock);

			/* we dropped the meta_lock, we need to search the
			 * hash again.
			 */
			mp2 = search_hash(hash_ptr, mapping, lblock);
			if (mp2) {
				free_metapage(mp);
				mp = mp2;
				goto page_found;
			}
		}
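		/*
		 * Fresh metapage:  initialize it and add it to the hash
		 * while still holding meta_lock, so a concurrent lookup
		 * either finds it fully linked or not at all.
		 */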
		mp->flag = 0;
		lock_metapage(mp);
		if (absolute)
			set_bit(META_absolute, &mp->flag);
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		atomic_set(&mp->nohomeok, 0);
		mp->mapping = mapping;
		mp->index = lblock;
		mp->page = NULL;
		mp->logical_size = size;
		add_to_hash(mp, hash_ptr);
		spin_unlock(&meta_lock);

		if (new) {
			jfs_info("__get_metapage: Calling grab_cache_page");
			mp->page = grab_cache_page(mapping, page_index);
			if (!mp->page) {
				jfs_err("grab_cache_page failed!");
				goto freeit;
			} else {
				INCREMENT(mpStat.pagealloc);
				unlock_page(mp->page);
			}
		} else {
			jfs_info("__get_metapage: Calling read_cache_page");
			mp->page = read_cache_page(mapping, lblock,
				    (filler_t *)mapping->a_ops->readpage, NULL);
			if (IS_ERR(mp->page)) {
				jfs_err("read_cache_page failed!");
				goto freeit;
			} else
				INCREMENT(mpStat.pagealloc);
		}
		mp->data = kmap(mp->page) + page_offset;
	}

	if (new)
		memset(mp->data, 0, PSIZE);

	jfs_info("__get_metapage: returning = 0x%p", mp);
	return mp;

      freeit:
	spin_lock(&meta_lock);
	remove_from_hash(mp, hash_ptr);
	free_metapage(mp);
	spin_unlock(&meta_lock);
	return NULL;
}
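
/*
 * Callers elsewhere in JFS normally reach this through the
 * read_metapage()/get_metapage() wrappers in jfs_metapage.h, which
 * supply the "new" argument as false or true respectively.
 */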

void hold_metapage(struct metapage * mp, int force)
{
	spin_lock(&meta_lock);

	mp->count++;

	if (force) {
		ASSERT(!(test_bit(META_forced, &mp->flag)));
		if (trylock_metapage(mp))
			set_bit(META_forced, &mp->flag);
	} else
		lock_metapage(mp);

	spin_unlock(&meta_lock);
}
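
/*
 * With force set, the caller gets its reference even when the metapage
 * is locked by another task:  META_forced records that the lock bit was
 * already held, and release_metapage() then drops only the count
 * without unlocking on the real owner's behalf.
 */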

static void __write_metapage(struct metapage * mp)
{
	int l2bsize = mp->mapping->host->i_blkbits;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	unsigned long page_index;
	unsigned long page_offset;
	int rc;

	jfs_info("__write_metapage: mp = 0x%p", mp);

	if (test_bit(META_discard, &mp->flag)) {
		/*
		 * This metadata is no longer valid
		 */
		clear_bit(META_dirty, &mp->flag);
		return;
	}

	page_index = mp->page->index;
	page_offset =
	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

	lock_page(mp->page);
	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
					       page_offset +
					       mp->logical_size);
	if (rc) {
		jfs_err("prepare_write returned %d!", rc);
		ClearPageUptodate(mp->page);
		unlock_page(mp->page);
		clear_bit(META_dirty, &mp->flag);
		return;
	}
	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
					      page_offset +
					      mp->logical_size);
	if (rc) {
		jfs_err("commit_write returned %d", rc);
	}

	unlock_page(mp->page);
	clear_bit(META_dirty, &mp->flag);

	jfs_info("__write_metapage done");
}
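
/*
 * Writes go through the address_space's prepare_write()/commit_write()
 * pair rather than touching buffers directly; commit_write() marks the
 * range dirty and the actual I/O happens via normal page writeback (or
 * synchronously through sync_metapage() below).
 */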

static inline void sync_metapage(struct metapage *mp)
{
	struct page *page = mp->page;

	page_cache_get(page);
	lock_page(page);

	/* we're done with this page - no need to check for errors */
	if (page_has_buffers(page))
		write_one_page(page, 1);
	else
		unlock_page(page);
	page_cache_release(page);
}
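
/*
 * write_one_page(page, 1) expects a locked page, starts writeback and
 * waits for it, unlocking the page itself; that is why only the else
 * branch unlocks explicitly.
 */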

void release_metapage(struct metapage * mp)
{
	struct jfs_log *log;

	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	spin_lock(&meta_lock);
	if (test_bit(META_forced, &mp->flag)) {
		clear_bit(META_forced, &mp->flag);
		mp->count--;
		spin_unlock(&meta_lock);
		return;
	}

	assert(mp->count);
	if (--mp->count || atomic_read(&mp->nohomeok)) {
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}

	if (mp->page) {
		/* Releasing spinlock, we have to check mp->count later */
		set_bit(META_stale, &mp->flag);
		spin_unlock(&meta_lock);
		kunmap(mp->page);
		mp->data = NULL;
		if (test_bit(META_dirty, &mp->flag))
			__write_metapage(mp);
		if (test_bit(META_sync, &mp->flag)) {
			sync_metapage(mp);
			clear_bit(META_sync, &mp->flag);
		}

		if (test_bit(META_discard, &mp->flag)) {
			lock_page(mp->page);
			block_invalidatepage(mp->page, 0);
			unlock_page(mp->page);
		}

		page_cache_release(mp->page);
		mp->page = NULL;
		INCREMENT(mpStat.pagefree);
		spin_lock(&meta_lock);
	}

	if (mp->lsn) {
		/*
		 * Remove metapage from logsynclist.
		 */
		log = mp->log;
		LOGSYNC_LOCK(log);
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
		LOGSYNC_UNLOCK(log);
	}
	if (mp->count) {
		/* Someone else is trying to get this metapage */
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}
	remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
	spin_unlock(&meta_lock);

	free_metapage(mp);
}
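
/*
 * The META_stale window above is the subtle part:  the final holder
 * drops meta_lock to do writeback, so __get_metapage() can still find
 * this metapage in the hash.  A finder sees META_stale, releases its
 * reference and retries; the mp->count recheck after retaking meta_lock
 * catches a reference taken before META_stale was visible.
 */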

void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	struct metapage **hash_ptr;
	unsigned long lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
	struct metapage *mp;
	struct page *page;

	/*
	 * First, mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr; lblock < addr + len;
	     lblock += 1 << l2BlocksPerPage) {
		hash_ptr = meta_hash(mapping, lblock);
	      again:
		spin_lock(&meta_lock);
		mp = search_hash(hash_ptr, mapping, lblock);
		if (mp) {
			if (test_bit(META_stale, &mp->flag)) {
				/* Racing with release_metapage */
				mp->count++;
				lock_metapage(mp);
				spin_unlock(&meta_lock);
				/* racing release_metapage should be done now */
				release_metapage(mp);
				goto again;
			}

			set_bit(META_discard, &mp->flag);
			spin_unlock(&meta_lock);
		} else {
			spin_unlock(&meta_lock);
			page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
			if (page) {
				block_invalidatepage(page, 0);
				unlock_page(page);
				page_cache_release(page);
			}
		}
	}
}
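
/*
 * A discarded metapage stays in the hash until its holders release it;
 * META_discard only guarantees the stale contents are never written
 * back.  Blocks with no metapage are invalidated directly in the block
 * device's page cache.
 */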

#ifdef CONFIG_JFS_STATISTICS
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait);

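	/*
	 * Classic read_proc windowing:  the whole report is formatted at
	 * the start of the buffer, then trimmed to the caller's
	 * [offset, offset + length) slice, with *eof set once the
	 * remainder fits.
	 */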
	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
#endif