/*
 *   Copyright (C) International Business Machines Corp., 2000-2003
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
} mpStat;
#endif

#define HASH_BITS 10		/* This makes hash_table 1 4K page */
#define HASH_SIZE (1 << HASH_BITS)
static struct metapage **hash_table = NULL;
static unsigned long hash_order;

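/*
 * The hash chains, reference counts and flag bits of a metapage are
 * protected by meta_lock; META_locked itself acts as a per-metapage
 * sleeping lock built on top of that spinlock (see lock_metapage() and
 * __lock_metapage() below).
 */
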
static inline int metapage_locked(struct metapage *mp)
{
	return test_bit(META_locked, &mp->flag);
}

static inline int trylock_metapage(struct metapage *mp)
{
	return test_and_set_bit(META_locked, &mp->flag);
}

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit(META_locked, &mp->flag);
	/* wake anyone sleeping in __lock_metapage() */
	wake_up(&mp->wait);
}

static void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);

	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			/* drop meta_lock while sleeping on mp->wait */
			spin_unlock(&meta_lock);
			schedule();
			spin_lock(&meta_lock);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

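/*
 * Called with meta_lock held.  The fast path is a simple test_and_set of
 * META_locked; if the bit is already set we fall back to the sleeping slow
 * path above.
 */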
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}

#define METAPOOL_MIN_PAGES 32
static kmem_cache_t *metapage_cache;
static mempool_t *metapage_mempool;

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct metapage *mp = (struct metapage *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		/* start from a clean flag word before setting META_free */
		mp->flag = 0;
		set_bit(META_free, &mp->flag);
		init_waitqueue_head(&mp->wait);
	}
}

static inline struct metapage *alloc_metapage(int no_wait)
{
	return mempool_alloc(metapage_mempool, no_wait ? GFP_ATOMIC : GFP_NOFS);
}

static inline void free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	mempool_free(mp, metapage_mempool);
}

int __init metapage_init(void)
{
	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once, NULL);
	if (metapage_cache == NULL)
		return -ENOMEM;

	metapage_mempool = mempool_create(METAPOOL_MIN_PAGES,
					  mempool_alloc_slab,
					  mempool_free_slab, metapage_cache);

	if (metapage_mempool == NULL) {
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}

	/*
	 * Now the hash list
	 */
	for (hash_order = 0;
	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
	     hash_order++);
	hash_table =
	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
	assert(hash_table);
	memset(hash_table, 0, PAGE_SIZE << hash_order);

	return 0;
}

void metapage_exit(void)
{
	mempool_destroy(metapage_mempool);
	kmem_cache_destroy(metapage_cache);
}

/*
 * Basically same hash as in pagemap.h, but using our hash table
 */
static struct metapage **meta_hash(struct address_space *mapping,
				   unsigned long index)
{
#define i (((unsigned long)mapping)/ \
	   (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
#define s(x) ((x) + ((x) >> HASH_BITS))
	return hash_table + (s(i + index) & (HASH_SIZE - 1));
#undef i
#undef s
}

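/*
 * The hash chain helpers below (search_hash, add_to_hash, remove_from_hash)
 * manipulate the per-bucket doubly-linked chains and must be called with
 * meta_lock held.
 */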
static struct metapage *search_hash(struct metapage ** hash_ptr,
				    struct address_space *mapping,
				    unsigned long index)
{
	struct metapage *ptr;

	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
		if ((ptr->mapping == mapping) && (ptr->index == index))
			return ptr;
	}

	return NULL;
}

static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (*hash_ptr)
		(*hash_ptr)->hash_prev = mp;

	mp->hash_prev = NULL;
	mp->hash_next = *hash_ptr;
	*hash_ptr = mp;
}

static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (mp->hash_prev)
		mp->hash_prev->hash_next = mp->hash_next;
	else {
		assert(*hash_ptr == mp);
		*hash_ptr = mp->hash_next;
	}

	if (mp->hash_next)
		mp->hash_next->hash_prev = mp->hash_prev;
}

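/*
 * __get_metapage() looks a metapage up in the hash (or allocates a new one),
 * takes a reference on it and returns it locked.  'absolute' selects the
 * block device's mapping instead of the inode's own mapping; 'new' means the
 * caller will initialize the data, so the backing page is grabbed rather
 * than read from disk.
 */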
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	struct metapage **hash_ptr;
	int l2BlocksPerPage;
	int l2bsize;
	struct address_space *mapping;
	struct metapage *mp;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);

	if (absolute)
		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	hash_ptr = meta_hash(mapping, lblock);
again:
	spin_lock(&meta_lock);
	mp = search_hash(hash_ptr, mapping, lblock);
	if (mp) {
	      page_found:
		mp->count++;
		lock_metapage(mp);
		spin_unlock(&meta_lock);
		if (test_bit(META_stale, &mp->flag)) {
			release_metapage(mp);
			yield();	/* Let other waiters release it, too */
			goto again;
		}

		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				release_metapage(mp);
				return NULL;
			}
			clear_bit(META_discard, &mp->flag);
		}

		jfs_info("__get_metapage: found 0x%p, in hash", mp);
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			release_metapage(mp);
			return NULL;
		}
	} else {
		l2bsize = inode->i_blkbits;
		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
		page_index = lblock >> l2BlocksPerPage;
		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
		    l2bsize;
		if ((page_offset + size) > PAGE_CACHE_SIZE) {
			spin_unlock(&meta_lock);
			jfs_err("MetaData crosses page boundary!!");
			return NULL;
		}

		/*
		 * Locks held on aggregate inode pages are usually
		 * not held long, and they are taken in critical code
		 * paths (committing dirty inodes, txCommit thread)
		 *
		 * Attempt to get metapage without blocking, tapping into
		 * reserves if necessary.
		 */
		mp = NULL;
		if (JFS_IP(inode)->fileset == AGGREGATE_I) {
			mp = mempool_alloc(metapage_mempool, GFP_ATOMIC);
			if (!mp) {
				/*
				 * mempool is supposed to protect us from
				 * failing here.  We will try a blocking
				 * call, but a deadlock is possible here
				 */
				printk(KERN_WARNING
				       "__get_metapage: atomic call to mempool_alloc failed.\n");
				printk(KERN_WARNING
				       "Will attempt blocking call\n");
			}
		}
		if (!mp) {
			struct metapage *mp2;

			spin_unlock(&meta_lock);
			mp = mempool_alloc(metapage_mempool, GFP_NOFS);
			spin_lock(&meta_lock);

			/* we dropped the meta_lock, we need to search the
			 * hash again.
			 */
			mp2 = search_hash(hash_ptr, mapping, lblock);
			if (mp2) {
				free_metapage(mp);
				mp = mp2;
				goto page_found;
			}
		}
		mp->flag = 0;
		lock_metapage(mp);
		if (absolute)
			set_bit(META_absolute, &mp->flag);
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		atomic_set(&mp->nohomeok, 0);
		mp->mapping = mapping;
		mp->index = lblock;
		mp->page = NULL;
		mp->logical_size = size;
		add_to_hash(mp, hash_ptr);
		spin_unlock(&meta_lock);

		if (new) {
			jfs_info("__get_metapage: Calling grab_cache_page");
			mp->page = grab_cache_page(mapping, page_index);
			if (!mp->page) {
				jfs_err("grab_cache_page failed!");
				goto freeit;
			} else {
				INCREMENT(mpStat.pagealloc);
				unlock_page(mp->page);
			}
		} else {
			jfs_info("__get_metapage: Calling read_cache_page");
			mp->page = read_cache_page(mapping, lblock,
				   (filler_t *)mapping->a_ops->readpage, NULL);
			if (IS_ERR(mp->page)) {
				jfs_err("read_cache_page failed!");
				goto freeit;
			} else
				INCREMENT(mpStat.pagealloc);
		}
		mp->data = kmap(mp->page) + page_offset;
	}

	if (new)
		memset(mp->data, 0, PSIZE);

	jfs_info("__get_metapage: returning = 0x%p", mp);
	return mp;

freeit:
	spin_lock(&meta_lock);
	remove_from_hash(mp, hash_ptr);
	free_metapage(mp);
	spin_unlock(&meta_lock);
	return NULL;
}

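/*
 * hold_metapage() takes an extra reference on an already-hashed metapage.
 * With 'force' set the caller refuses to sleep: if someone else holds the
 * metapage lock, META_forced is recorded instead of waiting for it.
 */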
void hold_metapage(struct metapage * mp, int force)
{
	spin_lock(&meta_lock);

	mp->count++;

	if (force) {
		ASSERT (!(test_bit(META_forced, &mp->flag)));
		if (trylock_metapage(mp))
			set_bit(META_forced, &mp->flag);
	} else
		lock_metapage(mp);

	spin_unlock(&meta_lock);
}

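/*
 * __write_metapage() pushes a dirty metapage to its backing page cache page
 * through the mapping's prepare_write/commit_write address_space operations.
 * Discarded metapages are simply marked clean and skipped.
 */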
static void __write_metapage(struct metapage * mp)
{
	int l2bsize = mp->mapping->host->i_blkbits;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	unsigned long page_index;
	unsigned long page_offset;
	int rc;

	jfs_info("__write_metapage: mp = 0x%p", mp);

	if (test_bit(META_discard, &mp->flag)) {
		/*
		 * This metadata is no longer valid
		 */
		clear_bit(META_dirty, &mp->flag);
		return;
	}

	page_index = mp->page->index;
	page_offset =
	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

	lock_page(mp->page);
	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
					       page_offset + mp->logical_size);
	if (rc) {
		jfs_err("prepare_write return %d!", rc);
		ClearPageUptodate(mp->page);
		unlock_page(mp->page);
		clear_bit(META_dirty, &mp->flag);
		return;
	}
	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
					      page_offset + mp->logical_size);
	if (rc)
		jfs_err("commit_write returned %d", rc);

	unlock_page(mp->page);
	clear_bit(META_dirty, &mp->flag);

	jfs_info("__write_metapage done");
}

static inline void sync_metapage(struct metapage *mp)
{
	struct page *page = mp->page;

	page_cache_get(page);
	lock_page(page);

	/* we're done with this page - no need to check for errors */
	if (page_has_buffers(page))
		write_one_page(page, 1);
	else
		unlock_page(page);
	page_cache_release(page);
}

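/*
 * release_metapage() drops one reference.  When the last reference goes
 * away the metapage is written back if dirty (and synced if requested),
 * removed from the log sync list and the hash, and returned to the mempool.
 */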
void release_metapage(struct metapage * mp)
{
	struct jfs_log *log;

	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	spin_lock(&meta_lock);
	if (test_bit(META_forced, &mp->flag)) {
		clear_bit(META_forced, &mp->flag);
		mp->count--;
		spin_unlock(&meta_lock);
		return;
	}

	assert(mp->count);
	if (--mp->count || atomic_read(&mp->nohomeok)) {
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}

	if (mp->page) {
		/* Releasing spinlock, we have to check mp->count later */
		set_bit(META_stale, &mp->flag);
		spin_unlock(&meta_lock);
		kunmap(mp->page);
		mp->data = NULL;
		if (test_bit(META_dirty, &mp->flag))
			__write_metapage(mp);
		if (test_bit(META_sync, &mp->flag)) {
			sync_metapage(mp);
			clear_bit(META_sync, &mp->flag);
		}

		if (test_bit(META_discard, &mp->flag)) {
			lock_page(mp->page);
			block_invalidatepage(mp->page, 0);
			unlock_page(mp->page);
		}

		page_cache_release(mp->page);
		mp->page = NULL;
		INCREMENT(mpStat.pagefree);
		spin_lock(&meta_lock);
	}

	if (mp->lsn) {
		/*
		 * Remove metapage from logsynclist.
		 */
		log = mp->log;
		LOGSYNC_LOCK(log);
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
		LOGSYNC_UNLOCK(log);
	}
	if (mp->count) {
		/* Someone else is trying to get this metapage */
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}
	remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
	spin_unlock(&meta_lock);

	free_metapage(mp);
}

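/*
 * __invalidate_metapages() marks every cached metapage in the block range
 * [addr, addr + len) of the block device mapping as META_discard so the
 * stale metadata is never written back; pages in the range with no cached
 * metapage are invalidated directly.
 */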
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	struct metapage **hash_ptr;
	unsigned long lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
	struct metapage *mp;
	struct page *page;

	/*
	 * First, mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr; lblock < addr + len;
	     lblock += 1 << l2BlocksPerPage) {
		hash_ptr = meta_hash(mapping, lblock);
again:
		spin_lock(&meta_lock);
		mp = search_hash(hash_ptr, mapping, lblock);
		if (mp) {
			if (test_bit(META_stale, &mp->flag)) {
				/* Racing with release_metapage */
				mp->count++;
				lock_metapage(mp);
				spin_unlock(&meta_lock);
				/* racing release_metapage should be done now */
				release_metapage(mp);
				goto again;
			}

			set_bit(META_discard, &mp->flag);
			spin_unlock(&meta_lock);
		} else {
			spin_unlock(&meta_lock);
			page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
			if (page) {
				block_invalidatepage(page, 0);
				unlock_page(page);
				page_cache_release(page);
			}
		}
	}
}

#ifdef CONFIG_JFS_STATISTICS
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait);

	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
#endif