fs/nfs/pagelist.c

   1 /*
   2  * linux/fs/nfs/pagelist.c
   3  *
   4  * A set of helper functions for managing NFS read and write requests.
   5  * The main purpose of these routines is to provide support for the
   6  * coalescing of several requests into a single RPC call.
   7  *
   8  * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
   9  *
  10  */
  11
  12 #include <linux/slab.h>
  13 #include <linux/file.h>
  14 #include <linux/sched.h>
  15 #include <linux/sunrpc/clnt.h>
  16 #include <linux/nfs3.h>
  17 #include <linux/nfs4.h>
  18 #include <linux/nfs_page.h>
  19 #include <linux/nfs_fs.h>
  20 #include <linux/nfs_mount.h>
  21 #include <linux/export.h>
  22
  23 #include "internal.h"
  24 #include "pnfs.h"
  25
  26 static struct kmem_cache *nfs_page_cachep;
  27
  28 static inline struct nfs_page *
  29 nfs_page_alloc(void)
  30 {
  31         struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
  32         if (p)
  33                 INIT_LIST_HEAD(&p->wb_list);
  34         return p;
  35 }
  36
  37 static inline void
  38 nfs_page_free(struct nfs_page *p)
  39 {
  40         kmem_cache_free(nfs_page_cachep, p);
  41 }
  42
  43 /**
  44  * nfs_create_request - Create an NFS read/write request.
  45  * @ctx: open context to use
  46  * @inode: inode to which the request is attached
  47  * @page: page to write
  48  * @offset: starting offset within the page for the write
  49  * @count: number of bytes to read/write
  50  *
  51  * The page must be locked by the caller. This makes sure we never
  52  * create two different requests for the same page.
  53  * User should ensure it is safe to sleep in this function.
  54  */
  55 struct nfs_page *
  56 nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
  57                    struct page *page,
  58                    unsigned int offset, unsigned int count)
  59 {
  60         struct nfs_page         *req;
  61
  62         /* try to allocate the request struct */
  63         req = nfs_page_alloc();
  64         if (req == NULL)
  65                 return ERR_PTR(-ENOMEM);
  66
  67         /* get lock context early so we can deal with alloc failures */
  68         req->wb_lock_context = nfs_get_lock_context(ctx);
  69         if (req->wb_lock_context == NULL) {
  70                 nfs_page_free(req);
  71                 return ERR_PTR(-ENOMEM);
  72         }
  73
  74         /* Initialize the request struct. Initially, we assume a
  75          * long write-back delay. This will be adjusted in
  76          * update_nfs_request below if the region is not locked. */
  77         req->wb_page    = page;
  78         atomic_set(&req->wb_complete, 0);
  79         req->wb_index   = page->index;
  80         page_cache_get(page);
  81         BUG_ON(PagePrivate(page));
  82         BUG_ON(!PageLocked(page));
  83         BUG_ON(page->mapping->host != inode);
  84         req->wb_offset  = offset;
  85         req->wb_pgbase  = offset;
  86         req->wb_bytes   = count;
  87         req->wb_context = get_nfs_open_context(ctx);
  88         kref_init(&req->wb_kref);
  89         return req;
  90 }
  91
  92 /**
  93  * nfs_unlock_request - Unlock request and wake up sleepers.
  94  * @req:
  95  */
  96 void nfs_unlock_request(struct nfs_page *req)
  97 {
  98         if (!NFS_WBACK_BUSY(req)) {
  99                 printk(KERN_ERR "NFS: Invalid unlock attempted\n");
 100                 BUG();
 101         }
 102         smp_mb__before_clear_bit();
 103         clear_bit(PG_BUSY, &req->wb_flags);
 104         smp_mb__after_clear_bit();
 105         wake_up_bit(&req->wb_flags, PG_BUSY);
 106         nfs_release_request(req);
 107 }
 108
 109 /**
 110  * nfs_set_page_tag_locked - Tag a request as locked
 111  * @req:
 112  */
 113 int nfs_set_page_tag_locked(struct nfs_page *req)
 114 {
 115         if (!nfs_lock_request_dontget(req))
 116                 return 0;
 117         if (test_bit(PG_MAPPED, &req->wb_flags))
 118                 radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 119         return 1;
 120 }
 121
 122 /**
 123  * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
 124  */
 125 void nfs_clear_page_tag_locked(struct nfs_page *req)
 126 {
 127         if (test_bit(PG_MAPPED, &req->wb_flags)) {
 128                 struct inode *inode = req->wb_context->dentry->d_inode;
 129                 struct nfs_inode *nfsi = NFS_I(inode);
 130
 131                 spin_lock(&inode->i_lock);
 132                 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 133                 nfs_unlock_request(req);
 134                 spin_unlock(&inode->i_lock);
 135         } else
 136                 nfs_unlock_request(req);
 137 }
 138
 139 /*
 140  * nfs_clear_request - Free up all resources allocated to the request
 141  * @req:
 142  *
 143  * Release page and open context resources associated with a read/write
 144  * request after it has completed.
 145  */
 146 static void nfs_clear_request(struct nfs_page *req)
 147 {
 148         struct page *page = req->wb_page;
 149         struct nfs_open_context *ctx = req->wb_context;
 150         struct nfs_lock_context *l_ctx = req->wb_lock_context;
 151
 152         if (page != NULL) {
 153                 page_cache_release(page);
 154                 req->wb_page = NULL;
 155         }
 156         if (l_ctx != NULL) {
 157                 nfs_put_lock_context(l_ctx);
 158                 req->wb_lock_context = NULL;
 159         }
 160         if (ctx != NULL) {
 161                 put_nfs_open_context(ctx);
 162                 req->wb_context = NULL;
 163         }
 164 }
 165
 166
 167 /**
 168  * nfs_release_request - Release the count on an NFS read/write request
 169  * @req: request to release
 170  *
 171  * Note: Should never be called with the spinlock held!
 172  */
 173 static void nfs_free_request(struct kref *kref)
 174 {
 175         struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
 176
 177         /* Release struct file and open context */
 178         nfs_clear_request(req);
 179         nfs_page_free(req);
 180 }
 181
 182 void nfs_release_request(struct nfs_page *req)
 183 {
 184         kref_put(&req->wb_kref, nfs_free_request);
 185 }
 186
 187 static int nfs_wait_bit_uninterruptible(void *word)
 188 {
 189         io_schedule();
 190         return 0;
 191 }
 192
 193 /**
 194  * nfs_wait_on_request - Wait for a request to complete.
 195  * @req: request to wait upon.
 196  *
 197  * Interruptible by fatal signals only.
 198  * The user is responsible for holding a count on the request.
 199  */
 200 int
 201 nfs_wait_on_request(struct nfs_page *req)
 202 {
 203         return wait_on_bit(&req->wb_flags, PG_BUSY,
 204                         nfs_wait_bit_uninterruptible,
 205                         TASK_UNINTERRUPTIBLE);
 206 }
 207
 208 bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
 209 {
 210         /*
 211          * FIXME: ideally we should be able to coalesce all requests
 212          * that are not block boundary aligned, but currently this
 213          * is problematic for the case of bsize < PAGE_CACHE_SIZE,
 214          * since nfs_flush_multi and nfs_pagein_multi assume you
 215          * can have only one struct nfs_page.
 216          */
 217         if (desc->pg_bsize < PAGE_SIZE)
 218                 return 0;
 219
 220         return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
 221 }
 222 EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 223
 224 /**
 225  * nfs_pageio_init - initialise a page io descriptor
 226  * @desc: pointer to descriptor
 227  * @inode: pointer to inode
 228  * @doio: pointer to io function
 229  * @bsize: io block size
 230  * @io_flags: extra parameters for the io function
 231  */
 232 void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 233                      struct inode *inode,
 234                      const struct nfs_pageio_ops *pg_ops,
 235                      size_t bsize,
 236                      int io_flags)
 237 {
 238         INIT_LIST_HEAD(&desc->pg_list);
 239         desc->pg_bytes_written = 0;
 240         desc->pg_count = 0;
 241         desc->pg_bsize = bsize;
 242         desc->pg_base = 0;
 243         desc->pg_moreio = 0;
 244         desc->pg_recoalesce = 0;
 245         desc->pg_inode = inode;
 246         desc->pg_ops = pg_ops;
 247         desc->pg_ioflags = io_flags;
 248         desc->pg_error = 0;
 249         desc->pg_lseg = NULL;
 250 }
 251
 252 /**
 253  * nfs_can_coalesce_requests - test two requests for compatibility
 254  * @prev: pointer to nfs_page
 255  * @req: pointer to nfs_page
 256  *
 257  * The nfs_page structures 'prev' and 'req' are compared to ensure that the
 258  * page data area they describe is contiguous, and that their RPC
 259  * credentials, NFSv4 open state, and lockowners are the same.
 260  *
 261  * Return 'true' if this is the case, else return 'false'.
 262  */
 263 static bool nfs_can_coalesce_requests(struct nfs_page *prev,
 264                                       struct nfs_page *req,
 265                                       struct nfs_pageio_descriptor *pgio)
 266 {
 267         if (req->wb_context->cred != prev->wb_context->cred)
 268                 return false;
 269         if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
 270                 return false;
 271         if (req->wb_context->state != prev->wb_context->state)
 272                 return false;
 273         if (req->wb_index != (prev->wb_index + 1))
 274                 return false;
 275         if (req->wb_pgbase != 0)
 276                 return false;
 277         if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
 278                 return false;
 279         return pgio->pg_ops->pg_test(pgio, prev, req);
 280 }
 281
 282 /**
 283  * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
 284  * @desc: destination io descriptor
 285  * @req: request
 286  *
 287  * Returns true if the request 'req' was successfully coalesced into the
 288  * existing list of pages 'desc'.
 289  */
 290 static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
 291                                      struct nfs_page *req)
 292 {
 293         if (desc->pg_count != 0) {
 294                 struct nfs_page *prev;
 295
 296                 prev = nfs_list_entry(desc->pg_list.prev);
 297                 if (!nfs_can_coalesce_requests(prev, req, desc))
 298                         return 0;
 299         } else {
 300                 if (desc->pg_ops->pg_init)
 301                         desc->pg_ops->pg_init(desc, req);
 302                 desc->pg_base = req->wb_pgbase;
 303         }
 304         nfs_list_remove_request(req);
 305         nfs_list_add_request(req, &desc->pg_list);
 306         desc->pg_count += req->wb_bytes;
 307         return 1;
 308 }
 309
 310 /*
 311  * Helper for nfs_pageio_add_request and nfs_pageio_complete
 312  */
 313 static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
 314 {
 315         if (!list_empty(&desc->pg_list)) {
 316                 int error = desc->pg_ops->pg_doio(desc);
 317                 if (error < 0)
 318                         desc->pg_error = error;
 319                 else
 320                         desc->pg_bytes_written += desc->pg_count;
 321         }
 322         if (list_empty(&desc->pg_list)) {
 323                 desc->pg_count = 0;
 324                 desc->pg_base = 0;
 325         }
 326 }
 327
 328 /**
 329  * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
 330  * @desc: destination io descriptor
 331  * @req: request
 332  *
 333  * Returns true if the request 'req' was successfully coalesced into the
 334  * existing list of pages 'desc'.
 335  */
 336 static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 337                            struct nfs_page *req)
 338 {
 339         while (!nfs_pageio_do_add_request(desc, req)) {
 340                 desc->pg_moreio = 1;
 341                 nfs_pageio_doio(desc);
 342                 if (desc->pg_error < 0)
 343                         return 0;
 344                 desc->pg_moreio = 0;
 345                 if (desc->pg_recoalesce)
 346                         return 0;
 347         }
 348         return 1;
 349 }
 350
 351 static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
 352 {
 353         LIST_HEAD(head);
 354
 355         do {
 356                 list_splice_init(&desc->pg_list, &head);
 357                 desc->pg_bytes_written -= desc->pg_count;
 358                 desc->pg_count = 0;
 359                 desc->pg_base = 0;
 360                 desc->pg_recoalesce = 0;
 361
 362                 while (!list_empty(&head)) {
 363                         struct nfs_page *req;
 364
 365                         req = list_first_entry(&head, struct nfs_page, wb_list);
 366                         nfs_list_remove_request(req);
 367                         if (__nfs_pageio_add_request(desc, req))
 368                                 continue;
 369                         if (desc->pg_error < 0)
 370                                 return 0;
 371                         break;
 372                 }
 373         } while (desc->pg_recoalesce);
 374         return 1;
 375 }
 376
 377 int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 378                 struct nfs_page *req)
 379 {
 380         int ret;
 381
 382         do {
 383                 ret = __nfs_pageio_add_request(desc, req);
 384                 if (ret)
 385                         break;
 386                 if (desc->pg_error < 0)
 387                         break;
 388                 ret = nfs_do_recoalesce(desc);
 389         } while (ret);
 390         return ret;
 391 }
 392
 393 /**
 394  * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
 395  * @desc: pointer to io descriptor
 396  */
 397 void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
 398 {
 399         for (;;) {
 400                 nfs_pageio_doio(desc);
 401                 if (!desc->pg_recoalesce)
 402                         break;
 403                 if (!nfs_do_recoalesce(desc))
 404                         break;
 405         }
 406 }
 407
 408 /**
 409  * nfs_pageio_cond_complete - Conditional I/O completion
 410  * @desc: pointer to io descriptor
 411  * @index: page index
 412  *
 413  * It is important to ensure that processes don't try to take locks
 414  * on non-contiguous ranges of pages as that might deadlock. This
 415  * function should be called before attempting to wait on a locked
 416  * nfs_page. It will complete the I/O if the page index 'index'
 417  * is not contiguous with the existing list of pages in 'desc'.
 418  */
 419 void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
 420 {
 421         if (!list_empty(&desc->pg_list)) {
 422                 struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
 423                 if (index != prev->wb_index + 1)
 424                         nfs_pageio_complete(desc);
 425         }
 426 }
 427
 428 #define NFS_SCAN_MAXENTRIES 16
 429 /**
 430  * nfs_scan_list - Scan a list for matching requests
 431  * @nfsi: NFS inode
 432  * @dst: Destination list
 433  * @idx_start: lower bound of page->index to scan
 434  * @npages: idx_start + npages sets the upper bound to scan.
 435  * @tag: tag to scan for
 436  *
 437  * Moves elements from one of the inode request lists.
 438  * If the number of requests is set to 0, the entire address_space
 439  * starting at index idx_start, is scanned.
 440  * The requests are *not* checked to ensure that they form a contiguous set.
 441  * You must be holding the inode's i_lock when calling this function
 442  */
 443 int nfs_scan_list(struct nfs_inode *nfsi,
 444                 struct list_head *dst, pgoff_t idx_start,
 445                 unsigned int npages, int tag)
 446 {
 447         struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
 448         struct nfs_page *req;
 449         pgoff_t idx_end;
 450         int found, i;
 451         int res;
 452         struct list_head *list;
 453
 454         res = 0;
 455         if (npages == 0)
 456                 idx_end = ~0;
 457         else
 458                 idx_end = idx_start + npages - 1;
 459
 460         for (;;) {
 461                 found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
 462                                 (void **)&pgvec[0], idx_start,
 463                                 NFS_SCAN_MAXENTRIES, tag);
 464                 if (found <= 0)
 465                         break;
 466                 for (i = 0; i < found; i++) {
 467                         req = pgvec[i];
 468                         if (req->wb_index > idx_end)
 469                                 goto out;
 470                         idx_start = req->wb_index + 1;
 471                         if (nfs_set_page_tag_locked(req)) {
 472                                 kref_get(&req->wb_kref);
 473                                 radix_tree_tag_clear(&nfsi->nfs_page_tree,
 474                                                 req->wb_index, tag);
 475                                 list = pnfs_choose_commit_list(req, dst);
 476                                 nfs_list_add_request(req, list);
 477                                 res++;
 478                                 if (res == INT_MAX)
 479                                         goto out;
 480                         }
 481                 }
 482                 /* for latency reduction */
 483                 cond_resched_lock(&nfsi->vfs_inode.i_lock);
 484         }
 485 out:
 486         return res;
 487 }
 488
 489 int __init nfs_init_nfspagecache(void)
 490 {
 491         nfs_page_cachep = kmem_cache_create("nfs_page",
 492                                             sizeof(struct nfs_page),
 493                                             0, SLAB_HWCACHE_ALIGN,
 494                                             NULL);
 495         if (nfs_page_cachep == NULL)
 496                 return -ENOMEM;
 497
 498         return 0;
 499 }
 500
 501 void nfs_destroy_nfspagecache(void)
 502 {
 503         kmem_cache_destroy(nfs_page_cachep);
 504 }
 505