4 * Writing file data over NFS.
6 * We do it like this: When a (user) process wishes to write data to an
7 * NFS file, a write request is allocated that contains the RPC task data
8 * plus some info on the page to be written, and added to the inode's
9 * write chain. If the process writes past the end of the page, an async
10 * RPC call to write the page is scheduled immediately; otherwise, the call
11 * is delayed for a few seconds.
13 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
15 * Write requests are kept on the inode's writeback list. Each entry in
16 * that list references the page (portion) to be written. When the
17 * cache timeout has expired, the RPC task is woken up, and tries to
18 * lock the page. As soon as it manages to do so, the request is moved
19 * from the writeback list to the writelock list.
21 * Note: we must make sure never to confuse the inode passed in the
22 * write_page request with the one in page->inode. As far as I understand
23 * it, these are different when doing a swap-out.
25 * To understand everything that goes on here and in the NFS read code,
26 * one should be aware that a page is locked in exactly one of the following
29 * - A write request is in progress.
30 * - A user process is in generic_file_write/nfs_update_page
31 * - A user process is in generic_file_read
33 * Also note that because of the way pages are invalidated in
34 * nfs_revalidate_inode, the following assertions hold:
36 * - If a page is dirty, there will be no read requests (a page will
37 * not be re-read unless invalidated by nfs_revalidate_inode).
38 * - If the page is not uptodate, there will be no pending write
39 * requests, and no process will be in nfs_update_page.
41 * FIXME: Interaction with the vmscan routines is not optimal yet.
42 * Either vmscan must be made nfs-savvy, or we need a different page
43 * reclaim concept that supports something like FS-independent
44 * buffer_heads with a b_ops-> field.
46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
49 #include <linux/config.h>
50 #include <linux/types.h>
51 #include <linux/slab.h>
53 #include <linux/pagemap.h>
54 #include <linux/file.h>
55 #include <linux/mpage.h>
56 #include <linux/writeback.h>
58 #include <linux/sunrpc/clnt.h>
59 #include <linux/nfs_fs.h>
60 #include <linux/nfs_mount.h>
61 #include <linux/nfs_page.h>
62 #include <asm/uaccess.h>
63 #include <linux/smp_lock.h>
64 #include <linux/mempool.h>
66 #include "delegation.h"
68 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
70 #define MIN_POOL_WRITE (32)
71 #define MIN_POOL_COMMIT (4)
74 * Local function declarations
76 static struct nfs_page
* nfs_update_request(struct nfs_open_context
*,
79 unsigned int, unsigned int);
80 static void nfs_writeback_done_partial(struct nfs_write_data
*, int);
81 static void nfs_writeback_done_full(struct nfs_write_data
*, int);
82 static int nfs_wait_on_write_congestion(struct address_space
*, int);
83 static int nfs_wait_on_requests(struct inode
*, unsigned long, unsigned int);
85 static kmem_cache_t
*nfs_wdata_cachep
;
86 static mempool_t
*nfs_wdata_mempool
;
87 static mempool_t
*nfs_commit_mempool
;
89 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion
);
/*
 * Allocate a zeroed nfs_write_data from the per-filesystem write mempool
 * and initialize its request list head.
 *
 * NOTE(review): the extraction dropped source lines here (braces, the
 * NULL check guarding the memset, and the final "return p;") — restore
 * from upstream fs/nfs/write.c before building.
 */
91 static __inline__
struct nfs_write_data
*nfs_writedata_alloc(void)
93 struct nfs_write_data
*p
;
/* SLAB_NOFS: allocation may be called from writeback, must not recurse
 * into the filesystem. */
94 p
= (struct nfs_write_data
*)mempool_alloc(nfs_wdata_mempool
, SLAB_NOFS
);
96 memset(p
, 0, sizeof(*p
));
97 INIT_LIST_HEAD(&p
->pages
);
/* Return an nfs_write_data to the write mempool. Counterpart of
 * nfs_writedata_alloc(). */
102 static __inline__
void nfs_writedata_free(struct nfs_write_data
*p
)
104 mempool_free(p
, nfs_wdata_mempool
);
/*
 * RPC task release callback (installed as task->tk_release in
 * nfs_write_rpcsetup): frees the nfs_write_data hanging off the
 * completed task's tk_calldata.
 */
107 static void nfs_writedata_release(struct rpc_task
*task
)
109 struct nfs_write_data
*wdata
= (struct nfs_write_data
*)task
->tk_calldata
;
110 nfs_writedata_free(wdata
);
/*
 * Allocate a zeroed nfs_write_data from the COMMIT mempool (separate
 * pool from the write path so commits can make progress under memory
 * pressure) and initialize its request list head.
 *
 * NOTE(review): extraction dropped the NULL-check/braces and the
 * "return p;" — compare with upstream before building.
 */
113 static __inline__
struct nfs_write_data
*nfs_commit_alloc(void)
115 struct nfs_write_data
*p
;
116 p
= (struct nfs_write_data
*)mempool_alloc(nfs_commit_mempool
, SLAB_NOFS
);
118 memset(p
, 0, sizeof(*p
));
119 INIT_LIST_HEAD(&p
->pages
);
/* Return an nfs_write_data to the commit mempool. Counterpart of
 * nfs_commit_alloc(). */
124 static __inline__
void nfs_commit_free(struct nfs_write_data
*p
)
126 mempool_free(p
, nfs_commit_mempool
);
129 /* Adjust the file length if we're writing beyond the end */
130 static void nfs_grow_file(struct page
*page
, unsigned int offset
, unsigned int count
)
132 struct inode
*inode
= page
->mapping
->host
;
133 loff_t end
, i_size
= i_size_read(inode
);
/* Index of the current last page of the file. */
134 unsigned long end_index
= (i_size
- 1) >> PAGE_CACHE_SHIFT
;
/* Pages strictly before the last page cannot extend the file.
 * NOTE(review): the early-return body of this guard was dropped by
 * the extraction. */
136 if (i_size
> 0 && page
->index
< end_index
)
/* Absolute file offset of the end of this write. */
138 end
= ((loff_t
)page
->index
<< PAGE_CACHE_SHIFT
) + ((loff_t
)offset
+count
);
/* NOTE(review): a comparison against i_size presumably guards this
 * i_size_write (only grow, never shrink) — line dropped; confirm
 * against upstream. */
141 i_size_write(inode
, end
);
144 /* We can set the PG_uptodate flag if we see that a write request
145 * covers the full page.
 *
 * Three cases: the page is already uptodate (nothing to do), the write
 * covers the whole page, or this is the last page of the file and the
 * write covers everything up to EOF (then the tail is zeroed).
 * NOTE(review): extraction dropped early-return lines and the
 * declaration of end_offs; restore from upstream.
 */
147 static void nfs_mark_uptodate(struct page
*page
, unsigned int base
, unsigned int count
)
151 if (PageUptodate(page
))
155 if (count
== PAGE_CACHE_SIZE
) {
156 SetPageUptodate(page
);
/* Offset of the last valid byte of the file. */
160 end_offs
= i_size_read(page
->mapping
->host
) - 1;
163 /* Is this the last page? */
164 if (page
->index
!= (unsigned long)(end_offs
>> PAGE_CACHE_SHIFT
))
166 /* This is the last page: set PG_uptodate if we cover the entire
167 * extent of the data, then zero the rest of the page.
 */
169 if (count
== (unsigned int)(end_offs
& (PAGE_CACHE_SIZE
- 1)) + 1) {
/* Zero the region past EOF so stale data is never exposed. */
170 memclear_highpage_flush(page
, count
, PAGE_CACHE_SIZE
- count
);
171 SetPageUptodate(page
);
176 * Write a page synchronously.
177 * Offset is the data offset within the page.
 *
 * Issues one or more synchronous WRITE RPCs (NFS_FILE_SYNC) directly
 * via the protocol's ->write() op, wsize bytes at a time, until the
 * whole (offset, count) range is on the server. Returns the number of
 * bytes written, or the negative error from the first failing RPC.
 * NOTE(review): the extraction dropped the loop construct, the
 * error-handling branches, the kmalloc-failure return, and the kfree of
 * wdata — compare against upstream fs/nfs/write.c.
 */
179 static int nfs_writepage_sync(struct nfs_open_context
*ctx
, struct inode
*inode
,
180 struct page
*page
, unsigned int offset
, unsigned int count
,
183 unsigned int wsize
= NFS_SERVER(inode
)->wsize
;
184 int result
, written
= 0;
185 struct nfs_write_data
*wdata
;
/* Sync path allocates with kmalloc rather than the mempool. */
187 wdata
= kmalloc(sizeof(*wdata
), GFP_NOFS
);
191 memset(wdata
, 0, sizeof(*wdata
));
193 wdata
->cred
= ctx
->cred
;
194 wdata
->inode
= inode
;
195 wdata
->args
.fh
= NFS_FH(inode
);
196 wdata
->args
.context
= ctx
;
197 wdata
->args
.pages
= &page
;
/* Synchronous writes always request stable storage. */
198 wdata
->args
.stable
= NFS_FILE_SYNC
;
199 wdata
->args
.pgbase
= offset
;
200 wdata
->args
.count
= wsize
;
201 wdata
->res
.fattr
= &wdata
->fattr
;
202 wdata
->res
.verf
= &wdata
->verf
;
204 dprintk("NFS: nfs_writepage_sync(%s/%Ld %d@%Ld)\n",
206 (long long)NFS_FILEID(inode
),
207 count
, (long long)(page_offset(page
) + offset
));
209 nfs_begin_data_update(inode
);
/* Per-iteration: clamp this RPC to the bytes still outstanding. */
212 wdata
->args
.count
= count
;
213 wdata
->args
.offset
= page_offset(page
) + wdata
->args
.pgbase
;
215 result
= NFS_PROTO(inode
)->write(wdata
);
218 /* Must mark the page invalid after I/O error */
219 ClearPageUptodate(page
);
222 if (result
< wdata
->args
.count
)
223 printk(KERN_WARNING
"NFS: short write, count=%u, result=%d\n",
224 wdata
->args
.count
, result
);
/* Advance past what the server accepted and loop for the rest. */
226 wdata
->args
.offset
+= result
;
227 wdata
->args
.pgbase
+= result
;
231 /* Update file length */
232 nfs_grow_file(page
, offset
, written
);
233 /* Set the PG_uptodate flag? */
234 nfs_mark_uptodate(page
, offset
, written
);
237 ClearPageError(page
);
240 nfs_end_data_update_defer(inode
);
/* Bytes written wins over the error code if any progress was made. */
243 return written
? written
: result
;
/*
 * Queue an asynchronous write for (offset, count) of @page: create or
 * extend an nfs_page request via nfs_update_request(), grow the file
 * size if needed, and possibly mark the page uptodate. Returns 0 or
 * the PTR_ERR from nfs_update_request().
 * NOTE(review): extraction dropped the error-bailout branch and the
 * final "return status;" — confirm against upstream.
 */
246 static int nfs_writepage_async(struct nfs_open_context
*ctx
,
247 struct inode
*inode
, struct page
*page
,
248 unsigned int offset
, unsigned int count
)
250 struct nfs_page
*req
;
253 req
= nfs_update_request(ctx
, inode
, page
, offset
, count
);
254 status
= (IS_ERR(req
)) ? PTR_ERR(req
) : 0;
257 /* Update file length */
258 nfs_grow_file(page
, offset
, count
);
259 /* Set the PG_uptodate flag? */
260 nfs_mark_uptodate(page
, offset
, count
);
/* nfs_update_request() returned the request locked. */
261 nfs_unlock_request(req
);
/*
 * Map writeback_control intent onto an NFS flush priority:
 * reclaim-driven writeback is urgent (FLUSH_HIGHPRI); kupdate-driven
 * writeback is background work.
 * NOTE(review): the kupdate return value and the default return were
 * dropped by the extraction (upstream returns FLUSH_LOWPRI and
 * FLUSH_STABLE respectively) — confirm before building.
 */
266 static int wb_priority(struct writeback_control
*wbc
)
268 if (wbc
->for_reclaim
)
269 return FLUSH_HIGHPRI
;
270 if (wbc
->for_kupdate
)
276 * Write an mmapped page to the server.
 *
 * address_space ->writepage entry point: flushes prior writes, trims
 * the write to EOF on the last page, then writes asynchronously when
 * possible (inode reference held, not O_SYNC) or synchronously
 * otherwise. NOTE(review): extraction dropped several lines (err
 * declaration, goto labels, unlock_page, iput, final return) — compare
 * with upstream fs/nfs/write.c.
 */
278 int nfs_writepage(struct page
*page
, struct writeback_control
*wbc
)
280 struct nfs_open_context
*ctx
;
281 struct inode
*inode
= page
->mapping
->host
;
282 unsigned long end_index
;
283 unsigned offset
= PAGE_CACHE_SIZE
;
284 loff_t i_size
= i_size_read(inode
);
285 int inode_referenced
= 0;
286 int priority
= wb_priority(wbc
);
290 * Note: We need to ensure that we have a reference to the inode
291 * if we are to do asynchronous writes. If not, waiting
292 * in nfs_wait_on_request() may deadlock with clear_inode().
294 * If igrab() fails here, then it is in any case safe to
295 * call nfs_wb_page(), since there will be no pending writes.
 */
297 if (igrab(inode
) != 0)
298 inode_referenced
= 1;
299 end_index
= i_size
>> PAGE_CACHE_SHIFT
;
301 /* Ensure we've flushed out any previous writes */
302 nfs_wb_page_priority(inode
, page
, priority
);
/* Interior pages are always full writes. */
305 if (page
->index
< end_index
)
307 /* things got complicated... */
308 offset
= i_size
& (PAGE_CACHE_SIZE
-1);
310 /* OK, are we completely out? */
311 err
= 0; /* potential race with truncate - ignore */
312 if (page
->index
>= end_index
+1 || !offset
)
315 ctx
= nfs_find_open_context(inode
, FMODE_WRITE
);
321 if (!IS_SYNC(inode
) && inode_referenced
) {
322 err
= nfs_writepage_async(ctx
, inode
, page
, 0, offset
);
/* Under reclaim, push writes to stable storage immediately. */
325 if (wbc
->for_reclaim
)
326 nfs_flush_inode(inode
, 0, 0, FLUSH_STABLE
);
329 err
= nfs_writepage_sync(ctx
, inode
, page
, 0,
/* On failure, let the VM retry this page later. */
333 redirty_page_for_writepage(wbc
, page
);
338 put_nfs_open_context(ctx
);
341 if (inode_referenced
)
347 * Note: causes nfs_update_request() to block on the assumption
348 * that the writeback is generated due to memory pressure.
 *
 * address_space ->writepages: runs generic_writepages(), then flushes
 * (and for WB_SYNC_ALL, waits for and commits) the inode's queued NFS
 * requests, using BDI_write_congested as a single-flusher gate.
 * NOTE(review): extraction dropped the error bailouts, the "out:"
 * label and final return — compare with upstream.
 */
350 int nfs_writepages(struct address_space
*mapping
, struct writeback_control
*wbc
)
352 struct backing_dev_info
*bdi
= mapping
->backing_dev_info
;
353 struct inode
*inode
= mapping
->host
;
356 err
= generic_writepages(mapping
, wbc
);
/* Only one flusher at a time; nonblocking callers back off. */
359 while (test_and_set_bit(BDI_write_congested
, &bdi
->state
) != 0) {
360 if (wbc
->nonblocking
)
362 nfs_wait_on_write_congestion(mapping
, 0);
364 err
= nfs_flush_inode(inode
, 0, 0, wb_priority(wbc
));
/* nfs_flush_inode() returns the number of requests flushed. */
367 wbc
->nr_to_write
-= err
;
368 if (!wbc
->nonblocking
&& wbc
->sync_mode
== WB_SYNC_ALL
) {
369 err
= nfs_wait_on_requests(inode
, 0, 0);
373 err
= nfs_commit_inode(inode
, 0, 0, wb_priority(wbc
));
375 wbc
->nr_to_write
-= err
;
/* Drop the congestion gate and wake anyone waiting on it. */
379 clear_bit(BDI_write_congested
, &bdi
->state
);
380 wake_up_all(&nfs_write_congestion
);
385 * Insert a write request into an inode
 *
 * Adds @req to the inode's radix tree of pending writes, starting a
 * data update on the first outstanding request. Takes an extra
 * reference on the request for the tree. NOTE(review): extraction
 * dropped the error-return path, the npages accounting, and the
 * SetPagePrivate/wb_index setup visible upstream — confirm.
 */
387 static int nfs_inode_add_request(struct inode
*inode
, struct nfs_page
*req
)
389 struct nfs_inode
*nfsi
= NFS_I(inode
);
392 error
= radix_tree_insert(&nfsi
->nfs_page_tree
, req
->wb_index
, req
);
/* Caller holds the page lock, so a duplicate index is a logic bug. */
393 BUG_ON(error
== -EEXIST
);
398 nfs_begin_data_update(inode
);
399 if (nfs_have_delegation(inode
, FMODE_WRITE
))
/* Reference owned by the radix tree. */
403 atomic_inc(&req
->wb_count
);
408 * Remove a write request from an inode
 *
 * Deletes @req from the inode's radix tree; when the last request is
 * gone, ends the deferred data update. Finally drops the tree's
 * reference on the request. NOTE(review): extraction dropped the
 * npages decrement and the branch structure around the two
 * spin_unlock() calls — compare with upstream.
 */
410 static void nfs_inode_remove_request(struct nfs_page
*req
)
412 struct inode
*inode
= req
->wb_context
->dentry
->d_inode
;
413 struct nfs_inode
*nfsi
= NFS_I(inode
);
/* Only the owner of a locked (busy) request may remove it. */
415 BUG_ON (!NFS_WBACK_BUSY(req
));
417 spin_lock(&nfsi
->req_lock
);
418 radix_tree_delete(&nfsi
->nfs_page_tree
, req
->wb_index
);
421 spin_unlock(&nfsi
->req_lock
);
422 nfs_end_data_update_defer(inode
);
425 spin_unlock(&nfsi
->req_lock
);
426 nfs_clear_request(req
);
427 nfs_release_request(req
);
/*
 * Look up the pending write request for page @index in the inode's
 * radix tree and take a reference on it. Caller must hold
 * nfsi->req_lock. Returns NULL when no request is queued.
 * NOTE(review): the NULL guard around atomic_inc and the return
 * statement were dropped by the extraction.
 */
433 static inline struct nfs_page
*
434 _nfs_find_request(struct inode
*inode
, unsigned long index
)
436 struct nfs_inode
*nfsi
= NFS_I(inode
);
437 struct nfs_page
*req
;
439 req
= (struct nfs_page
*)radix_tree_lookup(&nfsi
->nfs_page_tree
, index
);
441 atomic_inc(&req
->wb_count
);
/*
 * Locked wrapper around _nfs_find_request(): takes nfsi->req_lock for
 * the radix-tree lookup. Returns a referenced request or NULL.
 * NOTE(review): the final "return req;" was dropped by the extraction.
 */
445 static struct nfs_page
*
446 nfs_find_request(struct inode
*inode
, unsigned long index
)
448 struct nfs_page
*req
;
449 struct nfs_inode
*nfsi
= NFS_I(inode
);
451 spin_lock(&nfsi
->req_lock
);
452 req
= _nfs_find_request(inode
, index
);
453 spin_unlock(&nfsi
->req_lock
);
458 * Add a request to the inode's dirty list.
 *
 * Queues @req on nfsi->dirty under req_lock, bumps the global dirty
 * page count, and marks the inode dirty so VFS writeback sees it.
 * NOTE(review): the nfsi->ndirty increment present upstream appears to
 * have been dropped (orig. line 468) — confirm.
 */
461 nfs_mark_request_dirty(struct nfs_page
*req
)
463 struct inode
*inode
= req
->wb_context
->dentry
->d_inode
;
464 struct nfs_inode
*nfsi
= NFS_I(inode
);
466 spin_lock(&nfsi
->req_lock
);
467 nfs_list_add_request(req
, &nfsi
->dirty
);
469 spin_unlock(&nfsi
->req_lock
);
470 inc_page_state(nr_dirty
);
471 mark_inode_dirty(inode
);
475 * Check if a request is dirty
 *
 * True when @req is currently linked on its inode's dirty list (on
 * some list AND that list is nfsi->dirty, not the commit list).
 */
478 nfs_dirty_request(struct nfs_page
*req
)
480 struct nfs_inode
*nfsi
= NFS_I(req
->wb_context
->dentry
->d_inode
);
481 return !list_empty(&req
->wb_list
) && req
->wb_list_head
== &nfsi
->dirty
;
484 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
486 * Add a request to the inode's commit list.
 *
 * v3/v4 only: after an UNSTABLE write completes, the request is parked
 * on nfsi->commit until a COMMIT RPC confirms stable storage.
 * NOTE(review): the nfsi->ncommit increment present upstream appears
 * to have been dropped (orig. line 496) — confirm.
 */
489 nfs_mark_request_commit(struct nfs_page
*req
)
491 struct inode
*inode
= req
->wb_context
->dentry
->d_inode
;
492 struct nfs_inode
*nfsi
= NFS_I(inode
);
494 spin_lock(&nfsi
->req_lock
);
495 nfs_list_add_request(req
, &nfsi
->commit
);
497 spin_unlock(&nfsi
->req_lock
);
/* Pages awaiting COMMIT are accounted as "unstable", not "dirty". */
498 inc_page_state(nr_unstable
);
499 mark_inode_dirty(inode
);
504 * Wait for a request to complete.
506 * Interruptible by signals only if mounted with intr flag.
 *
 * Walks the radix tree over [idx_start, idx_start+npages) and sleeps
 * on every busy (locked) request, dropping req_lock around each wait.
 * Returns the number of requests waited for, or a wait error.
 * NOTE(review): extraction dropped the npages==0 "whole file" branch
 * (idx_end = ~0), error declaration/bailout, continue/res++ lines and
 * the final return — compare with upstream.
 */
509 nfs_wait_on_requests(struct inode
*inode
, unsigned long idx_start
, unsigned int npages
)
511 struct nfs_inode
*nfsi
= NFS_I(inode
);
512 struct nfs_page
*req
;
513 unsigned long idx_end
, next
;
514 unsigned int res
= 0;
520 idx_end
= idx_start
+ npages
- 1;
522 spin_lock(&nfsi
->req_lock
);
/* Gang-lookup one request at a time so the lock can be dropped
 * while sleeping. */
524 while (radix_tree_gang_lookup(&nfsi
->nfs_page_tree
, (void **)&req
, next
, 1)) {
525 if (req
->wb_index
> idx_end
)
528 next
= req
->wb_index
+ 1;
529 if (!NFS_WBACK_BUSY(req
))
/* Hold a reference across the unlocked wait. */
532 atomic_inc(&req
->wb_count
);
533 spin_unlock(&nfsi
->req_lock
);
534 error
= nfs_wait_on_request(req
);
535 nfs_release_request(req
);
538 spin_lock(&nfsi
->req_lock
);
541 spin_unlock(&nfsi
->req_lock
);
546 * nfs_scan_dirty - Scan an inode for dirty requests
547 * @inode: NFS inode to scan
548 * @dst: destination list
549 * @idx_start: lower bound of page->index to scan.
550 * @npages: idx_start + npages sets the upper bound to scan.
552 * Moves requests from the inode's dirty page list.
553 * The requests are *not* checked to ensure that they form a contiguous set.
 *
 * Returns the number of requests moved; also fixes up the global dirty
 * page accounting. Caller is expected to hold nfsi->req_lock.
 * NOTE(review): the res declaration, ndirty decrement, and final
 * return were dropped by the extraction.
 */
556 nfs_scan_dirty(struct inode
*inode
, struct list_head
*dst
, unsigned long idx_start
, unsigned int npages
)
558 struct nfs_inode
*nfsi
= NFS_I(inode
);
560 res
= nfs_scan_list(&nfsi
->dirty
, dst
, idx_start
, npages
);
562 sub_page_state(nr_dirty
,res
);
/* Sanity check: counter and list emptiness must agree. */
563 if ((nfsi
->ndirty
== 0) != list_empty(&nfsi
->dirty
))
564 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ndirty.\n");
568 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
570 * nfs_scan_commit - Scan an inode for commit requests
571 * @inode: NFS inode to scan
572 * @dst: destination list
573 * @idx_start: lower bound of page->index to scan.
574 * @npages: idx_start + npages sets the upper bound to scan.
576 * Moves requests from the inode's 'commit' request list.
577 * The requests are *not* checked to ensure that they form a contiguous set.
 *
 * v3/v4 only. Returns the number of requests moved to @dst.
 * NOTE(review): the res declaration and final return were dropped by
 * the extraction.
 */
580 nfs_scan_commit(struct inode
*inode
, struct list_head
*dst
, unsigned long idx_start
, unsigned int npages
)
582 struct nfs_inode
*nfsi
= NFS_I(inode
);
584 res
= nfs_scan_list(&nfsi
->commit
, dst
, idx_start
, npages
);
585 nfsi
->ncommit
-= res
;
/* Sanity check: counter and list emptiness must agree. */
586 if ((nfsi
->ncommit
== 0) != list_empty(&nfsi
->commit
))
587 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ncommit.\n");
/*
 * Block until the backing device is no longer write-congested.
 * When @intr is set, the wait is TASK_INTERRUPTIBLE with the RPC
 * client's signal mask applied (so only "fatal" signals interrupt an
 * intr-mounted filesystem); otherwise it is uninterruptible.
 * NOTE(review): extraction dropped the wait-queue entry and sigset
 * declarations, the schedule()/signalled() calls and the return value
 * — compare with upstream.
 */
592 static int nfs_wait_on_write_congestion(struct address_space
*mapping
, int intr
)
594 struct backing_dev_info
*bdi
= mapping
->backing_dev_info
;
/* Fast path: nothing to wait for. */
600 if (!bdi_write_congested(bdi
))
603 struct rpc_clnt
*clnt
= NFS_CLIENT(mapping
->host
);
606 rpc_clnt_sigmask(clnt
, &oldset
);
607 prepare_to_wait(&nfs_write_congestion
, &wait
, TASK_INTERRUPTIBLE
);
/* Re-check after queueing to avoid a lost wakeup. */
608 if (bdi_write_congested(bdi
)) {
614 rpc_clnt_sigunmask(clnt
, &oldset
);
616 prepare_to_wait(&nfs_write_congestion
, &wait
, TASK_UNINTERRUPTIBLE
);
617 if (bdi_write_congested(bdi
))
620 finish_wait(&nfs_write_congestion
, &wait
);
626 * Try to update any existing write request, or create one if there is none.
627 * In order to match, the request's credentials must match those of
628 * the calling process.
630 * Note: Should always be called with the Page Lock held!
 *
 * Returns the (locked) request covering [offset, offset+bytes), either
 * an existing dirty request extended to cover the range, or a freshly
 * created one inserted into the inode. Returns ERR_PTR on conflict
 * (-EBUSY), signal (-ERESTARTSYS), or allocation/insert failure.
 * NOTE(review): the extraction dropped the retry loop structure, the
 * goto labels, the branch that distinguishes "found req" from "need
 * new req", and the final "return req;" — compare with upstream.
 */
632 static struct nfs_page
* nfs_update_request(struct nfs_open_context
* ctx
,
633 struct inode
*inode
, struct page
*page
,
634 unsigned int offset
, unsigned int bytes
)
636 struct nfs_server
*server
= NFS_SERVER(inode
);
637 struct nfs_inode
*nfsi
= NFS_I(inode
);
638 struct nfs_page
*req
, *new = NULL
;
639 unsigned long rqend
, end
;
641 end
= offset
+ bytes
;
/* Throttle under memory pressure; interruptible only for intr mounts. */
643 if (nfs_wait_on_write_congestion(page
->mapping
, server
->flags
& NFS_MOUNT_INTR
))
644 return ERR_PTR(-ERESTARTSYS
);
646 /* Loop over all inode entries and see if we find
647 * A request for the page we wish to update
 */
649 spin_lock(&nfsi
->req_lock
);
650 req
= _nfs_find_request(inode
, page
->index
);
/* Existing request is busy: drop the lock, wait for it, retry. */
652 if (!nfs_lock_request_dontget(req
)) {
654 spin_unlock(&nfsi
->req_lock
);
655 error
= nfs_wait_on_request(req
);
656 nfs_release_request(req
);
658 return ERR_PTR(error
);
661 spin_unlock(&nfsi
->req_lock
);
/* We raced: someone else queued a request; drop our speculative one. */
663 nfs_release_request(new);
/* No existing request: install the freshly created one. */
669 nfs_lock_request_dontget(new);
670 error
= nfs_inode_add_request(inode
, new);
672 spin_unlock(&nfsi
->req_lock
);
673 nfs_unlock_request(new);
674 return ERR_PTR(error
);
676 spin_unlock(&nfsi
->req_lock
);
677 nfs_mark_request_dirty(new);
680 spin_unlock(&nfsi
->req_lock
);
/* Allocate outside the spinlock, then loop back and retry the find. */
682 new = nfs_create_request(ctx
, inode
, page
, offset
, bytes
);
687 /* We have a request for our page.
688 * If the creds don't match, or the
689 * page addresses don't match,
690 * tell the caller to wait on the conflicting
 * request.
 */
693 rqend
= req
->wb_offset
+ req
->wb_bytes
;
694 if (req
->wb_context
!= ctx
695 || req
->wb_page
!= page
696 || !nfs_dirty_request(req
)
697 || offset
> rqend
|| end
< req
->wb_offset
) {
698 nfs_unlock_request(req
);
699 return ERR_PTR(-EBUSY
);
702 /* Okay, the request matches. Update the region */
703 if (offset
< req
->wb_offset
) {
704 req
->wb_offset
= offset
;
705 req
->wb_pgbase
= offset
;
706 req
->wb_bytes
= rqend
- req
->wb_offset
;
/* Extend the tail when the new write reaches past the old end. */
710 req
->wb_bytes
= end
- req
->wb_offset
;
/*
 * Flush out any pending write on @page that belongs to a different
 * open context than @file's, before the caller copies new data into
 * the page. Returns 0 or the negative error from nfs_wb_page().
 * NOTE(review): the status declaration/initialization and the NULL
 * guard around the found request were dropped by the extraction.
 */
715 int nfs_flush_incompatible(struct file
*file
, struct page
*page
)
717 struct nfs_open_context
*ctx
= (struct nfs_open_context
*)file
->private_data
;
718 struct inode
*inode
= page
->mapping
->host
;
719 struct nfs_page
*req
;
722 * Look for a request corresponding to this page. If there
723 * is one, and it belongs to another file, we flush it out
724 * before we try to copy anything into the page. Do this
725 * due to the lack of an ACCESS-type call in NFSv2.
726 * Also do the same if we find a request from an existing
 * dropped page.
 */
729 req
= nfs_find_request(inode
, page
->index
);
731 if (req
->wb_page
!= page
|| ctx
!= req
->wb_context
)
732 status
= nfs_wb_page(inode
, page
);
733 nfs_release_request(req
);
735 return (status
< 0) ? status
: 0;
739 * Update and possibly write a cached page of an NFS file.
741 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
742 * things with a page scheduled for an RPC call (e.g. invalidate it).
 *
 * Called from the write(2) path after data has been copied into the
 * page cache. For O_SYNC inodes it writes synchronously; otherwise it
 * queues/extends an async request, retrying via nfs_wb_page() while
 * nfs_update_request() reports -EBUSY. NOTE(review): extraction
 * dropped the status declaration, several branch/return lines and the
 * "done:" label — compare against upstream.
 */
744 int nfs_updatepage(struct file
*file
, struct page
*page
,
745 unsigned int offset
, unsigned int count
)
747 struct nfs_open_context
*ctx
= (struct nfs_open_context
*)file
->private_data
;
748 struct dentry
*dentry
= file
->f_dentry
;
749 struct inode
*inode
= page
->mapping
->host
;
750 struct nfs_page
*req
;
753 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
754 dentry
->d_parent
->d_name
.name
, dentry
->d_name
.name
,
755 count
, (long long)(page_offset(page
) +offset
));
/* O_SYNC: write through immediately; no request queuing. */
757 if (IS_SYNC(inode
)) {
758 status
= nfs_writepage_sync(ctx
, inode
, page
, offset
, count
, 0);
760 if (offset
== 0 && status
== PAGE_CACHE_SIZE
)
761 SetPageUptodate(page
);
767 /* If we're not using byte range locks, and we know the page
768 * is entirely in cache, it may be more efficient to avoid
769 * fragmenting write requests.
 */
771 if (PageUptodate(page
) && inode
->i_flock
== NULL
) {
772 loff_t end_offs
= i_size_read(inode
) - 1;
773 unsigned long end_index
= end_offs
>> PAGE_CACHE_SHIFT
;
/* Widen (offset, count) to the valid extent of the page. */
777 if (unlikely(end_offs
< 0)) {
779 } else if (page
->index
== end_index
) {
781 pglen
= (unsigned int)(end_offs
& (PAGE_CACHE_SIZE
-1)) + 1;
784 } else if (page
->index
< end_index
)
785 count
= PAGE_CACHE_SIZE
;
789 * Try to find an NFS request corresponding to this page
791 * If the existing request cannot be updated, we must flush
 * it out now.
 */
795 req
= nfs_update_request(ctx
, inode
, page
, offset
, count
);
796 status
= (IS_ERR(req
)) ? PTR_ERR(req
) : 0;
797 if (status
!= -EBUSY
)
799 /* Request could not be updated. Flush it out and try again */
800 status
= nfs_wb_page(inode
, page
);
801 } while (status
>= 0);
807 /* Update file length */
808 nfs_grow_file(page
, offset
, count
);
809 /* Set the PG_uptodate flag? */
/* Use the (possibly merged) request's extent, not our own range. */
810 nfs_mark_uptodate(page
, req
->wb_pgbase
, req
->wb_bytes
);
811 nfs_unlock_request(req
);
813 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
814 status
, (long long)i_size_read(inode
));
/* Error path: the page contents can no longer be trusted. */
816 ClearPageUptodate(page
);
/*
 * Called once the last partial-write RPC for a page completes: end
 * page writeback, then either requeue the request (reschedule after a
 * server reboot), park it on the commit list (unstable write), or
 * remove it entirely. NOTE(review): extraction dropped the dprintk
 * lines, the #else branch and closing braces — compare with upstream.
 */
820 static void nfs_writepage_release(struct nfs_page
*req
)
822 end_page_writeback(req
->wb_page
);
824 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
825 if (!PageError(req
->wb_page
)) {
826 if (NFS_NEED_RESCHED(req
)) {
827 nfs_mark_request_dirty(req
);
829 } else if (NFS_NEED_COMMIT(req
)) {
830 nfs_mark_request_commit(req
);
834 nfs_inode_remove_request(req
);
/* Per-request flags are consumed; clear them for reuse. */
837 nfs_clear_commit(req
);
838 nfs_clear_reschedule(req
);
840 nfs_inode_remove_request(req
);
842 nfs_unlock_request(req
);
/*
 * Map FLUSH_* flags to an RPC task priority; normal priority when
 * neither HIGHPRI nor LOWPRI is set.
 * NOTE(review): the "case FLUSH_HIGHPRI:" / "case FLUSH_LOWPRI:"
 * labels were dropped by the extraction.
 */
845 static inline int flush_task_priority(int how
)
847 switch (how
& (FLUSH_HIGHPRI
|FLUSH_LOWPRI
)) {
849 return RPC_PRIORITY_HIGH
;
851 return RPC_PRIORITY_LOW
;
853 return RPC_PRIORITY_NORMAL
;
857 * Set up the argument/result storage required for the RPC call.
 *
 * Fills in the WRITE args (fh, offset, pgbase, page vector, count,
 * open context) and result buffers on @data from @req, lets the
 * protocol version finish the setup via ->write_setup(), and wires the
 * task's priority, scheduling cookie and release callback.
 * NOTE(review): extraction dropped the trailing parameters of the
 * signature (int how), the inode declaration, data->req assignment,
 * and parts of the final dprintk argument list — compare upstream.
 */
859 static void nfs_write_rpcsetup(struct nfs_page
*req
,
860 struct nfs_write_data
*data
,
861 unsigned int count
, unsigned int offset
,
864 struct rpc_task
*task
= &data
->task
;
867 /* Set up the RPC argument and reply structs
868 * NB: take care not to mess about with data->commit et al. */
871 data
->inode
= inode
= req
->wb_context
->dentry
->d_inode
;
872 data
->cred
= req
->wb_context
->cred
;
874 data
->args
.fh
= NFS_FH(inode
);
/* @offset is relative to the request; req_offset() gives the file
 * offset of the request itself. */
875 data
->args
.offset
= req_offset(req
) + offset
;
876 data
->args
.pgbase
= req
->wb_pgbase
+ offset
;
877 data
->args
.pages
= data
->pagevec
;
878 data
->args
.count
= count
;
879 data
->args
.context
= req
->wb_context
;
881 data
->res
.fattr
= &data
->fattr
;
882 data
->res
.count
= count
;
883 data
->res
.verf
= &data
->verf
;
/* Version-specific setup (v2/v3/v4 choose the RPC proc, stable mode). */
885 NFS_PROTO(inode
)->write_setup(data
, how
);
887 data
->task
.tk_priority
= flush_task_priority(how
);
/* Cookie groups tasks of one inode for the RPC scheduler. */
888 data
->task
.tk_cookie
= (unsigned long)inode
;
889 data
->task
.tk_calldata
= data
;
890 /* Release requests */
891 data
->task
.tk_release
= nfs_writedata_release
;
893 dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
896 (long long)NFS_FILEID(inode
),
898 (unsigned long long)data
->args
.offset
);
/*
 * Kick off the prepared RPC task with the client's signal mask applied
 * so stray signals cannot interrupt the submission.
 * NOTE(review): the sigset_t declaration was dropped by the extraction.
 */
901 static void nfs_execute_write(struct nfs_write_data
*data
)
903 struct rpc_clnt
*clnt
= NFS_CLIENT(data
->inode
);
906 rpc_clnt_sigmask(clnt
, &oldset
);
908 rpc_execute(&data
->task
);
910 rpc_clnt_sigunmask(clnt
, &oldset
);
914 * Generate multiple small requests to write out a single
915 * contiguous dirty area on one page.
 *
 * Used when wsize < PAGE_CACHE_SIZE: splits one request into
 * ceil(bytes/wsize) RPCs, each sharing the same page, with a
 * completion counter (wb_complete) so nfs_writepage_release() runs
 * only after the last piece. On allocation failure all pre-allocated
 * pieces are freed and the request is re-queued dirty.
 * NOTE(review): extraction dropped the list/requests declarations,
 * both loop constructs, the offset bookkeeping and the return values
 * — compare with upstream.
 */
917 static int nfs_flush_multi(struct list_head
*head
, struct inode
*inode
, int how
)
919 struct nfs_page
*req
= nfs_list_entry(head
->next
);
920 struct page
*page
= req
->wb_page
;
921 struct nfs_write_data
*data
;
922 unsigned int wsize
= NFS_SERVER(inode
)->wsize
;
923 unsigned int nbytes
, offset
;
927 nfs_list_remove_request(req
);
929 nbytes
= req
->wb_bytes
;
/* Pre-allocate one nfs_write_data per wsize-sized piece. */
931 data
= nfs_writedata_alloc();
934 list_add(&data
->pages
, &list
);
940 atomic_set(&req
->wb_complete
, requests
);
942 ClearPageError(page
);
943 SetPageWriteback(page
);
945 nbytes
= req
->wb_bytes
;
947 data
= list_entry(list
.next
, struct nfs_write_data
, pages
);
948 list_del_init(&data
->pages
);
950 data
->pagevec
[0] = page
;
951 data
->complete
= nfs_writeback_done_partial
;
953 if (nbytes
> wsize
) {
954 nfs_write_rpcsetup(req
, data
, wsize
, offset
, how
);
/* Final, possibly short, piece. */
958 nfs_write_rpcsetup(req
, data
, nbytes
, offset
, how
);
961 nfs_execute_write(data
);
962 } while (nbytes
!= 0);
/* Error path: free whatever was pre-allocated and requeue. */
967 while (!list_empty(&list
)) {
968 data
= list_entry(list
.next
, struct nfs_write_data
, pages
);
969 list_del(&data
->pages
);
970 nfs_writedata_free(data
);
972 nfs_mark_request_dirty(req
);
973 nfs_unlock_request(req
);
978 * Create an RPC task for the given write request and kick it.
979 * The page must have been locked by the caller.
981 * It may happen that the page we're passed is not marked dirty.
982 * This is the case if nfs_updatepage detects a conflicting request
983 * that has been written but not committed.
 *
 * Coalesces every request on @head into a single WRITE RPC (one page
 * per request), delegating to nfs_flush_multi() when the server's
 * wsize is smaller than a page. On allocation failure all requests
 * are re-queued dirty. NOTE(review): extraction dropped the pages/
 * count declarations, the failure branch and the return statements —
 * compare with upstream.
 */
985 static int nfs_flush_one(struct list_head
*head
, struct inode
*inode
, int how
)
987 struct nfs_page
*req
;
989 struct nfs_write_data
*data
;
992 if (NFS_SERVER(inode
)->wsize
< PAGE_CACHE_SIZE
)
993 return nfs_flush_multi(head
, inode
, how
);
995 data
= nfs_writedata_alloc();
999 pages
= data
->pagevec
;
/* Move each request onto data->pages and collect its page and size. */
1001 while (!list_empty(head
)) {
1002 req
= nfs_list_entry(head
->next
);
1003 nfs_list_remove_request(req
);
1004 nfs_list_add_request(req
, &data
->pages
);
1005 ClearPageError(req
->wb_page
);
1006 SetPageWriteback(req
->wb_page
);
1007 *pages
++ = req
->wb_page
;
1008 count
+= req
->wb_bytes
;
/* Use the first request for the RPC's base offset. */
1010 req
= nfs_list_entry(data
->pages
.next
);
1012 data
->complete
= nfs_writeback_done_full
;
1013 /* Set up the argument struct */
1014 nfs_write_rpcsetup(req
, data
, count
, 0, how
);
1016 nfs_execute_write(data
);
/* Error path: requeue everything still on @head. */
1019 while (!list_empty(head
)) {
1020 struct nfs_page
*req
= nfs_list_entry(head
->next
);
1021 nfs_list_remove_request(req
);
1022 nfs_mark_request_dirty(req
);
1023 nfs_unlock_request(req
);
/*
 * Flush a list of write requests: repeatedly coalesce a contiguous run
 * (up to @wpages pages) onto a private list and hand it to
 * nfs_flush_one(). On error, everything remaining on @head is
 * re-queued dirty. NOTE(review): extraction dropped the error
 * declaration/bailout and the return statements — compare upstream.
 */
1029 nfs_flush_list(struct list_head
*head
, int wpages
, int how
)
1031 LIST_HEAD(one_request
);
1032 struct nfs_page
*req
;
1034 unsigned int pages
= 0;
1036 while (!list_empty(head
)) {
1037 pages
+= nfs_coalesce_requests(head
, &one_request
, wpages
);
1038 req
= nfs_list_entry(one_request
.next
);
1039 error
= nfs_flush_one(&one_request
, req
->wb_context
->dentry
->d_inode
, how
);
/* Error path: requeue the unflushed remainder. */
1046 while (!list_empty(head
)) {
1047 req
= nfs_list_entry(head
->next
);
1048 nfs_list_remove_request(req
);
1049 nfs_mark_request_dirty(req
);
1050 nfs_unlock_request(req
);
1056 * Handle a write reply that flushed part of a page.
 *
 * Completion callback for the nfs_flush_multi() pieces. Records
 * errors on the open context, and for v3/v4 unstable replies stores
 * the write verifier (or schedules a full resend when the verifier
 * changed, i.e. the server rebooted). The shared wb_complete counter
 * releases the page once the last piece finishes.
 * NOTE(review): extraction dropped the SetPageError branch, some
 * dprintk arguments and closing braces — compare with upstream.
 */
1058 static void nfs_writeback_done_partial(struct nfs_write_data
*data
, int status
)
1060 struct nfs_page
*req
= data
->req
;
1061 struct page
*page
= req
->wb_page
;
1063 dprintk("NFS: write (%s/%Ld %d@%Ld)",
1064 req
->wb_context
->dentry
->d_inode
->i_sb
->s_id
,
1065 (long long)NFS_FILEID(req
->wb_context
->dentry
->d_inode
),
1067 (long long)req_offset(req
));
/* Error: page contents no longer trustworthy. */
1070 ClearPageUptodate(page
);
1072 req
->wb_context
->error
= status
;
1073 dprintk(", error = %d\n", status
);
1075 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1076 if (data
->verf
.committed
< NFS_FILE_SYNC
) {
1077 if (!NFS_NEED_COMMIT(req
)) {
/* First unstable piece: remember its verifier for COMMIT. */
1078 nfs_defer_commit(req
);
1079 memcpy(&req
->wb_verf
, &data
->verf
, sizeof(req
->wb_verf
));
1080 dprintk(" defer commit\n");
1081 } else if (memcmp(&req
->wb_verf
, &data
->verf
, sizeof(req
->wb_verf
))) {
/* Verifier mismatch between pieces: server rebooted mid-write. */
1082 nfs_defer_reschedule(req
);
1083 dprintk(" server reboot detected\n");
1090 if (atomic_dec_and_test(&req
->wb_complete
))
1091 nfs_writepage_release(req
);
1095 * Handle a write reply that flushes a whole page.
1097 * FIXME: There is an inherent race with invalidate_inode_pages and
1098 * writebacks since the page->count is kept > 1 for as long
1099 * as the page has a write request pending.
 *
 * Completion callback for nfs_flush_one(): for every request in the
 * batch, end page writeback, then remove the request (stable write)
 * or move it to the commit list (v3/v4 unstable write, verifier
 * saved). Errors are recorded on the open context and the request is
 * removed. NOTE(review): extraction dropped the page declaration,
 * SetPageError, several dprintk arguments and closing braces —
 * compare with upstream.
 */
1101 static void nfs_writeback_done_full(struct nfs_write_data
*data
, int status
)
1103 struct nfs_page
*req
;
1106 /* Update attributes as result of writeback. */
1107 while (!list_empty(&data
->pages
)) {
1108 req
= nfs_list_entry(data
->pages
.next
);
1109 nfs_list_remove_request(req
);
1110 page
= req
->wb_page
;
1112 dprintk("NFS: write (%s/%Ld %d@%Ld)",
1113 req
->wb_context
->dentry
->d_inode
->i_sb
->s_id
,
1114 (long long)NFS_FILEID(req
->wb_context
->dentry
->d_inode
),
1116 (long long)req_offset(req
));
/* Error path for this request. */
1119 ClearPageUptodate(page
);
1121 req
->wb_context
->error
= status
;
1122 end_page_writeback(page
);
1123 nfs_inode_remove_request(req
);
1124 dprintk(", error = %d\n", status
);
1127 end_page_writeback(page
);
1129 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/* Stable on the server already: nothing left to commit. */
1130 if (data
->args
.stable
!= NFS_UNSTABLE
|| data
->verf
.committed
== NFS_FILE_SYNC
) {
1131 nfs_inode_remove_request(req
);
/* Unstable: keep the verifier and park the request for COMMIT. */
1135 memcpy(&req
->wb_verf
, &data
->verf
, sizeof(req
->wb_verf
));
1136 nfs_mark_request_commit(req
);
1137 dprintk(" marked for commit\n");
1139 nfs_inode_remove_request(req
);
1142 nfs_unlock_request(req
);
1147 * This function is called when the WRITE call is complete.
 *
 * Common RPC completion for all WRITE tasks: warns (once per 5 min)
 * about servers that downgrade the requested stability, transparently
 * restarts short writes (resuming where the server stopped, upgraded
 * to FILE_SYNC for unstable writes), and finally dispatches to the
 * per-batch ->complete() handler.
 * NOTE(review): extraction dropped several brace/return lines and the
 * printk() of the short-write warning — compare with upstream.
 */
1149 void nfs_writeback_done(struct rpc_task
*task
)
1151 struct nfs_write_data
*data
= (struct nfs_write_data
*) task
->tk_calldata
;
1152 struct nfs_writeargs
*argp
= &data
->args
;
1153 struct nfs_writeres
*resp
= &data
->res
;
1155 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1156 task
->tk_pid
, task
->tk_status
);
1158 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1159 if (resp
->verf
->committed
< argp
->stable
&& task
->tk_status
>= 0) {
1160 /* We tried a write call, but the server did not
1161 * commit data to stable storage even though we
 * requested it.
1163 * Note: There is a known bug in Tru64 < 5.0 in which
1164 * the server reports NFS_DATA_SYNC, but performs
1165 * NFS_FILE_SYNC. We therefore implement this checking
1166 * as a dprintk() in order to avoid filling syslog.
 */
1168 static unsigned long complain
;
/* Rate-limit the complaint to once per 300 seconds. */
1170 if (time_before(complain
, jiffies
)) {
1171 dprintk("NFS: faulty NFS server %s:"
1172 " (committed = %d) != (stable = %d)\n",
1173 NFS_SERVER(data
->inode
)->hostname
,
1174 resp
->verf
->committed
, argp
->stable
);
1175 complain
= jiffies
+ 300 * HZ
;
1179 /* Is this a short write? */
1180 if (task
->tk_status
>= 0 && resp
->count
< argp
->count
) {
1181 static unsigned long complain
;
1183 /* Has the server at least made some progress? */
1184 if (resp
->count
!= 0) {
1185 /* Was this an NFSv2 write or an NFSv3 stable write? */
1186 if (resp
->verf
->committed
!= NFS_UNSTABLE
) {
1187 /* Resend from where the server left off */
1188 argp
->offset
+= resp
->count
;
1189 argp
->pgbase
+= resp
->count
;
1190 argp
->count
-= resp
->count
;
1192 /* Resend as a stable write in order to avoid
1193 * headaches in the case of a server crash.
 */
1195 argp
->stable
= NFS_FILE_SYNC
;
1197 rpc_restart_call(task
);
1200 if (time_before(complain
, jiffies
)) {
1202 "NFS: Server wrote less than requested.\n");
1203 complain
= jiffies
+ 300 * HZ
;
1205 /* Can't do anything about it except throw an error. */
1206 task
->tk_status
= -EIO
;
1210 * Process the nfs_page list
 */
1212 data
->complete(data
, task
->tk_status
);
1216 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/* RPC task release callback for COMMIT: return the nfs_write_data
 * (hanging off tk_calldata) to the commit mempool. */
1217 static void nfs_commit_release(struct rpc_task
*task
)
1219 struct nfs_write_data
*wdata
= (struct nfs_write_data
*)task
->tk_calldata
;
1220 nfs_commit_free(wdata
);
1224 * Set up the argument/result storage required for the RPC call.
 *
 * COMMIT variant of nfs_write_rpcsetup(): splices the request list
 * onto @data, computes the byte range spanned by the first and last
 * requests (clamped to 0 = "whole file" when the range reaches EOF or
 * does not fit in a signed 32-bit count), fills args/res, and wires
 * the task. NOTE(review): extraction dropped the "len = end - start"
 * computation and the "len = 0" clamp body — compare with upstream.
 */
1226 static void nfs_commit_rpcsetup(struct list_head
*head
,
1227 struct nfs_write_data
*data
, int how
)
1229 struct rpc_task
*task
= &data
->task
;
1230 struct nfs_page
*first
, *last
;
1231 struct inode
*inode
;
1232 loff_t start
, end
, len
;
1234 /* Set up the RPC argument and reply structs
1235 * NB: take care not to mess about with data->commit et al. */
1237 list_splice_init(head
, &data
->pages
);
1238 first
= nfs_list_entry(data
->pages
.next
);
1239 last
= nfs_list_entry(data
->pages
.prev
);
1240 inode
= first
->wb_context
->dentry
->d_inode
;
1243 * Determine the offset range of requests in the COMMIT call.
1244 * We rely on the fact that data->pages is an ordered list...
 */
1246 start
= req_offset(first
);
1247 end
= req_offset(last
) + last
->wb_bytes
;
1249 /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
1250 if (end
>= i_size_read(inode
) || len
< 0 || len
> (~((u32
)0) >> 1))
1253 data
->inode
= inode
;
1254 data
->cred
= first
->wb_context
->cred
;
1256 data
->args
.fh
= NFS_FH(data
->inode
);
1257 data
->args
.offset
= start
;
1258 data
->args
.count
= len
;
1259 data
->res
.count
= len
;
1260 data
->res
.fattr
= &data
->fattr
;
1261 data
->res
.verf
= &data
->verf
;
1263 NFS_PROTO(inode
)->commit_setup(data
, how
);
1265 data
->task
.tk_priority
= flush_task_priority(how
);
1266 data
->task
.tk_cookie
= (unsigned long)inode
;
1267 data
->task
.tk_calldata
= data
;
1268 /* Release requests */
1269 data
->task
.tk_release
= nfs_commit_release
;
1271 dprintk("NFS: %4d initiated commit call\n", task
->tk_pid
);
1275 * Commit dirty pages
/*
 * nfs_commit_list - start a COMMIT RPC for the requests on 'head'.
 * On the success path the requests travel with the RPC (they were
 * spliced onto data->pages by nfs_commit_rpcsetup); the trailing loop
 * is the failure path, which re-marks each request for a later commit
 * and unlocks it.
 * NOTE(review): the allocation-failure check after nfs_commit_alloc()
 * and the return statements are missing from this copy (embedded
 * numbering skips 1284-1287 and 1292-1293); verify against upstream.
 */
1278 nfs_commit_list(struct list_head
*head
, int how
)
1280 struct nfs_write_data
*data
;
1281 struct nfs_page
*req
;
/* Allocate the commit descriptor (mempool-backed). */
1283 data
= nfs_commit_alloc();
1288 /* Set up the argument struct */
1289 nfs_commit_rpcsetup(head
, data
, how
);
/* Hand the task over to the RPC layer. */
1291 nfs_execute_write(data
);
/* Failure path: put every request back on the commit list. */
1294 while (!list_empty(head
)) {
1295 req
= nfs_list_entry(head
->next
);
1296 nfs_list_remove_request(req
);
1297 nfs_mark_request_commit(req
);
1298 nfs_unlock_request(req
);
1304 * COMMIT call returned
/*
 * nfs_commit_done - COMMIT RPC completion handler.
 * Walks data->pages: on RPC error the request is failed (error stored
 * on its open context) and removed; otherwise the server's write
 * verifier is compared against the one stored when the page was
 * written — a match means the data is stable on disk and the request
 * can be dropped, a mismatch means the server restarted and the page
 * must be written again.
 * NOTE(review): several physical lines (braces, dprintk arguments, the
 * declaration/accumulation of 'res') are missing from this copy —
 * verify against upstream before editing.
 */
1307 nfs_commit_done(struct rpc_task
*task
)
1309 struct nfs_write_data
*data
= (struct nfs_write_data
*)task
->tk_calldata
;
1310 struct nfs_page
*req
;
1313 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1314 task
->tk_pid
, task
->tk_status
);
/* Process every request that travelled with this COMMIT. */
1316 while (!list_empty(&data
->pages
)) {
1317 req
= nfs_list_entry(data
->pages
.next
);
1318 nfs_list_remove_request(req
);
1320 dprintk("NFS: commit (%s/%Ld %d@%Ld)",
1321 req
->wb_context
->dentry
->d_inode
->i_sb
->s_id
,
1322 (long long)NFS_FILEID(req
->wb_context
->dentry
->d_inode
),
1324 (long long)req_offset(req
));
/* RPC-level failure: record the error on the open context and drop
 * the request. */
1325 if (task
->tk_status
< 0) {
1326 req
->wb_context
->error
= task
->tk_status
;
1327 nfs_inode_remove_request(req
);
1328 dprintk(", error = %d\n", task
->tk_status
);
1332 /* Okay, COMMIT succeeded, apparently. Check the verifier
1333 * returned by the server against all stored verfs. */
1334 if (!memcmp(req
->wb_verf
.verifier
, data
->verf
.verifier
, sizeof(data
->verf
.verifier
))) {
1335 /* We have a match */
1336 nfs_inode_remove_request(req
);
1340 /* We have a mismatch. Write the page again */
1341 dprintk(" mismatch\n");
1342 nfs_mark_request_dirty(req
);
1344 nfs_unlock_request(req
);
/* Presumably subtracts the number of committed requests from the
 * global unstable-page statistic; 'res' is not declared in the
 * visible lines — TODO confirm against upstream. */
1347 sub_page_state(nr_unstable
,res
);
/*
 * nfs_flush_inode - scan for dirty requests on 'inode' in the page
 * range [idx_start, idx_start + npages) and issue WRITE RPCs for them
 * via nfs_flush_list().  The dirty-list scan is performed under the
 * per-inode req_lock.
 * NOTE(review): the declarations of 'res', 'error' and 'head', and the
 * return statement, are missing from this copy (embedded numbering
 * skips 1353-1358 and everything after 1363); verify against upstream.
 */
1351 int nfs_flush_inode(struct inode
*inode
, unsigned long idx_start
,
1352 unsigned int npages
, int how
)
1354 struct nfs_inode
*nfsi
= NFS_I(inode
);
/* Collect matching dirty requests onto a private list under the lock. */
1359 spin_lock(&nfsi
->req_lock
);
1360 res
= nfs_scan_dirty(inode
, &head
, idx_start
, npages
);
1361 spin_unlock(&nfsi
->req_lock
);
/* Issue the WRITEs; wpages bounds the pages coalesced per request. */
1363 error
= nfs_flush_list(&head
, NFS_SERVER(inode
)->wpages
, how
);
1369 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * nfs_commit_inode - scan for requests awaiting COMMIT on 'inode' in
 * the page range [idx_start, idx_start + npages) and send a COMMIT
 * call for them (NFSv3/v4 only).
 * NOTE(review): this copy shows two spin_unlock() calls (original
 * lines 1382 and 1385) with no visible branch between them — the
 * if/else control flow around the second scan and nfs_commit_list(),
 * plus the declarations and return, were lost in extraction; verify
 * against upstream.
 */
1370 int nfs_commit_inode(struct inode
*inode
, unsigned long idx_start
,
1371 unsigned int npages
, int how
)
1373 struct nfs_inode
*nfsi
= NFS_I(inode
);
1378 spin_lock(&nfsi
->req_lock
);
1379 res
= nfs_scan_commit(inode
, &head
, idx_start
, npages
);
/* Sweep up any remaining commit requests on the whole file. */
1381 res
+= nfs_scan_commit(inode
, &head
, 0, 0);
1382 spin_unlock(&nfsi
->req_lock
);
1383 error
= nfs_commit_list(&head
, how
);
1385 spin_unlock(&nfsi
->req_lock
);
/*
 * nfs_sync_inode - flush (and, for NFSv3/v4, commit) dirty pages of
 * 'inode' in the given range, looping while progress is being made
 * (a positive return from flush/commit indicates requests were
 * processed).
 * NOTE(review): the loop head, the 'wait'/'error' declarations, the
 * conditionals guarding each step, and the final return are missing
 * from this copy; verify against upstream.
 */
1392 int nfs_sync_inode(struct inode
*inode
, unsigned long idx_start
,
1393 unsigned int npages
, int how
)
/* FLUSH_WAIT selects synchronous behaviour: wait on outstanding
 * requests between passes. */
1398 wait
= how
& FLUSH_WAIT
;
1404 error
= nfs_wait_on_requests(inode
, idx_start
, npages
);
1406 error
= nfs_flush_inode(inode
, idx_start
, npages
, how
);
1407 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/* COMMIT only exists in NFSv3/v4. */
1409 error
= nfs_commit_inode(inode
, idx_start
, npages
, how
);
1411 } while (error
> 0);
/*
 * nfs_init_writepagecache - module init: create the nfs_write_data
 * slab cache and the mempools backing WRITE and COMMIT descriptors.
 * Mempools guarantee forward progress for writeback under memory
 * pressure.
 * NOTE(review): the trailing arguments of the kmem_cache_create() and
 * mempool_create() calls, the error-path returns, and the final
 * 'return 0;' are missing from this copy (embedded numbering skips
 * 1420, 1425-1427, 1432-1434 and 1436 onward); verify against
 * upstream.
 */
1415 int nfs_init_writepagecache(void)
1417 nfs_wdata_cachep
= kmem_cache_create("nfs_write_data",
1418 sizeof(struct nfs_write_data
),
1419 0, SLAB_HWCACHE_ALIGN
,
1421 if (nfs_wdata_cachep
== NULL
)
1424 nfs_wdata_mempool
= mempool_create(MIN_POOL_WRITE
,
1428 if (nfs_wdata_mempool
== NULL
)
1431 nfs_commit_mempool
= mempool_create(MIN_POOL_COMMIT
,
1435 if (nfs_commit_mempool
== NULL
)
1441 void nfs_destroy_writepagecache(void)
1443 mempool_destroy(nfs_commit_mempool
);
1444 mempool_destroy(nfs_wdata_mempool
);
1445 if (kmem_cache_destroy(nfs_wdata_cachep
))
1446 printk(KERN_INFO
"nfs_write_data: not all structures were freed\n");