/*
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#include "pnfs.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
static const struct nfs_rw_ops nfs_rw_read_ops;

static struct kmem_cache *nfs_rdata_cachep;

static struct nfs_pgio_header *nfs_readhdr_alloc(void)
{
	return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
}

static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
	kmem_cache_free(nfs_rdata_cachep, rhdr);
}

static int nfs_return_empty_page(struct page *page)
{
	zero_user(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

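/*
 * Initialize a read pageio descriptor.  When a pNFS layout driver is
 * active (and the caller did not force MDS I/O), its pg_read_ops drive
 * the read; otherwise the generic nfs_pgio_rw_ops are used.
 */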
void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
			  struct inode *inode, bool force_mds,
			  const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_server *server = NFS_SERVER(inode);
	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;

#ifdef CONFIG_NFS_V4_1
	if (server->pnfs_curr_ld && !force_mds)
		pg_ops = server->pnfs_curr_ld->pg_read_ops;
#endif
	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
			server->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);

void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
	pgio->pg_ops = &nfs_pgio_rw_ops;
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

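/*
 * Read a single page asynchronously on behalf of the given open context.
 * The tail of a partial page is zeroed up front, so completion only has
 * to deal with the bytes the server actually returned.
 */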
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		       struct page *page)
{
	struct nfs_page	*new;
	unsigned int len;
	struct nfs_pageio_descriptor pgio;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, page, NULL, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);

	nfs_pageio_init_read(&pgio, inode, false,
			     &nfs_async_read_completion_ops);
	nfs_pageio_add_request(&pgio, new);
	nfs_pageio_complete(&pgio);
	NFS_I(inode)->read_io += pgio.pg_bytes_written;
	return 0;
}

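/*
 * Release a completed read request.  Once every subrequest in the page
 * group has reached this point, the page is pushed to fscache (if it is
 * uptodate) and unlocked.
 */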
static void nfs_readpage_release(struct nfs_page *req)
{
	struct inode *d_inode = req->wb_context->dentry->d_inode;

	dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
		(unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
		(long long)req_offset(req));

	if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
		if (PageUptodate(req->wb_page))
			nfs_readpage_to_fscache(d_inode, req->wb_page, 0);

		unlock_page(req->wb_page);
	}
	nfs_release_request(req);
}

static void nfs_page_group_set_uptodate(struct nfs_page *req)
{
	if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
		SetPageUptodate(req->wb_page);
}

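/*
 * Per-header completion: walk the requests attached to this header, zero
 * any region a short read at EOF left uncovered, mark page groups
 * uptodate where all of their bytes were read, and release each request.
 */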
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
		struct page *page = req->wb_page;
		unsigned long start = req->wb_pgbase;
		unsigned long end = req->wb_pgbase + req->wb_bytes;

		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
			/* note: regions of the page not covered by a
			 * request are zeroed in nfs_readpage_async /
			 * readpage_async_filler */
			if (bytes > hdr->good_bytes) {
				/* nothing in this request was good, so zero
				 * the full extent of the request */
				zero_user_segment(page, start, end);
			} else if (hdr->good_bytes - bytes < req->wb_bytes) {
				/* part of this request has good bytes, but
				 * not all. zero the bad bytes */
				start += hdr->good_bytes - bytes;
				WARN_ON(start < req->wb_pgbase);
				zero_user_segment(page, start, end);
			}
		}
		bytes += req->wb_bytes;
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
			if (bytes <= hdr->good_bytes)
				nfs_page_group_set_uptodate(req);
		} else
			nfs_page_group_set_uptodate(req);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
out:
	hdr->release(hdr);
}

static void nfs_initiate_read(struct nfs_pgio_header *hdr,
			      struct rpc_message *msg,
			      struct rpc_task_setup *task_setup_data, int how)
{
	struct inode *inode = hdr->inode;
	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;

	task_setup_data->flags |= swap_flags;
	NFS_PROTO(inode)->read_setup(hdr, msg);
}

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
}

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
	.error_cleanup	= nfs_async_read_error,
	.completion	= nfs_read_completion,
};

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static int nfs_readpage_done(struct rpc_task *task,
			     struct nfs_pgio_header *hdr,
			     struct inode *inode)
{
	int status = NFS_PROTO(inode)->read_done(task, hdr);
	if (status != 0)
		return status;

	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
		nfs_mark_for_revalidate(inode);
	}
	return 0;
}

static void nfs_readpage_retry(struct rpc_task *task,
			       struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_args *argp = &hdr->args;
	struct nfs_pgio_res  *resp = &hdr->res;

	/* This is a short read! */
	nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0) {
		nfs_set_pgio_error(hdr, -EIO, argp->offset);
		return;
	}

	/* Yes, so retry the read at the end of the hdr */
	hdr->mds_offset += resp->count;
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	rpc_restart_call_prepare(task);
}

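/*
 * If the server hit EOF before the end of the requested range, clamp
 * good_bytes so the trailing part of the I/O is treated as a hole
 * rather than an error; otherwise a short read is retried from where
 * the server stopped.
 */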
static void nfs_readpage_result(struct rpc_task *task,
				struct nfs_pgio_header *hdr)
{
	if (hdr->res.eof) {
		loff_t bound;

		bound = hdr->args.offset + hdr->res.count;
		spin_lock(&hdr->lock);
		if (bound < hdr->io_start + hdr->good_bytes) {
			set_bit(NFS_IOHDR_EOF, &hdr->flags);
			clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
			hdr->good_bytes = bound - hdr->io_start;
		}
		spin_unlock(&hdr->lock);
	} else if (hdr->res.count != hdr->args.count)
		nfs_readpage_retry(task, hdr);
}

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page_file_mapping(page)->host;
	int		error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page_file_index(page));
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file..
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_unlock;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_unlock;
	} else
		ctx = get_nfs_open_context(nfs_file_open_context(file));

	if (!IS_SYNC(inode)) {
		error = nfs_readpage_from_fscache(ctx, inode, page);
		if (error == 0)
			goto out;
	}

	error = nfs_readpage_async(ctx, inode, page);

out:
	put_nfs_open_context(ctx);
	return error;
out_unlock:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;
	struct nfs_open_context *ctx;
};

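/*
 * Filler callback used by ->readpages() via read_cache_pages(): wrap
 * each page in an nfs_page request and feed it to the shared pageio
 * descriptor, zeroing the tail of any partial page up front.
 */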
static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct nfs_page *new;
	unsigned int len;
	int error;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);

	new = nfs_create_request(desc->ctx, page, NULL, 0, len);
	if (IS_ERR(new))
		goto out_error;

	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);
	if (!nfs_pageio_add_request(desc->pgio, new)) {
		error = desc->pgio->pg_error;
		goto out_unlock;
	}
	return 0;
out_error:
	error = PTR_ERR(new);
out_unlock:
	unlock_page(page);
	return error;
}

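/*
 * ->readpages() entry point: try fscache first, then queue whatever is
 * left through a single pageio descriptor shared by all pages so that
 * contiguous requests can be sent together.
 */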
int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct nfs_pageio_descriptor pgio;
	struct nfs_readdesc desc = {
		.pgio = &pgio,
	};
	struct inode *inode = mapping->host;
	unsigned long npages;
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
			inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));

	/* attempt to read as many of the pages as possible from the cache
	 * - this returns -ENOBUFS immediately if the cookie is negative
	 */
	ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
					 pages, &nr_pages);
	if (ret == 0)
		goto read_complete; /* all pages were read */

	nfs_pageio_init_read(&pgio, inode, false,
			     &nfs_async_read_completion_ops);

	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

	nfs_pageio_complete(&pgio);
	NFS_I(inode)->read_io += pgio.pg_bytes_written;
	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}

int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_pgio_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	kmem_cache_destroy(nfs_rdata_cachep);
}

static const struct nfs_rw_ops nfs_rw_read_ops = {
	.rw_mode		= FMODE_READ,
	.rw_alloc_header	= nfs_readhdr_alloc,
	.rw_free_header		= nfs_readhdr_free,
	.rw_done		= nfs_readpage_done,
	.rw_result		= nfs_readpage_result,
	.rw_initiate		= nfs_initiate_read,
};