1 /* Storage object read/write
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
12 #include <linux/mount.h>
13 #include <linux/slab.h>
14 #include <linux/file.h>
15 #include <linux/swap.h>
19 * detect wake up events generated by the unlocking of pages in which we're interested
21 * - we use this to detect read completion of backing pages
22 * - the caller holds the waitqueue lock
24 static int cachefiles_read_waiter(wait_queue_entry_t
*wait
, unsigned mode
,
27 struct cachefiles_one_read
*monitor
=
28 container_of(wait
, struct cachefiles_one_read
, monitor
);
29 struct cachefiles_object
*object
;
30 struct wait_bit_key
*key
= _key
;
31 struct page
*page
= wait
->private;
35 _enter("{%lu},%u,%d,{%p,%u}",
36 monitor
->netfs_page
->index
, mode
, sync
,
37 key
->flags
, key
->bit_nr
);
39 if (key
->flags
!= &page
->flags
||
40 key
->bit_nr
!= PG_locked
)
43 _debug("--- monitor %p %lx ---", page
, page
->flags
);
45 if (!PageUptodate(page
) && !PageError(page
)) {
46 /* unlocked, not uptodate and not erroneous? */
47 _debug("page probably truncated");
50 /* remove from the waitqueue */
51 list_del(&wait
->entry
);
53 /* move onto the action list and queue for FS-Cache thread pool */
56 object
= container_of(monitor
->op
->op
.object
,
57 struct cachefiles_object
, fscache
);
59 spin_lock(&object
->work_lock
);
60 list_add_tail(&monitor
->op_link
, &monitor
->op
->to_do
);
61 spin_unlock(&object
->work_lock
);
63 fscache_enqueue_retrieval(monitor
->op
);
68 * handle a probably truncated page
69 * - check to see if the page is still relevant and reissue the read if possible
71 * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we
72 * must wait again and 0 if successful
74 static int cachefiles_read_reissue(struct cachefiles_object
*object
,
75 struct cachefiles_one_read
*monitor
)
77 struct address_space
*bmapping
= d_backing_inode(object
->backer
)->i_mapping
;
78 struct page
*backpage
= monitor
->back_page
, *backpage2
;
81 _enter("{ino=%lx},{%lx,%lx}",
82 d_backing_inode(object
->backer
)->i_ino
,
83 backpage
->index
, backpage
->flags
);
85 /* skip if the page was truncated away completely */
86 if (backpage
->mapping
!= bmapping
) {
87 _leave(" = -ENODATA [mapping]");
91 backpage2
= find_get_page(bmapping
, backpage
->index
);
93 _leave(" = -ENODATA [gone]");
97 if (backpage
!= backpage2
) {
99 _leave(" = -ENODATA [different]");
103 /* the page is still there and we already have a ref on it, so we don't need a second */
107 INIT_LIST_HEAD(&monitor
->op_link
);
108 add_page_wait_queue(backpage
, &monitor
->monitor
);
110 if (trylock_page(backpage
)) {
112 if (PageError(backpage
))
115 if (PageUptodate(backpage
))
118 _debug("reissue read");
119 ret
= bmapping
->a_ops
->readpage(NULL
, backpage
);
124 /* but the page may have been read before the monitor was installed, so
125 * the monitor may miss the event - so we have to ensure that we do get
126 * one in such a case */
127 if (trylock_page(backpage
)) {
128 _debug("jumpstart %p {%lx}", backpage
, backpage
->flags
);
129 unlock_page(backpage
);
132 /* it'll reappear on the todo list */
133 _leave(" = -EINPROGRESS");
137 unlock_page(backpage
);
138 spin_lock_irq(&object
->work_lock
);
139 list_del(&monitor
->op_link
);
140 spin_unlock_irq(&object
->work_lock
);
141 _leave(" = %d", ret
);
146 * copy data from backing pages to netfs pages to complete a read operation
147 * - driven by FS-Cache's thread pool
149 static void cachefiles_read_copier(struct fscache_operation
*_op
)
151 struct cachefiles_one_read
*monitor
;
152 struct cachefiles_object
*object
;
153 struct fscache_retrieval
*op
;
156 op
= container_of(_op
, struct fscache_retrieval
, op
);
157 object
= container_of(op
->op
.object
,
158 struct cachefiles_object
, fscache
);
160 _enter("{ino=%lu}", d_backing_inode(object
->backer
)->i_ino
);
163 spin_lock_irq(&object
->work_lock
);
165 while (!list_empty(&op
->to_do
)) {
166 monitor
= list_entry(op
->to_do
.next
,
167 struct cachefiles_one_read
, op_link
);
168 list_del(&monitor
->op_link
);
170 spin_unlock_irq(&object
->work_lock
);
172 _debug("- copy {%lu}", monitor
->back_page
->index
);
175 if (test_bit(FSCACHE_COOKIE_INVALIDATING
,
176 &object
->fscache
.cookie
->flags
)) {
178 } else if (PageUptodate(monitor
->back_page
)) {
179 copy_highpage(monitor
->netfs_page
, monitor
->back_page
);
180 fscache_mark_page_cached(monitor
->op
,
181 monitor
->netfs_page
);
183 } else if (!PageError(monitor
->back_page
)) {
184 /* the page has probably been truncated */
185 error
= cachefiles_read_reissue(object
, monitor
);
186 if (error
== -EINPROGRESS
)
190 cachefiles_io_error_obj(
192 "Readpage failed on backing file %lx",
193 (unsigned long) monitor
->back_page
->flags
);
197 put_page(monitor
->back_page
);
199 fscache_end_io(op
, monitor
->netfs_page
, error
);
200 put_page(monitor
->netfs_page
);
201 fscache_retrieval_complete(op
, 1);
202 fscache_put_retrieval(op
);
206 /* let the thread pool have some air occasionally */
208 if (max
< 0 || need_resched()) {
209 if (!list_empty(&op
->to_do
))
210 fscache_enqueue_retrieval(op
);
211 _leave(" [maxed out]");
215 spin_lock_irq(&object
->work_lock
);
218 spin_unlock_irq(&object
->work_lock
);
223 * read the corresponding page to the given set from the backing file
224 * - an uncertain page is simply discarded, to be tried again another time
226 static int cachefiles_read_backing_file_one(struct cachefiles_object
*object
,
227 struct fscache_retrieval
*op
,
228 struct page
*netpage
)
230 struct cachefiles_one_read
*monitor
;
231 struct address_space
*bmapping
;
232 struct page
*newpage
, *backpage
;
237 _debug("read back %p{%lu,%d}",
238 netpage
, netpage
->index
, page_count(netpage
));
240 monitor
= kzalloc(sizeof(*monitor
), cachefiles_gfp
);
244 monitor
->netfs_page
= netpage
;
245 monitor
->op
= fscache_get_retrieval(op
);
247 init_waitqueue_func_entry(&monitor
->monitor
, cachefiles_read_waiter
);
249 /* attempt to get hold of the backing page */
250 bmapping
= d_backing_inode(object
->backer
)->i_mapping
;
254 backpage
= find_get_page(bmapping
, netpage
->index
);
256 goto backing_page_already_present
;
259 newpage
= __page_cache_alloc(cachefiles_gfp
);
264 ret
= add_to_page_cache_lru(newpage
, bmapping
,
265 netpage
->index
, cachefiles_gfp
);
267 goto installed_new_backing_page
;
272 /* we've installed a new backing page, so now we need to start it reading */
274 installed_new_backing_page
:
275 _debug("- new %p", newpage
);
281 ret
= bmapping
->a_ops
->readpage(NULL
, backpage
);
285 /* set the monitor to transfer the data across */
286 monitor_backing_page
:
287 _debug("- monitor add");
289 /* install the monitor */
290 get_page(monitor
->netfs_page
);
292 monitor
->back_page
= backpage
;
293 monitor
->monitor
.private = backpage
;
294 add_page_wait_queue(backpage
, &monitor
->monitor
);
297 /* but the page may have been read before the monitor was installed, so
298 * the monitor may miss the event - so we have to ensure that we do get
299 * one in such a case */
300 if (trylock_page(backpage
)) {
301 _debug("jumpstart %p {%lx}", backpage
, backpage
->flags
);
302 unlock_page(backpage
);
306 /* if the backing page is already present, it can be in one of
307 * three states: read in progress, read failed or read okay */
308 backing_page_already_present
:
316 if (PageError(backpage
))
319 if (PageUptodate(backpage
))
320 goto backing_page_already_uptodate
;
322 if (!trylock_page(backpage
))
323 goto monitor_backing_page
;
324 _debug("read %p {%lx}", backpage
, backpage
->flags
);
325 goto read_backing_page
;
327 /* the backing page is already up to date, attach the netfs
328 * page to the pagecache and LRU and copy the data across */
329 backing_page_already_uptodate
:
330 _debug("- uptodate");
332 fscache_mark_page_cached(op
, netpage
);
334 copy_highpage(netpage
, backpage
);
335 fscache_end_io(op
, netpage
, 0);
336 fscache_retrieval_complete(op
, 1);
346 fscache_put_retrieval(monitor
->op
);
349 _leave(" = %d", ret
);
353 _debug("read error %d", ret
);
354 if (ret
== -ENOMEM
) {
355 fscache_retrieval_complete(op
, 1);
359 cachefiles_io_error_obj(object
, "Page read error on backing file");
360 fscache_retrieval_complete(op
, 1);
367 fscache_put_retrieval(monitor
->op
);
370 fscache_retrieval_complete(op
, 1);
371 _leave(" = -ENOMEM");
376 * read a page from the cache or allocate a block in which to store it
377 * - cache withdrawal is prevented by the caller
378 * - returns -EINTR if interrupted
379 * - returns -ENOMEM if ran out of memory
380 * - returns -ENOBUFS if no buffers can be made available
381 * - returns -ENOBUFS if page is beyond EOF
382 * - if the page is backed by a block in the cache:
383 * - a read will be started which will call the callback on completion
384 * - 0 will be returned
385 * - else if the page is unbacked:
386 * - the metadata will be retained
387 * - -ENODATA will be returned
389 int cachefiles_read_or_alloc_page(struct fscache_retrieval
*op
,
393 struct cachefiles_object
*object
;
394 struct cachefiles_cache
*cache
;
396 sector_t block0
, block
;
400 object
= container_of(op
->op
.object
,
401 struct cachefiles_object
, fscache
);
402 cache
= container_of(object
->fscache
.cache
,
403 struct cachefiles_cache
, cache
);
405 _enter("{%p},{%lx},,,", object
, page
->index
);
410 inode
= d_backing_inode(object
->backer
);
411 ASSERT(S_ISREG(inode
->i_mode
));
412 ASSERT(inode
->i_mapping
->a_ops
->bmap
);
413 ASSERT(inode
->i_mapping
->a_ops
->readpages
);
415 /* calculate the shift required to use bmap */
416 shift
= PAGE_SHIFT
- inode
->i_sb
->s_blocksize_bits
;
418 op
->op
.flags
&= FSCACHE_OP_KEEP_FLAGS
;
419 op
->op
.flags
|= FSCACHE_OP_ASYNC
;
420 op
->op
.processor
= cachefiles_read_copier
;
422 /* we assume the absence or presence of the first block is a good
423 * enough indication for the page as a whole
424 * - TODO: don't use bmap() for this as it is _not_ actually good
425 * enough for this as it doesn't indicate errors, but it's all we've got for the moment */
428 block0
= page
->index
;
431 block
= inode
->i_mapping
->a_ops
->bmap(inode
->i_mapping
, block0
);
432 _debug("%llx -> %llx",
433 (unsigned long long) block0
,
434 (unsigned long long) block
);
437 /* submit the apparently valid page to the backing fs to be read from disk */
439 ret
= cachefiles_read_backing_file_one(object
, op
, page
);
440 } else if (cachefiles_has_space(cache
, 0, 1) == 0) {
441 /* there's space in the cache we can use */
442 fscache_mark_page_cached(op
, page
);
443 fscache_retrieval_complete(op
, 1);
449 _leave(" = %d", ret
);
453 fscache_retrieval_complete(op
, 1);
454 _leave(" = -ENOBUFS");
459 * read the corresponding pages to the given set from the backing file
460 * - any uncertain pages are simply discarded, to be tried again another time
462 static int cachefiles_read_backing_file(struct cachefiles_object
*object
,
463 struct fscache_retrieval
*op
,
464 struct list_head
*list
)
466 struct cachefiles_one_read
*monitor
= NULL
;
467 struct address_space
*bmapping
= d_backing_inode(object
->backer
)->i_mapping
;
468 struct page
*newpage
= NULL
, *netpage
, *_n
, *backpage
= NULL
;
473 list_for_each_entry_safe(netpage
, _n
, list
, lru
) {
474 list_del(&netpage
->lru
);
476 _debug("read back %p{%lu,%d}",
477 netpage
, netpage
->index
, page_count(netpage
));
480 monitor
= kzalloc(sizeof(*monitor
), cachefiles_gfp
);
484 monitor
->op
= fscache_get_retrieval(op
);
485 init_waitqueue_func_entry(&monitor
->monitor
,
486 cachefiles_read_waiter
);
490 backpage
= find_get_page(bmapping
, netpage
->index
);
492 goto backing_page_already_present
;
495 newpage
= __page_cache_alloc(cachefiles_gfp
);
500 ret
= add_to_page_cache_lru(newpage
, bmapping
,
504 goto installed_new_backing_page
;
509 /* we've installed a new backing page, so now we need
510 * to start it reading */
511 installed_new_backing_page
:
512 _debug("- new %p", newpage
);
518 ret
= bmapping
->a_ops
->readpage(NULL
, backpage
);
522 /* add the netfs page to the pagecache and LRU, and set the
523 * monitor to transfer the data across */
524 monitor_backing_page
:
525 _debug("- monitor add");
527 ret
= add_to_page_cache_lru(netpage
, op
->mapping
,
528 netpage
->index
, cachefiles_gfp
);
530 if (ret
== -EEXIST
) {
532 fscache_retrieval_complete(op
, 1);
538 /* install a monitor */
540 monitor
->netfs_page
= netpage
;
543 monitor
->back_page
= backpage
;
544 monitor
->monitor
.private = backpage
;
545 add_page_wait_queue(backpage
, &monitor
->monitor
);
548 /* but the page may have been read before the monitor was
549 * installed, so the monitor may miss the event - so we have to
550 * ensure that we do get one in such a case */
551 if (trylock_page(backpage
)) {
552 _debug("2unlock %p {%lx}", backpage
, backpage
->flags
);
553 unlock_page(backpage
);
563 /* if the backing page is already present, it can be in one of
564 * three states: read in progress, read failed or read okay */
565 backing_page_already_present
:
566 _debug("- present %p", backpage
);
568 if (PageError(backpage
))
571 if (PageUptodate(backpage
))
572 goto backing_page_already_uptodate
;
574 _debug("- not ready %p{%lx}", backpage
, backpage
->flags
);
576 if (!trylock_page(backpage
))
577 goto monitor_backing_page
;
579 if (PageError(backpage
)) {
580 _debug("error %lx", backpage
->flags
);
581 unlock_page(backpage
);
585 if (PageUptodate(backpage
))
586 goto backing_page_already_uptodate_unlock
;
588 /* we've locked a page that's neither up to date nor erroneous,
589 * so we need to attempt to read it again */
590 goto reread_backing_page
;
592 /* the backing page is already up to date, attach the netfs
593 * page to the pagecache and LRU and copy the data across */
594 backing_page_already_uptodate_unlock
:
595 _debug("uptodate %lx", backpage
->flags
);
596 unlock_page(backpage
);
597 backing_page_already_uptodate
:
598 _debug("- uptodate");
600 ret
= add_to_page_cache_lru(netpage
, op
->mapping
,
601 netpage
->index
, cachefiles_gfp
);
603 if (ret
== -EEXIST
) {
605 fscache_retrieval_complete(op
, 1);
611 copy_highpage(netpage
, backpage
);
616 fscache_mark_page_cached(op
, netpage
);
618 /* the netpage is unlocked and marked up to date here */
619 fscache_end_io(op
, netpage
, 0);
622 fscache_retrieval_complete(op
, 1);
639 fscache_put_retrieval(op
);
643 list_for_each_entry_safe(netpage
, _n
, list
, lru
) {
644 list_del(&netpage
->lru
);
646 fscache_retrieval_complete(op
, 1);
649 _leave(" = %d", ret
);
655 goto record_page_complete
;
658 _debug("read error %d", ret
);
660 goto record_page_complete
;
662 cachefiles_io_error_obj(object
, "Page read error on backing file");
664 record_page_complete
:
665 fscache_retrieval_complete(op
, 1);
670 * read a list of pages from the cache or allocate blocks in which to store them
673 int cachefiles_read_or_alloc_pages(struct fscache_retrieval
*op
,
674 struct list_head
*pages
,
678 struct cachefiles_object
*object
;
679 struct cachefiles_cache
*cache
;
680 struct list_head backpages
;
681 struct pagevec pagevec
;
683 struct page
*page
, *_n
;
684 unsigned shift
, nrbackpages
;
685 int ret
, ret2
, space
;
687 object
= container_of(op
->op
.object
,
688 struct cachefiles_object
, fscache
);
689 cache
= container_of(object
->fscache
.cache
,
690 struct cachefiles_cache
, cache
);
692 _enter("{OBJ%x,%d},,%d,,",
693 object
->fscache
.debug_id
, atomic_read(&op
->op
.usage
),
700 if (cachefiles_has_space(cache
, 0, *nr_pages
) < 0)
703 inode
= d_backing_inode(object
->backer
);
704 ASSERT(S_ISREG(inode
->i_mode
));
705 ASSERT(inode
->i_mapping
->a_ops
->bmap
);
706 ASSERT(inode
->i_mapping
->a_ops
->readpages
);
708 /* calculate the shift required to use bmap */
709 shift
= PAGE_SHIFT
- inode
->i_sb
->s_blocksize_bits
;
711 pagevec_init(&pagevec
);
713 op
->op
.flags
&= FSCACHE_OP_KEEP_FLAGS
;
714 op
->op
.flags
|= FSCACHE_OP_ASYNC
;
715 op
->op
.processor
= cachefiles_read_copier
;
717 INIT_LIST_HEAD(&backpages
);
720 ret
= space
? -ENODATA
: -ENOBUFS
;
721 list_for_each_entry_safe(page
, _n
, pages
, lru
) {
722 sector_t block0
, block
;
724 /* we assume the absence or presence of the first block is a
725 * good enough indication for the page as a whole
726 * - TODO: don't use bmap() for this as it is _not_ actually
727 * good enough for this as it doesn't indicate errors, but
728 * it's all we've got for the moment */
730 block0
= page
->index
;
733 block
= inode
->i_mapping
->a_ops
->bmap(inode
->i_mapping
,
735 _debug("%llx -> %llx",
736 (unsigned long long) block0
,
737 (unsigned long long) block
);
740 /* we have data - add it to the list to give to the backing fs */
742 list_move(&page
->lru
, &backpages
);
745 } else if (space
&& pagevec_add(&pagevec
, page
) == 0) {
746 fscache_mark_pages_cached(op
, &pagevec
);
747 fscache_retrieval_complete(op
, 1);
750 fscache_retrieval_complete(op
, 1);
754 if (pagevec_count(&pagevec
) > 0)
755 fscache_mark_pages_cached(op
, &pagevec
);
757 if (list_empty(pages
))
760 /* submit the apparently valid pages to the backing fs to be read from disk */
762 if (nrbackpages
> 0) {
763 ret2
= cachefiles_read_backing_file(object
, op
, &backpages
);
764 if (ret2
== -ENOMEM
|| ret2
== -EINTR
)
768 _leave(" = %d [nr=%u%s]",
769 ret
, *nr_pages
, list_empty(pages
) ? " empty" : "");
773 fscache_retrieval_complete(op
, *nr_pages
);
778 * allocate a block in the cache in which to store a page
779 * - cache withdrawal is prevented by the caller
780 * - returns -EINTR if interrupted
781 * - returns -ENOMEM if ran out of memory
782 * - returns -ENOBUFS if no buffers can be made available
783 * - returns -ENOBUFS if page is beyond EOF
785 * - the metadata will be retained
786 * - 0 will be returned
788 int cachefiles_allocate_page(struct fscache_retrieval
*op
,
792 struct cachefiles_object
*object
;
793 struct cachefiles_cache
*cache
;
796 object
= container_of(op
->op
.object
,
797 struct cachefiles_object
, fscache
);
798 cache
= container_of(object
->fscache
.cache
,
799 struct cachefiles_cache
, cache
);
801 _enter("%p,{%lx},", object
, page
->index
);
803 ret
= cachefiles_has_space(cache
, 0, 1);
805 fscache_mark_page_cached(op
, page
);
809 fscache_retrieval_complete(op
, 1);
810 _leave(" = %d", ret
);
815 * allocate blocks in the cache in which to store a set of pages
816 * - cache withdrawal is prevented by the caller
817 * - returns -EINTR if interrupted
818 * - returns -ENOMEM if ran out of memory
819 * - returns -ENOBUFS if some buffers couldn't be made available
820 * - returns -ENOBUFS if some pages are beyond EOF
822 * - -ENODATA will be returned
823 * - metadata will be retained for any page marked
825 int cachefiles_allocate_pages(struct fscache_retrieval
*op
,
826 struct list_head
*pages
,
830 struct cachefiles_object
*object
;
831 struct cachefiles_cache
*cache
;
832 struct pagevec pagevec
;
836 object
= container_of(op
->op
.object
,
837 struct cachefiles_object
, fscache
);
838 cache
= container_of(object
->fscache
.cache
,
839 struct cachefiles_cache
, cache
);
841 _enter("%p,,,%d,", object
, *nr_pages
);
843 ret
= cachefiles_has_space(cache
, 0, *nr_pages
);
845 pagevec_init(&pagevec
);
847 list_for_each_entry(page
, pages
, lru
) {
848 if (pagevec_add(&pagevec
, page
) == 0)
849 fscache_mark_pages_cached(op
, &pagevec
);
852 if (pagevec_count(&pagevec
) > 0)
853 fscache_mark_pages_cached(op
, &pagevec
);
859 fscache_retrieval_complete(op
, *nr_pages
);
860 _leave(" = %d", ret
);
865 * request a page be stored in the cache
866 * - cache withdrawal is prevented by the caller
867 * - this request may be ignored if there's no cache block available, in which
868 * case -ENOBUFS will be returned
869 * - if the op is in progress, 0 will be returned
871 int cachefiles_write_page(struct fscache_storage
*op
, struct page
*page
)
873 struct cachefiles_object
*object
;
874 struct cachefiles_cache
*cache
;
883 ASSERT(page
!= NULL
);
885 object
= container_of(op
->op
.object
,
886 struct cachefiles_object
, fscache
);
888 _enter("%p,%p{%lx},,,", object
, page
, page
->index
);
890 if (!object
->backer
) {
891 _leave(" = -ENOBUFS");
895 ASSERT(d_is_reg(object
->backer
));
897 cache
= container_of(object
->fscache
.cache
,
898 struct cachefiles_cache
, cache
);
900 pos
= (loff_t
)page
->index
<< PAGE_SHIFT
;
902 /* We mustn't write more data than we have, so we have to beware of a
903 * partial page at EOF. */
905 eof
= object
->fscache
.store_limit_l
;
909 /* write the page to the backing filesystem and let it store it in its own time */
911 path
.mnt
= cache
->mnt
;
912 path
.dentry
= object
->backer
;
913 file
= dentry_open(&path
, O_RDWR
| O_LARGEFILE
, cache
->cache_cred
);
920 if (eof
& ~PAGE_MASK
) {
921 if (eof
- pos
< PAGE_SIZE
) {
922 _debug("cut short %llx to %llx",
925 ASSERTCMP(pos
+ len
, ==, eof
);
930 ret
= __kernel_write(file
, data
, len
, &pos
);
943 cachefiles_io_error_obj(object
,
944 "Write page to backing file failed");
946 _leave(" = -ENOBUFS [%d]", ret
);
951 * detach a backing block from a page
952 * - cache withdrawal is prevented by the caller
954 void cachefiles_uncache_page(struct fscache_object
*_object
, struct page
*page
)
956 struct cachefiles_object
*object
;
957 struct cachefiles_cache
*cache
;
959 object
= container_of(_object
, struct cachefiles_object
, fscache
);
960 cache
= container_of(object
->fscache
.cache
,
961 struct cachefiles_cache
, cache
);
963 _enter("%p,{%lu}", object
, page
->index
);
965 spin_unlock(&object
->fscache
.cookie
->lock
);