// SPDX-License-Identifier: GPL-2.0-or-later
/* kiocb-using read/write
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/falloc.h>
#include <linux/sched/mm.h>
#include <trace/events/fscache.h>
#include <trace/events/netfs.h>
#include "internal.h"

struct cachefiles_kiocb {
	struct kiocb		iocb;
	refcount_t		ki_refcnt;
	loff_t			start;
	union {
		size_t		skipped;
		size_t		len;
	};
	struct cachefiles_object *object;
	netfs_io_terminated_t	term_func;
	void			*term_func_priv;
	bool			was_async;
	unsigned int		inval_counter;	/* Copy of cookie->inval_counter */
	u64			b_writing;
};

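/*
 * Note: a kiocb is created with a refcount of 2 - one ref belongs to the
 * submitter and one to the I/O completion handler - and it pins both the
 * backing file and the object until the last ref is put.
 */
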
static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
{
	if (refcount_dec_and_test(&ki->ki_refcnt)) {
		cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
		fput(ki->iocb.ki_filp);
		kfree(ki);
	}
}

/*
 * Handle completion of a read from the cache.
 */
static void cachefiles_read_complete(struct kiocb *iocb, long ret)
{
	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
	struct inode *inode = file_inode(ki->iocb.ki_filp);

	_enter("%ld", ret);

	if (ret < 0)
		trace_cachefiles_io_error(ki->object, inode, ret,
					  cachefiles_trace_read_error);

	if (ki->term_func) {
		if (ret >= 0) {
			if (ki->object->cookie->inval_counter == ki->inval_counter)
				ki->skipped += ret;
			else
				ret = -ESTALE;
		}

		ki->term_func(ki->term_func_priv, ret, ki->was_async);
	}

	cachefiles_put_kiocb(ki);
}

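/*
 * Note: the read below is issued against the backing file with IOCB_DIRECT,
 * so the caller's iterator is presumed to be suitably aligned for direct
 * I/O.  Any hole found at the front of the requested region is cleared with
 * iov_iter_zero() rather than read from disk, and the kiocb then starts at
 * the first byte of actual data.
 */
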
/*
 * Initiate a read from the cache.
 */
static int cachefiles_read(struct netfs_cache_resources *cres,
			   loff_t start_pos,
			   struct iov_iter *iter,
			   enum netfs_read_from_hole read_hole,
			   netfs_io_terminated_t term_func,
			   void *term_func_priv)
{
	struct cachefiles_object *object;
	struct cachefiles_kiocb *ki;
	struct file *file;
	unsigned int old_nofs;
	ssize_t ret = -ENOBUFS;
	size_t len = iov_iter_count(iter), skipped = 0;

	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
		goto presubmission_error;

	fscache_count_read();
	object = cachefiles_cres_object(cres);
	file = cachefiles_cres_file(cres);

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file_inode(file)));

	/* If the caller asked us to seek for data before doing the read, then
	 * we should do that now.  If we find a gap, we fill it with zeros.
	 */
	if (read_hole != NETFS_READ_HOLE_IGNORE) {
		loff_t off = start_pos, off2;

		off2 = cachefiles_inject_read_error();
		if (off2 == 0)
			off2 = vfs_llseek(file, off, SEEK_DATA);
		if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
			skipped = 0;
			ret = off2;
			goto presubmission_error;
		}

		if (off2 == -ENXIO || off2 >= start_pos + len) {
			/* The region is beyond the EOF or there's no more data
			 * in the region, so clear the rest of the buffer and
			 * return success.
			 */
			ret = -ENODATA;
			if (read_hole == NETFS_READ_HOLE_FAIL)
				goto presubmission_error;

			iov_iter_zero(len, iter);
			skipped = len;
			ret = 0;
			goto presubmission_error;
		}

		skipped = off2 - off;
		iov_iter_zero(skipped, iter);
	}

	ret = -ENOMEM;
	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki)
		goto presubmission_error;

	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos + skipped;
	ki->iocb.ki_flags	= IOCB_DIRECT;
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->skipped		= skipped;
	ki->object		= object;
	ki->inval_counter	= cres->inval_counter;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;

	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_read_complete;

	get_file(ki->iocb.ki_filp);
	cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

	trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
	old_nofs = memalloc_nofs_save();
	ret = cachefiles_inject_read_error();
	if (ret == 0)
		ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIO's
		 * may be already running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		ki->was_async = false;
		cachefiles_read_complete(&ki->iocb, ret);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;

presubmission_error:
	if (term_func)
		term_func(term_func_priv, ret < 0 ? ret : skipped, false);
	return ret;
}

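/*
 * Note: occupancy is reported in whole cache blocks.  The start of the data
 * is rounded up and the end rounded down by the larger of the cache block
 * size and the caller's granularity, so a partially-present block is
 * reported as absent.
 */
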
/*
 * Query the occupancy of the cache in a region, returning where the next chunk
 * of data starts and how long it is.
 */
static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
				      loff_t start, size_t len, size_t granularity,
				      loff_t *_data_start, size_t *_data_len)
{
	struct cachefiles_object *object;
	struct file *file;
	loff_t off, off2;

	*_data_start = -1;
	*_data_len = 0;

	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
		return -ENOBUFS;

	object = cachefiles_cres_object(cres);
	file = cachefiles_cres_file(cres);
	granularity = max_t(size_t, object->volume->cache->bsize, granularity);

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start, len,
	       i_size_read(file_inode(file)));

	off = cachefiles_inject_read_error();
	if (off == 0)
		off = vfs_llseek(file, start, SEEK_DATA);
	if (off == -ENXIO)
		return -ENODATA; /* Beyond EOF */
	if (off < 0 && off >= (loff_t)-MAX_ERRNO)
		return -ENOBUFS; /* Error. */
	if (round_up(off, granularity) >= start + len)
		return -ENODATA; /* No data in range */

	off2 = cachefiles_inject_read_error();
	if (off2 == 0)
		off2 = vfs_llseek(file, off, SEEK_HOLE);
	if (off2 == -ENXIO)
		return -ENODATA; /* Beyond EOF */
	if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
		return -ENOBUFS; /* Error. */

	/* Round away partial blocks */
	off = round_up(off, granularity);
	off2 = round_down(off2, granularity);
	if (off2 <= off)
		return -ENODATA;

	*_data_start = off;
	if (off2 > start + len)
		*_data_len = len;
	else
		*_data_len = off2 - off;
	return 0;
}

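/*
 * Note: completion drops this write's contribution to the cache-wide count
 * of blocks being written and sets FSCACHE_COOKIE_HAVE_DATA, presumably so
 * that the higher fscache layers know the backing file now holds data that
 * must be preserved.
 */
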
/*
 * Handle completion of a write to the cache.
 */
static void cachefiles_write_complete(struct kiocb *iocb, long ret)
{
	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
	struct cachefiles_object *object = ki->object;
	struct inode *inode = file_inode(ki->iocb.ki_filp);

	_enter("%ld", ret);

	kiocb_end_write(iocb);

	if (ret < 0)
		trace_cachefiles_io_error(object, inode, ret,
					  cachefiles_trace_write_error);

	atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
	set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
	if (ki->term_func)
		ki->term_func(ki->term_func_priv, ret, ki->was_async);
	cachefiles_put_kiocb(ki);
}

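/*
 * Note: the actual submission below is wrapped in memalloc_nofs_save() /
 * memalloc_nofs_restore(), presumably so that memory reclaim triggered by
 * allocations inside the backing filesystem's write path cannot recurse
 * back into filesystem I/O and deadlock.
 */
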
/*
 * Initiate a write to the cache.
 */
int __cachefiles_write(struct cachefiles_object *object,
		       struct file *file,
		       loff_t start_pos,
		       struct iov_iter *iter,
		       netfs_io_terminated_t term_func,
		       void *term_func_priv)
{
	struct cachefiles_cache *cache;
	struct cachefiles_kiocb *ki;
	unsigned int old_nofs;
	ssize_t ret;
	size_t len = iov_iter_count(iter);

	fscache_count_write();
	cache = object->volume->cache;

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file_inode(file)));

	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki) {
		if (term_func)
			term_func(term_func_priv, -ENOMEM, false);
		return -ENOMEM;
	}

	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos;
	ki->iocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE;
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->object		= object;
	ki->start		= start_pos;
	ki->len			= len;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;
	ki->b_writing		= (len + (1 << cache->bshift) - 1) >> cache->bshift;

	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_write_complete;
	atomic_long_add(ki->b_writing, &cache->b_writing);

	get_file(ki->iocb.ki_filp);
	cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

	trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
	old_nofs = memalloc_nofs_save();
	ret = cachefiles_inject_write_error();
	if (ret == 0)
		ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIO's
		 * may be already running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		ki->was_async = false;
		cachefiles_write_complete(&ki->iocb, ret);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;
}

static int cachefiles_write(struct netfs_cache_resources *cres,
			    loff_t start_pos,
			    struct iov_iter *iter,
			    netfs_io_terminated_t term_func,
			    void *term_func_priv)
{
	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
		if (term_func)
			term_func(term_func_priv, -ENOBUFS, false);
		trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite);
		return -ENOBUFS;
	}

	return __cachefiles_write(cachefiles_cres_object(cres),
				  cachefiles_cres_file(cres),
				  start_pos, iter,
				  term_func, term_func_priv);
}

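/*
 * Note: the decision logic below is roughly: beyond EOF -> fill with
 * zeroes; no data at the start of the region -> download from the server
 * (and copy the result to the cache); data present in the cache -> read
 * from the cache.  In each case the subrequest length is shortened to the
 * nearest cached/uncached boundary, rounded to the cache block size.
 */
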
static inline enum netfs_io_source
cachefiles_do_prepare_read(struct netfs_cache_resources *cres,
			   loff_t start, size_t *_len, loff_t i_size,
			   unsigned long *_flags, ino_t netfs_ino)
{
	enum cachefiles_prepare_read_trace why;
	struct cachefiles_object *object = NULL;
	struct cachefiles_cache *cache;
	struct fscache_cookie *cookie = fscache_cres_cookie(cres);
	const struct cred *saved_cred;
	struct file *file = cachefiles_cres_file(cres);
	enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
	size_t len = *_len;
	loff_t off, to;
	ino_t ino = file ? file_inode(file)->i_ino : 0;
	int rc;

	_enter("%zx @%llx/%llx", len, start, i_size);

	if (start >= i_size) {
		ret = NETFS_FILL_WITH_ZEROES;
		why = cachefiles_trace_read_after_eof;
		goto out_no_object;
	}

	if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
		__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
		why = cachefiles_trace_read_no_data;
		if (!test_bit(NETFS_SREQ_ONDEMAND, _flags))
			goto out_no_object;
	}

	/* The object and the file may be being created in the background. */
	if (!file) {
		why = cachefiles_trace_read_no_file;
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
			goto out_no_object;
		file = cachefiles_cres_file(cres);
		if (!file)
			goto out_no_object;
		ino = file_inode(file)->i_ino;
	}

	object = cachefiles_cres_object(cres);
	cache = object->volume->cache;
	cachefiles_begin_secure(cache, &saved_cred);
retry:
	off = cachefiles_inject_read_error();
	if (off == 0)
		off = vfs_llseek(file, start, SEEK_DATA);
	if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
		if (off == (loff_t)-ENXIO) {
			why = cachefiles_trace_read_seek_nxio;
			goto download_and_store;
		}
		trace_cachefiles_io_error(object, file_inode(file), off,
					  cachefiles_trace_seek_error);
		why = cachefiles_trace_read_seek_error;
		goto out;
	}

	if (off >= start + len) {
		why = cachefiles_trace_read_found_hole;
		goto download_and_store;
	}

	if (off > start) {
		off = round_up(off, cache->bsize);
		len = off - start;
		*_len = len;
		why = cachefiles_trace_read_found_part;
		goto download_and_store;
	}

	to = cachefiles_inject_read_error();
	if (to == 0)
		to = vfs_llseek(file, start, SEEK_HOLE);
	if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
		trace_cachefiles_io_error(object, file_inode(file), to,
					  cachefiles_trace_seek_error);
		why = cachefiles_trace_read_seek_error;
		goto out;
	}

	if (to < start + len) {
		if (start + len >= i_size)
			to = round_up(to, cache->bsize);
		else
			to = round_down(to, cache->bsize);
		len = to - start;
		*_len = len;
	}

	why = cachefiles_trace_read_have_data;
	ret = NETFS_READ_FROM_CACHE;
	goto out;

download_and_store:
	__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
	if (test_bit(NETFS_SREQ_ONDEMAND, _flags)) {
		rc = cachefiles_ondemand_read(object, start, len);
		if (!rc) {
			__clear_bit(NETFS_SREQ_ONDEMAND, _flags);
			goto retry;
		}
		ret = NETFS_INVALID_READ;
	}
out:
	cachefiles_end_secure(cache, saved_cred);
out_no_object:
	trace_cachefiles_prep_read(object, start, len, *_flags, ret, why, ino, netfs_ino);
	return ret;
}

/*
 * Prepare a read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
						    unsigned long long i_size)
{
	return cachefiles_do_prepare_read(&subreq->rreq->cache_resources,
					  subreq->start, &subreq->len, i_size,
					  &subreq->flags, subreq->rreq->inode->i_ino);
}

/*
 * Prepare an on-demand read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source
cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres,
				 loff_t start, size_t *_len, loff_t i_size,
				 unsigned long *_flags, ino_t ino)
{
	return cachefiles_do_prepare_read(cres, start, _len, i_size, _flags, ino);
}

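/*
 * Note: the function below probes the backing file with SEEK_DATA/SEEK_HOLE
 * to find out whether the target region is already allocated.  Unallocated
 * space only gets a disk-space check; a partially-allocated region found
 * when space is tight is culled by punching it out with vfs_fallocate()
 * before the write goes ahead.
 */
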
/*
 * Prepare for a write to occur.
 */
int __cachefiles_prepare_write(struct cachefiles_object *object,
			       struct file *file,
			       loff_t *_start, size_t *_len, size_t upper_len,
			       bool no_space_allocated_yet)
{
	struct cachefiles_cache *cache = object->volume->cache;
	loff_t start = *_start, pos;
	size_t len = *_len;
	int ret;

	/* Round to DIO size */
	start = round_down(*_start, PAGE_SIZE);
	if (start != *_start || *_len > upper_len) {
		/* Probably asked to cache a streaming write written into the
		 * pagecache when the cookie was temporarily out of service to
		 * culling.
		 */
		fscache_count_dio_misfit();
		return -ENOBUFS;
	}

	*_len = round_up(len, PAGE_SIZE);

	/* We need to work out whether there's sufficient disk space to perform
	 * the write - but we can skip that check if we have space already
	 * allocated.
	 */
	if (no_space_allocated_yet)
		goto check_space;

	pos = cachefiles_inject_read_error();
	if (pos == 0)
		pos = vfs_llseek(file, start, SEEK_DATA);
	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
		if (pos == -ENXIO)
			goto check_space; /* Unallocated tail */
		trace_cachefiles_io_error(object, file_inode(file), pos,
					  cachefiles_trace_seek_error);
		return pos;
	}
	if ((u64)pos >= (u64)start + *_len)
		goto check_space; /* Unallocated region */

	/* We have a block that's at least partially filled - if we're low on
	 * space, we need to see if it's fully allocated.  If it's not, we may
	 * want to cull it.
	 */
	if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
				 cachefiles_has_space_check) == 0)
		return 0; /* Enough space to simply overwrite the whole block */

	pos = cachefiles_inject_read_error();
	if (pos == 0)
		pos = vfs_llseek(file, start, SEEK_HOLE);
	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
		trace_cachefiles_io_error(object, file_inode(file), pos,
					  cachefiles_trace_seek_error);
		return pos;
	}
	if ((u64)pos >= (u64)start + *_len)
		return 0; /* Fully allocated */

	/* Partially allocated, but insufficient space: cull. */
	fscache_count_no_write_space();
	ret = cachefiles_inject_remove_error();
	if (ret == 0)
		ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				    start, *_len);
	if (ret < 0) {
		trace_cachefiles_io_error(object, file_inode(file), ret,
					  cachefiles_trace_fallocate_error);
		cachefiles_io_error_obj(object,
					"CacheFiles: fallocate failed (%d)\n", ret);
		ret = -EIO;
	}

	return ret;

check_space:
	return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
				    cachefiles_has_space_for_write);
}

static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
				    loff_t *_start, size_t *_len, size_t upper_len,
				    loff_t i_size, bool no_space_allocated_yet)
{
	struct cachefiles_object *object = cachefiles_cres_object(cres);
	struct cachefiles_cache *cache = object->volume->cache;
	const struct cred *saved_cred;
	int ret;

	if (!cachefiles_cres_file(cres)) {
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
			return -ENOBUFS;
		if (!cachefiles_cres_file(cres))
			return -ENOBUFS;
	}

	cachefiles_begin_secure(cache, &saved_cred);
	ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
					 _start, _len, upper_len,
					 no_space_allocated_yet);
	cachefiles_end_secure(cache, saved_cred);
	return ret;
}

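/*
 * Note: a single cache write subrequest is capped below at MAX_RW_COUNT
 * bytes and BIO_MAX_VECS segments - the most that one VFS read/write call
 * and one bio can carry respectively.
 */
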
static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];

	_enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start);

	stream->sreq_max_len	= MAX_RW_COUNT;
	stream->sreq_max_segs	= BIO_MAX_VECS;

	if (!cachefiles_cres_file(cres)) {
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
			return netfs_prepare_write_failed(subreq);
		if (!cachefiles_cres_file(cres))
			return netfs_prepare_write_failed(subreq);
	}
}

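/*
 * Note: because the cache writes with direct I/O, the subrequest is trimmed
 * to CACHEFILES_DIO_BLOCK_SIZE boundaries below: a partial block at the
 * front is skipped, and a partial final block is either extended to a whole
 * block (if it ends at the file size and the stream permits extension) or
 * trimmed off.
 */
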
static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct cachefiles_object *object = cachefiles_cres_object(cres);
	struct cachefiles_cache *cache = object->volume->cache;
	struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
	const struct cred *saved_cred;
	size_t off, pre, post, len = subreq->len;
	loff_t start = subreq->start;
	int ret;

	_enter("W=%x[%x] %llx-%llx",
	       wreq->debug_id, subreq->debug_index, start, start + len - 1);

	/* We need to start on the cache granularity boundary */
	off = start & (CACHEFILES_DIO_BLOCK_SIZE - 1);
	if (off) {
		pre = CACHEFILES_DIO_BLOCK_SIZE - off;
		if (pre >= len) {
			fscache_count_dio_misfit();
			netfs_write_subrequest_terminated(subreq, len, false);
			return;
		}
		subreq->transferred += pre;
		start += pre;
		len -= pre;
		iov_iter_advance(&subreq->io_iter, pre);
	}

	/* We also need to end on the cache granularity boundary */
	if (start + len == wreq->i_size) {
		size_t part = len % CACHEFILES_DIO_BLOCK_SIZE;
		size_t need = CACHEFILES_DIO_BLOCK_SIZE - part;

		if (part && stream->submit_extendable_to >= need) {
			len += need;
			subreq->len += need;
			subreq->io_iter.count += need;
		}
	}

	post = len & (CACHEFILES_DIO_BLOCK_SIZE - 1);
	if (post) {
		len -= post;
		if (len == 0) {
			fscache_count_dio_misfit();
			netfs_write_subrequest_terminated(subreq, post, false);
			return;
		}
		iov_iter_truncate(&subreq->io_iter, len);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_cache_prepare);
	cachefiles_begin_secure(cache, &saved_cred);
	ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
					 &start, &len, len, true);
	cachefiles_end_secure(cache, saved_cred);
	if (ret < 0) {
		netfs_write_subrequest_terminated(subreq, ret, false);
		return;
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_cache_write);
	cachefiles_write(&subreq->rreq->cache_resources,
			 subreq->start, &subreq->io_iter,
			 netfs_write_subrequest_terminated, subreq);
}

/*
 * Clean up an operation.
 */
static void cachefiles_end_operation(struct netfs_cache_resources *cres)
{
	struct file *file = cachefiles_cres_file(cres);

	if (file)
		fput(file);
	fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
}

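/*
 * The cache operations table handed to netfslib; it is installed in
 * cres->ops by cachefiles_begin_operation() below.
 */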
static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
	.end_operation		= cachefiles_end_operation,
	.read			= cachefiles_read,
	.write			= cachefiles_write,
	.issue_write		= cachefiles_issue_write,
	.prepare_read		= cachefiles_prepare_read,
	.prepare_write		= cachefiles_prepare_write,
	.prepare_write_subreq	= cachefiles_prepare_write_subreq,
	.prepare_ondemand_read	= cachefiles_prepare_ondemand_read,
	.query_occupancy	= cachefiles_query_occupancy,
};

/*
 * Open the cache file when beginning a cache operation.
 */
bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
				enum fscache_want_state want_state)
{
	struct cachefiles_object *object = cachefiles_cres_object(cres);

	if (!cachefiles_cres_file(cres)) {
		cres->ops = &cachefiles_netfs_cache_ops;
		if (object->file) {
			spin_lock(&object->lock);
			if (!cres->cache_priv2 && object->file)
				cres->cache_priv2 = get_file(object->file);
			spin_unlock(&object->lock);
		}
	}

	if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
		pr_err("failed to get cres->file\n");
		return false;
	}

	return true;
}