1 /* SPDX-License-Identifier: GPL-2.0-or-later */
/* Network filesystem support services.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * See:
 *
 *	Documentation/filesystems/netfs_library.rst
 *
 * for a description of the network filesystem interface declared here.
 */
14 #ifndef _LINUX_NETFS_H
15 #define _LINUX_NETFS_H
17 #include <linux/workqueue.h>
19 #include <linux/pagemap.h>
20 #include <linux/uio.h>
/* Forward declarations so that only pointers/tags are needed in this header. */
enum netfs_sreq_ref_trace;
typedef struct mempool_s mempool_t;
/**
 * folio_start_private_2 - Start an fscache write on a folio.  [DEPRECATED]
 * @folio: The folio.
 *
 * Call this function before writing a folio to a local cache.  Starting a
 * second write before the first one finishes is not allowed.
 *
 * A reference is taken on the folio before PG_private_2 is set; it pairs with
 * the reference dropped by folio_end_private_2() once the write has finished.
 *
 * Note that this should no longer be used.
 */
static inline void folio_start_private_2(struct folio *folio)
{
	/* PG_private_2 already being set means a write is still in flight. */
	VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);
	folio_get(folio);
	folio_set_private_2(folio);
}
41 enum netfs_io_source
{
43 NETFS_FILL_WITH_ZEROES
,
44 NETFS_DOWNLOAD_FROM_SERVER
,
45 NETFS_READ_FROM_CACHE
,
47 NETFS_UPLOAD_TO_SERVER
,
52 typedef void (*netfs_io_terminated_t
)(void *priv
, ssize_t transferred_or_error
,
56 * Per-inode context. This wraps the VFS inode.
59 struct inode inode
; /* The VFS inode */
60 const struct netfs_request_ops
*ops
;
61 #if IS_ENABLED(CONFIG_FSCACHE)
62 struct fscache_cookie
*cache
;
64 struct mutex wb_lock
; /* Writeback serialisation */
65 loff_t remote_i_size
; /* Size of the remote file */
66 loff_t zero_point
; /* Size after which we assume there's no data
68 atomic_t io_count
; /* Number of outstanding reqs */
70 #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
71 #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
72 #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */
73 #define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */
77 * A netfs group - for instance a ceph snap. This is marked on dirty pages and
78 * pages marked with a group must be flushed before they can be written under
79 * the domain of another group.
83 void (*free
)(struct netfs_group
*netfs_group
);
87 * Information about a dirty page (attached only if necessary).
91 struct netfs_group
*netfs_group
; /* Filesystem's grouping marker (or NULL). */
92 unsigned int dirty_offset
; /* Write-streaming dirty data offset */
93 unsigned int dirty_len
; /* Write-streaming dirty data length */
#define NETFS_FOLIO_INFO		0x1UL	/* OR'd with folio->private. */
#define NETFS_FOLIO_COPY_TO_CACHE	((struct netfs_group *)0x356UL) /* Write to the cache only */

/*
 * Test whether a folio->private value carries the NETFS_FOLIO_INFO tag bit.
 */
static inline bool netfs_is_folio_info(const void *priv)
{
	return (unsigned long)priv & NETFS_FOLIO_INFO;
}

/*
 * Convert a folio->private value into the netfs_folio it points to by
 * stripping the NETFS_FOLIO_INFO tag bit.  Returns NULL if the value is not
 * tagged (and so is not a netfs_folio pointer at all).
 */
static inline struct netfs_folio *__netfs_folio_info(const void *priv)
{
	if (netfs_is_folio_info(priv))
		return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
	return NULL;
}
/*
 * Decode a folio's private pointer with __netfs_folio_info().
 */
static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
{
	return __netfs_folio_info(folio_get_private(folio));
}
115 static inline struct netfs_group
*netfs_folio_group(struct folio
*folio
)
117 struct netfs_folio
*finfo
;
118 void *priv
= folio_get_private(folio
);
120 finfo
= netfs_folio_info(folio
);
122 return finfo
->netfs_group
;
127 * Stream of I/O subrequests going to a particular destination, such as the
128 * server or the local cache. This is mainly intended for writing where we may
129 * have to write to multiple destinations concurrently.
131 struct netfs_io_stream
{
132 /* Submission tracking */
133 struct netfs_io_subrequest
*construct
; /* Op being constructed */
134 size_t sreq_max_len
; /* Maximum size of a subrequest */
135 unsigned int sreq_max_segs
; /* 0 or max number of segments in an iterator */
136 unsigned int submit_off
; /* Folio offset we're submitting from */
137 unsigned int submit_len
; /* Amount of data left to submit */
138 unsigned int submit_extendable_to
; /* Amount I/O can be rounded up to */
139 void (*prepare_write
)(struct netfs_io_subrequest
*subreq
);
140 void (*issue_write
)(struct netfs_io_subrequest
*subreq
);
141 /* Collection tracking */
142 struct list_head subrequests
; /* Contributory I/O operations */
143 struct netfs_io_subrequest
*front
; /* Op being collected */
144 unsigned long long collected_to
; /* Position we've collected results to */
145 size_t transferred
; /* The amount transferred from this stream */
146 enum netfs_io_source source
; /* Where to read from/write to */
147 unsigned short error
; /* Aggregate error for the stream */
148 unsigned char stream_nr
; /* Index of stream in parent table */
149 bool avail
; /* T if stream is available */
150 bool active
; /* T if stream is active */
151 bool need_retry
; /* T if this stream needs retrying */
152 bool failed
; /* T if this stream failed */
156 * Resources required to do operations on a cache.
158 struct netfs_cache_resources
{
159 const struct netfs_cache_ops
*ops
;
162 unsigned int debug_id
; /* Cookie debug ID */
163 unsigned int inval_counter
; /* object->inval_counter at begin_op */
167 * Descriptor for a single component subrequest. Each operation represents an
168 * individual read/write from/to a server, a cache, a journal, etc..
170 * The buffer iterator is persistent for the life of the subrequest struct and
171 * the pages it points to can be relied on to exist for the duration.
173 struct netfs_io_subrequest
{
174 struct netfs_io_request
*rreq
; /* Supervising I/O request */
175 struct work_struct work
;
176 struct list_head rreq_link
; /* Link in rreq->subrequests */
177 struct iov_iter io_iter
; /* Iterator for this subrequest */
178 unsigned long long start
; /* Where to start the I/O */
179 size_t len
; /* Size of the I/O */
180 size_t transferred
; /* Amount of data transferred */
181 size_t consumed
; /* Amount of read data consumed */
182 size_t prev_donated
; /* Amount of data donated from previous subreq */
183 size_t next_donated
; /* Amount of data donated from next subreq */
185 short error
; /* 0 or error that occurred */
186 unsigned short debug_index
; /* Index in list (for debugging output) */
187 unsigned int nr_segs
; /* Number of segs in io_iter */
188 enum netfs_io_source source
; /* Where to read from/write to */
189 unsigned char stream_nr
; /* I/O stream this belongs to */
190 unsigned char curr_folioq_slot
; /* Folio currently being read */
191 unsigned char curr_folio_order
; /* Order of folio */
192 struct folio_queue
*curr_folioq
; /* Queue segment in which current folio resides */
194 #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
195 #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
196 #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */
197 #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */
198 #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
199 #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */
200 #define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */
201 #define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */
202 #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */
203 #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */
204 #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */
207 enum netfs_io_origin
{
208 NETFS_READAHEAD
, /* This read was triggered by readahead */
209 NETFS_READPAGE
, /* This read is a synchronous read */
210 NETFS_READ_GAPS
, /* This read is a synchronous read to fill gaps */
211 NETFS_READ_FOR_WRITE
, /* This read is to prepare a write */
212 NETFS_DIO_READ
, /* This is a direct I/O read */
213 NETFS_WRITEBACK
, /* This write was triggered by writepages */
214 NETFS_WRITETHROUGH
, /* This write was made by netfs_perform_write() */
215 NETFS_UNBUFFERED_WRITE
, /* This is an unbuffered write */
216 NETFS_DIO_WRITE
, /* This is a direct I/O write */
217 NETFS_PGPRIV2_COPY_TO_CACHE
, /* [DEPRECATED] This is writing read data to the cache */
222 * Descriptor for an I/O helper request. This is used to make multiple I/O
223 * operations to a variety of data stores and then stitch the result together.
225 struct netfs_io_request
{
227 struct work_struct work
;
230 struct inode
*inode
; /* The file being accessed */
231 struct address_space
*mapping
; /* The mapping being accessed */
232 struct kiocb
*iocb
; /* AIO completion vector */
233 struct netfs_cache_resources cache_resources
;
234 struct readahead_control
*ractl
; /* Readahead descriptor */
235 struct list_head proc_link
; /* Link in netfs_iorequests */
236 struct list_head subrequests
; /* Contributory I/O operations */
237 struct netfs_io_stream io_streams
[2]; /* Streams of parallel I/O operations */
238 #define NR_IO_STREAMS 2 //wreq->nr_io_streams
239 struct netfs_group
*group
; /* Writeback group being written back */
240 struct folio_queue
*buffer
; /* Head of I/O buffer */
241 struct folio_queue
*buffer_tail
; /* Tail of I/O buffer */
242 struct iov_iter iter
; /* Unencrypted-side iterator */
243 struct iov_iter io_iter
; /* I/O (Encrypted-side) iterator */
244 void *netfs_priv
; /* Private data for the netfs */
245 void *netfs_priv2
; /* Private data for the netfs */
246 struct bio_vec
*direct_bv
; /* DIO buffer list (when handling iovec-iter) */
247 unsigned int direct_bv_count
; /* Number of elements in direct_bv[] */
248 unsigned int debug_id
;
249 unsigned int rsize
; /* Maximum read size (0 for none) */
250 unsigned int wsize
; /* Maximum write size (0 for none) */
251 atomic_t subreq_counter
; /* Next subreq->debug_index */
252 unsigned int nr_group_rel
; /* Number of refs to release on ->group */
253 spinlock_t lock
; /* Lock for queuing subreqs */
254 atomic_t nr_outstanding
; /* Number of ops in progress */
255 unsigned long long submitted
; /* Amount submitted for I/O so far */
256 unsigned long long len
; /* Length of the request */
257 size_t transferred
; /* Amount to be indicated as transferred */
258 long error
; /* 0 or error that occurred */
259 enum netfs_io_origin origin
; /* Origin of the request */
260 bool direct_bv_unpin
; /* T if direct_bv[] must be unpinned */
261 u8 buffer_head_slot
; /* First slot in ->buffer */
262 u8 buffer_tail_slot
; /* Next slot in ->buffer_tail */
263 unsigned long long i_size
; /* Size of the file */
264 unsigned long long start
; /* Start position */
265 atomic64_t issued_to
; /* Write issuer folio cursor */
266 unsigned long long collected_to
; /* Point we've collected to */
267 unsigned long long cleaned_to
; /* Position we've cleaned folios to */
268 pgoff_t no_unlock_folio
; /* Don't unlock this folio after read */
269 size_t prev_donated
; /* Fallback for subreq->prev_donated */
272 #define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */
273 #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
274 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
275 #define NETFS_RREQ_FAILED 4 /* The request failed */
276 #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
277 #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
278 #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
279 #define NETFS_RREQ_BLOCKED 10 /* We blocked */
280 #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
281 #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
282 #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
283 #define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */
284 #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
285 * write to cache on read */
286 const struct netfs_request_ops
*netfs_ops
;
287 void (*cleanup
)(struct netfs_io_request
*req
);
291 * Operations the network filesystem can/must provide to the helpers.
293 struct netfs_request_ops
{
294 mempool_t
*request_pool
;
295 mempool_t
*subrequest_pool
;
296 int (*init_request
)(struct netfs_io_request
*rreq
, struct file
*file
);
297 void (*free_request
)(struct netfs_io_request
*rreq
);
298 void (*free_subrequest
)(struct netfs_io_subrequest
*rreq
);
300 /* Read request handling */
301 void (*expand_readahead
)(struct netfs_io_request
*rreq
);
302 int (*prepare_read
)(struct netfs_io_subrequest
*subreq
);
303 void (*issue_read
)(struct netfs_io_subrequest
*subreq
);
304 bool (*is_still_valid
)(struct netfs_io_request
*rreq
);
305 int (*check_write_begin
)(struct file
*file
, loff_t pos
, unsigned len
,
306 struct folio
**foliop
, void **_fsdata
);
307 void (*done
)(struct netfs_io_request
*rreq
);
309 /* Modification handling */
310 void (*update_i_size
)(struct inode
*inode
, loff_t i_size
);
311 void (*post_modify
)(struct inode
*inode
);
313 /* Write request handling */
314 void (*begin_writeback
)(struct netfs_io_request
*wreq
);
315 void (*prepare_write
)(struct netfs_io_subrequest
*subreq
);
316 void (*issue_write
)(struct netfs_io_subrequest
*subreq
);
317 void (*retry_request
)(struct netfs_io_request
*wreq
, struct netfs_io_stream
*stream
);
318 void (*invalidate_cache
)(struct netfs_io_request
*wreq
);
/*
 * How to handle reading from a hole.
 */
enum netfs_read_from_hole {
	NETFS_READ_HOLE_IGNORE,
	NETFS_READ_HOLE_CLEAR,
	NETFS_READ_HOLE_FAIL,
};
331 * Table of operations for access to a cache.
333 struct netfs_cache_ops
{
334 /* End an operation */
335 void (*end_operation
)(struct netfs_cache_resources
*cres
);
337 /* Read data from the cache */
338 int (*read
)(struct netfs_cache_resources
*cres
,
340 struct iov_iter
*iter
,
341 enum netfs_read_from_hole read_hole
,
342 netfs_io_terminated_t term_func
,
343 void *term_func_priv
);
345 /* Write data to the cache */
346 int (*write
)(struct netfs_cache_resources
*cres
,
348 struct iov_iter
*iter
,
349 netfs_io_terminated_t term_func
,
350 void *term_func_priv
);
352 /* Write data to the cache from a netfs subrequest. */
353 void (*issue_write
)(struct netfs_io_subrequest
*subreq
);
355 /* Expand readahead request */
356 void (*expand_readahead
)(struct netfs_cache_resources
*cres
,
357 unsigned long long *_start
,
358 unsigned long long *_len
,
359 unsigned long long i_size
);
361 /* Prepare a read operation, shortening it to a cached/uncached
362 * boundary as appropriate.
364 enum netfs_io_source (*prepare_read
)(struct netfs_io_subrequest
*subreq
,
365 unsigned long long i_size
);
367 /* Prepare a write subrequest, working out if we're allowed to do it
368 * and finding out the maximum amount of data to gather before
369 * attempting to submit. If we're not permitted to do it, the
370 * subrequest should be marked failed.
372 void (*prepare_write_subreq
)(struct netfs_io_subrequest
*subreq
);
374 /* Prepare a write operation, working out what part of the write we can
377 int (*prepare_write
)(struct netfs_cache_resources
*cres
,
378 loff_t
*_start
, size_t *_len
, size_t upper_len
,
379 loff_t i_size
, bool no_space_allocated_yet
);
381 /* Prepare an on-demand read operation, shortening it to a cached/uncached
382 * boundary as appropriate.
384 enum netfs_io_source (*prepare_ondemand_read
)(struct netfs_cache_resources
*cres
,
385 loff_t start
, size_t *_len
,
387 unsigned long *_flags
, ino_t ino
);
389 /* Query the occupancy of the cache in a region, returning where the
390 * next chunk of data starts and how long it is.
392 int (*query_occupancy
)(struct netfs_cache_resources
*cres
,
393 loff_t start
, size_t len
, size_t granularity
,
394 loff_t
*_data_start
, size_t *_data_len
);
/* High-level read API. */
ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter);
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
/* High-level write API */
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
			    struct netfs_group *netfs_group);
ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
					 struct netfs_group *netfs_group);
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter,
					   struct netfs_group *netfs_group);
ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
413 /* Address operations API */
414 struct readahead_control
;
415 void netfs_readahead(struct readahead_control
*);
416 int netfs_read_folio(struct file
*, struct folio
*);
417 int netfs_write_begin(struct netfs_inode
*, struct file
*,
418 struct address_space
*, loff_t pos
, unsigned int len
,
419 struct folio
**, void **fsdata
);
420 int netfs_writepages(struct address_space
*mapping
,
421 struct writeback_control
*wbc
);
422 bool netfs_dirty_folio(struct address_space
*mapping
, struct folio
*folio
);
423 int netfs_unpin_writeback(struct inode
*inode
, struct writeback_control
*wbc
);
424 void netfs_clear_inode_writeback(struct inode
*inode
, const void *aux
);
425 void netfs_invalidate_folio(struct folio
*folio
, size_t offset
, size_t length
);
426 bool netfs_release_folio(struct folio
*folio
, gfp_t gfp
);
428 /* VMA operations API. */
429 vm_fault_t
netfs_page_mkwrite(struct vm_fault
*vmf
, struct netfs_group
*netfs_group
);
431 /* (Sub)request management API. */
432 void netfs_read_subreq_progress(struct netfs_io_subrequest
*subreq
,
434 void netfs_read_subreq_terminated(struct netfs_io_subrequest
*subreq
,
435 int error
, bool was_async
);
436 void netfs_get_subrequest(struct netfs_io_subrequest
*subreq
,
437 enum netfs_sreq_ref_trace what
);
438 void netfs_put_subrequest(struct netfs_io_subrequest
*subreq
,
439 bool was_async
, enum netfs_sreq_ref_trace what
);
440 ssize_t
netfs_extract_user_iter(struct iov_iter
*orig
, size_t orig_len
,
441 struct iov_iter
*new,
442 iov_iter_extraction_t extraction_flags
);
443 size_t netfs_limit_iter(const struct iov_iter
*iter
, size_t start_offset
,
444 size_t max_size
, size_t max_segs
);
445 void netfs_prepare_write_failed(struct netfs_io_subrequest
*subreq
);
446 void netfs_write_subrequest_terminated(void *_op
, ssize_t transferred_or_error
,
void netfs_queue_write_request(struct netfs_io_subrequest *subreq);

/* Start/end bracketing for read, write and direct I/O on an inode. */
int netfs_start_io_read(struct inode *inode);
void netfs_end_io_read(struct inode *inode);
int netfs_start_io_write(struct inode *inode);
void netfs_end_io_write(struct inode *inode);
int netfs_start_io_direct(struct inode *inode);
void netfs_end_io_direct(struct inode *inode);
458 * netfs_inode - Get the netfs inode context from the inode
459 * @inode: The inode to query
461 * Get the netfs lib inode context from the network filesystem's inode. The
462 * context struct is expected to directly follow on from the VFS inode struct.
464 static inline struct netfs_inode
*netfs_inode(struct inode
*inode
)
466 return container_of(inode
, struct netfs_inode
, inode
);
470 * netfs_inode_init - Initialise a netfslib inode context
471 * @ctx: The netfs inode to initialise
472 * @ops: The netfs's operations list
473 * @use_zero_point: True to use the zero_point read optimisation
475 * Initialise the netfs library context struct. This is expected to follow on
476 * directly from the VFS inode struct.
478 static inline void netfs_inode_init(struct netfs_inode
*ctx
,
479 const struct netfs_request_ops
*ops
,
483 ctx
->remote_i_size
= i_size_read(&ctx
->inode
);
484 ctx
->zero_point
= LLONG_MAX
;
486 atomic_set(&ctx
->io_count
, 0);
487 #if IS_ENABLED(CONFIG_FSCACHE)
490 mutex_init(&ctx
->wb_lock
);
491 /* ->releasepage() drives zero_point */
492 if (use_zero_point
) {
493 ctx
->zero_point
= ctx
->remote_i_size
;
494 mapping_set_release_always(ctx
->inode
.i_mapping
);
499 * netfs_resize_file - Note that a file got resized
500 * @ctx: The netfs inode being resized
501 * @new_i_size: The new file size
502 * @changed_on_server: The change was applied to the server
504 * Inform the netfs lib that a file got resized so that it can adjust its state.
506 static inline void netfs_resize_file(struct netfs_inode
*ctx
, loff_t new_i_size
,
507 bool changed_on_server
)
509 if (changed_on_server
)
510 ctx
->remote_i_size
= new_i_size
;
511 if (new_i_size
< ctx
->zero_point
)
512 ctx
->zero_point
= new_i_size
;
516 * netfs_i_cookie - Get the cache cookie from the inode
517 * @ctx: The netfs inode to query
519 * Get the caching cookie (if enabled) from the network filesystem's inode.
521 static inline struct fscache_cookie
*netfs_i_cookie(struct netfs_inode
*ctx
)
523 #if IS_ENABLED(CONFIG_FSCACHE)
531 * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
532 * @inode: The netfs inode to wait on
534 * Wait for outstanding I/O requests of any type to complete. This is intended
535 * to be called from inode eviction routines. This makes sure that any
536 * resources held by those requests are cleaned up before we let the inode get
539 static inline void netfs_wait_for_outstanding_io(struct inode
*inode
)
541 struct netfs_inode
*ictx
= netfs_inode(inode
);
543 wait_var_event(&ictx
->io_count
, atomic_read(&ictx
->io_count
) == 0);
546 #endif /* _LINUX_NETFS_H */