1 /* SPDX-License-Identifier: GPL-2.0 */
3 #define LINUX_IOMAP_H 1
5 #include <linux/atomic.h>
6 #include <linux/bitmap.h>
7 #include <linux/blk_types.h>
9 #include <linux/types.h>
10 #include <linux/mm_types.h>
11 #include <linux/blkdev.h>
/* Forward declarations of struct tags. */
struct fiemap_extent_info;
struct iomap_writepage_ctx;
struct vm_area_struct;
/*
 * Types of block ranges for iomap mappings:
 */
#define IOMAP_HOLE	0	/* no blocks allocated, need allocation */
#define IOMAP_DELALLOC	1	/* delayed allocation blocks */
#define IOMAP_MAPPED	2	/* blocks allocated at @addr */
#define IOMAP_UNWRITTEN	3	/* blocks allocated at @addr in unwritten state */
#define IOMAP_INLINE	4	/* data inline in the inode */
/*
 * Flags reported by the file system from iomap_begin:
 *
 * IOMAP_F_NEW indicates that the blocks have been newly allocated and need
 * zeroing for areas that no data is copied to.
 *
 * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access
 * written data and requires fdatasync to commit them to persistent storage.
 * This needs to take into account metadata changes that *may* be made at IO
 * completion, such as file size updates from direct IO.
 *
 * IOMAP_F_SHARED indicates that the blocks are shared, and will need to be
 * unshared as part of a write.
 *
 * IOMAP_F_MERGED indicates that the iomap contains the merge of multiple block
 * mappings.
 *
 * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
 * buffer heads for this mapping.
 *
 * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent
 * rather than a file data extent.
 *
 * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
 * never be merged with the mapping before it.
 */
#define IOMAP_F_NEW		(1U << 0)
#define IOMAP_F_DIRTY		(1U << 1)
#define IOMAP_F_SHARED		(1U << 2)
#define IOMAP_F_MERGED		(1U << 3)
#ifdef CONFIG_BUFFER_HEAD
#define IOMAP_F_BUFFER_HEAD	(1U << 4)
#else
/* Without buffer head support the flag collapses to 0 so tests on it are no-ops. */
#define IOMAP_F_BUFFER_HEAD	0
#endif /* CONFIG_BUFFER_HEAD */
#define IOMAP_F_XATTR		(1U << 5)
#define IOMAP_F_BOUNDARY	(1U << 6)
/*
 * Flags set by the core iomap code during operations:
 *
 * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
 * has changed as the result of this write operation.
 *
 * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file
 * range it covers needs to be remapped by the high level before the operation
 * can proceed.
 */
#define IOMAP_F_SIZE_CHANGED	(1U << 8)
#define IOMAP_F_STALE		(1U << 9)
/*
 * Flags from 0x1000 up are for file system specific usage:
 */
#define IOMAP_F_PRIVATE		(1U << 12)
/*
 * Magic value for addr:
 */
#define IOMAP_NULL_ADDR -1ULL	/* addr is not valid */
96 struct iomap_folio_ops
;
99 u64 addr
; /* disk offset of mapping, bytes */
100 loff_t offset
; /* file offset of mapping, bytes */
101 u64 length
; /* length of mapping, bytes */
102 u16 type
; /* type of mapping */
103 u16 flags
; /* flags for mapping */
104 struct block_device
*bdev
; /* block device for I/O */
105 struct dax_device
*dax_dev
; /* dax_dev for dax operations */
107 void *private; /* filesystem private */
108 const struct iomap_folio_ops
*folio_ops
;
109 u64 validity_cookie
; /* used with .iomap_valid() */
112 static inline sector_t
iomap_sector(const struct iomap
*iomap
, loff_t pos
)
114 return (iomap
->addr
+ pos
- iomap
->offset
) >> SECTOR_SHIFT
;
118 * Returns the inline data pointer for logical offset @pos.
120 static inline void *iomap_inline_data(const struct iomap
*iomap
, loff_t pos
)
122 return iomap
->inline_data
+ pos
- iomap
->offset
;
126 * Check if the mapping's length is within the valid range for inline data.
127 * This is used to guard against accessing data beyond the page inline_data
130 static inline bool iomap_inline_data_valid(const struct iomap
*iomap
)
132 return iomap
->length
<= PAGE_SIZE
- offset_in_page(iomap
->inline_data
);
136 * When a filesystem sets folio_ops in an iomap mapping it returns, get_folio
137 * and put_folio will be called for each folio written to. This only applies
138 * to buffered writes as unbuffered writes will not typically have folios
139 * associated with them.
141 * When get_folio succeeds, put_folio will always be called to do any
142 * cleanup work necessary. put_folio is responsible for unlocking and putting
145 struct iomap_folio_ops
{
146 struct folio
*(*get_folio
)(struct iomap_iter
*iter
, loff_t pos
,
148 void (*put_folio
)(struct inode
*inode
, loff_t pos
, unsigned copied
,
149 struct folio
*folio
);
152 * Check that the cached iomap still maps correctly to the filesystem's
153 * internal extent map. FS internal extent maps can change while iomap
154 * is iterating a cached iomap, so this hook allows iomap to detect that
155 * the iomap needs to be refreshed during a long running write
158 * The filesystem can store internal state (e.g. a sequence number) in
159 * iomap->validity_cookie when the iomap is first mapped to be able to
160 * detect changes between mapping time and whenever .iomap_valid() is
163 * This is called with the folio over the specified file position held
164 * locked by the iomap code.
166 bool (*iomap_valid
)(struct inode
*inode
, const struct iomap
*iomap
);
/*
 * Flags for iomap_begin / iomap_end.  No flag implies a read.
 */
#define IOMAP_WRITE		(1 << 0) /* writing, must allocate blocks */
#define IOMAP_ZERO		(1 << 1) /* zeroing operation, may skip holes */
#define IOMAP_REPORT		(1 << 2) /* report extent status, e.g. FIEMAP */
#define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
#define IOMAP_DIRECT		(1 << 4) /* direct I/O */
#define IOMAP_NOWAIT		(1 << 5) /* do not block */
#define IOMAP_OVERWRITE_ONLY	(1 << 6) /* only pure overwrites allowed */
#define IOMAP_UNSHARE		(1 << 7) /* unshare_file_range */
#ifdef CONFIG_FS_DAX
#define IOMAP_DAX		(1 << 8) /* DAX mapping */
#else
/*
 * NOTE(review): the fallback branch was missing from the damaged text; it
 * follows the IOMAP_F_BUFFER_HEAD pattern (flag is 0 when the feature is
 * compiled out) - confirm.
 */
#define IOMAP_DAX		0
#endif /* CONFIG_FS_DAX */
#define IOMAP_ATOMIC		(1 << 9)
189 * Return the existing mapping at pos, or reserve space starting at
190 * pos for up to length, as long as we can do it as a single mapping.
191 * The actual length is returned in iomap->length.
193 int (*iomap_begin
)(struct inode
*inode
, loff_t pos
, loff_t length
,
194 unsigned flags
, struct iomap
*iomap
,
195 struct iomap
*srcmap
);
198 * Commit and/or unreserve space previous allocated using iomap_begin.
199 * Written indicates the length of the successful write operation which
200 * needs to be commited, while the rest needs to be unreserved.
201 * Written might be zero if no data was written.
203 int (*iomap_end
)(struct inode
*inode
, loff_t pos
, loff_t length
,
204 ssize_t written
, unsigned flags
, struct iomap
*iomap
);
/**
 * struct iomap_iter - Iterate through a range of a file
 * @inode: Set at the start of the iteration and should not change.
 * @pos: The current file position we are operating on.  It is updated by
 *	calls to iomap_iter().  Treat as read-only in the body.
 * @len: The remaining length of the file segment we're operating on.
 *	It is updated at the same time as @pos.
 * @processed: The number of bytes processed by the body in the most recent
 *	iteration, or a negative errno.  0 causes the iteration to stop.
 * @flags: Zero or more of the iomap_begin flags above.
 * @iomap: Map describing the I/O iteration
 * @srcmap: Source map for COW operations
 */
/* Prototype only; the definition is not part of this header. */
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
234 * iomap_length - length of the current iomap iteration
235 * @iter: iteration structure
237 * Returns the length that the operation applies to for the current iteration.
239 static inline u64
iomap_length(const struct iomap_iter
*iter
)
241 u64 end
= iter
->iomap
.offset
+ iter
->iomap
.length
;
243 if (iter
->srcmap
.type
!= IOMAP_HOLE
)
244 end
= min(end
, iter
->srcmap
.offset
+ iter
->srcmap
.length
);
245 return min(iter
->len
, end
- iter
->pos
);
249 * iomap_iter_srcmap - return the source map for the current iomap iteration
250 * @i: iteration structure
252 * Write operations on file systems with reflink support might require a
253 * source and a destination map. This function retourns the source map
254 * for a given operation, which may or may no be identical to the destination
257 static inline const struct iomap
*iomap_iter_srcmap(const struct iomap_iter
*i
)
259 if (i
->srcmap
.type
!= IOMAP_HOLE
)
265 * Return the file offset for the first unchanged block after a short write.
267 * If nothing was written, round @pos down to point at the first block in
268 * the range, else round up to include the partially written block.
270 static inline loff_t
iomap_last_written_block(struct inode
*inode
, loff_t pos
,
273 if (unlikely(!written
))
274 return round_down(pos
, i_blocksize(inode
));
275 return round_up(pos
+ written
, i_blocksize(inode
));
279 * Check if the range needs to be unshared for a FALLOC_FL_UNSHARE_RANGE
282 * Don't bother with blocks that are not shared to start with; or mappings that
283 * cannot be shared, such as inline data, delalloc reservations, holes or
286 * Note that we use srcmap directly instead of iomap_iter_srcmap as unsharing
287 * requires providing a separate source map, and the presence of one is a good
288 * indicator that unsharing is needed, unlike IOMAP_F_SHARED which can be set
289 * for any data that goes into the COW fork for XFS.
291 static inline bool iomap_want_unshare_iter(const struct iomap_iter
*iter
)
293 return (iter
->iomap
.flags
& IOMAP_F_SHARED
) &&
294 iter
->srcmap
.type
== IOMAP_MAPPED
;
297 ssize_t
iomap_file_buffered_write(struct kiocb
*iocb
, struct iov_iter
*from
,
298 const struct iomap_ops
*ops
, void *private);
299 int iomap_read_folio(struct folio
*folio
, const struct iomap_ops
*ops
);
300 void iomap_readahead(struct readahead_control
*, const struct iomap_ops
*ops
);
301 bool iomap_is_partially_uptodate(struct folio
*, size_t from
, size_t count
);
302 struct folio
*iomap_get_folio(struct iomap_iter
*iter
, loff_t pos
, size_t len
);
303 bool iomap_release_folio(struct folio
*folio
, gfp_t gfp_flags
);
304 void iomap_invalidate_folio(struct folio
*folio
, size_t offset
, size_t len
);
305 bool iomap_dirty_folio(struct address_space
*mapping
, struct folio
*folio
);
306 int iomap_file_unshare(struct inode
*inode
, loff_t pos
, loff_t len
,
307 const struct iomap_ops
*ops
);
308 int iomap_zero_range(struct inode
*inode
, loff_t pos
, loff_t len
,
309 bool *did_zero
, const struct iomap_ops
*ops
);
310 int iomap_truncate_page(struct inode
*inode
, loff_t pos
, bool *did_zero
,
311 const struct iomap_ops
*ops
);
312 vm_fault_t
iomap_page_mkwrite(struct vm_fault
*vmf
,
313 const struct iomap_ops
*ops
);
315 typedef void (*iomap_punch_t
)(struct inode
*inode
, loff_t offset
, loff_t length
,
316 struct iomap
*iomap
);
317 void iomap_write_delalloc_release(struct inode
*inode
, loff_t start_byte
,
318 loff_t end_byte
, unsigned flags
, struct iomap
*iomap
,
319 iomap_punch_t punch
);
321 int iomap_fiemap(struct inode
*inode
, struct fiemap_extent_info
*fieinfo
,
322 u64 start
, u64 len
, const struct iomap_ops
*ops
);
323 loff_t
iomap_seek_hole(struct inode
*inode
, loff_t offset
,
324 const struct iomap_ops
*ops
);
325 loff_t
iomap_seek_data(struct inode
*inode
, loff_t offset
,
326 const struct iomap_ops
*ops
);
327 sector_t
iomap_bmap(struct address_space
*mapping
, sector_t bno
,
328 const struct iomap_ops
*ops
);
331 * Structure for writeback I/O completions.
334 struct list_head io_list
; /* next ioend in chain */
336 u16 io_flags
; /* IOMAP_F_* */
337 struct inode
*io_inode
; /* file being written to */
338 size_t io_size
; /* size of the extent */
339 loff_t io_offset
; /* offset in the file */
340 sector_t io_sector
; /* start sector of ioend */
341 struct bio io_bio
; /* MUST BE LAST! */
344 static inline struct iomap_ioend
*iomap_ioend_from_bio(struct bio
*bio
)
346 return container_of(bio
, struct iomap_ioend
, io_bio
);
349 struct iomap_writeback_ops
{
351 * Required, maps the blocks so that writeback can be performed on
352 * the range starting at offset.
354 * Can return arbitrarily large regions, but we need to call into it at
355 * least once per folio to allow the file systems to synchronize with
356 * the write path that could be invalidating mappings.
358 * An existing mapping from a previous call to this method can be reused
359 * by the file system if it is still valid.
361 int (*map_blocks
)(struct iomap_writepage_ctx
*wpc
, struct inode
*inode
,
362 loff_t offset
, unsigned len
);
365 * Optional, allows the file systems to perform actions just before
366 * submitting the bio and/or override the bio end_io handler for complex
367 * operations like copy on write extent manipulation or unwritten extent
370 int (*prepare_ioend
)(struct iomap_ioend
*ioend
, int status
);
373 * Optional, allows the file system to discard state on a page where
374 * we failed to submit any I/O.
376 void (*discard_folio
)(struct folio
*folio
, loff_t pos
);
379 struct iomap_writepage_ctx
{
381 struct iomap_ioend
*ioend
;
382 const struct iomap_writeback_ops
*ops
;
383 u32 nr_folios
; /* folios added to the ioend */
/* Prototypes only; the definitions are not part of this header. */
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
		struct list_head *more_ioends);
void iomap_sort_ioends(struct list_head *ioend_list);
int iomap_writepages(struct address_space *mapping,
		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
		const struct iomap_writeback_ops *ops);
/*
 * Flags for direct I/O ->end_io:
 */
#define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
#define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */
400 struct iomap_dio_ops
{
401 int (*end_io
)(struct kiocb
*iocb
, ssize_t size
, int error
,
403 void (*submit_io
)(const struct iomap_iter
*iter
, struct bio
*bio
,
407 * Filesystems wishing to attach private information to a direct io bio
408 * must provide a ->submit_io method that attaches the additional
409 * information to the bio and changes the ->bi_end_io callback to a
410 * custom function. This function should, at a minimum, perform any
411 * relevant post-processing of the bio and end with a call to
412 * iomap_dio_bio_end_io.
414 struct bio_set
*bio_set
;
/*
 * Wait for the I/O to complete in iomap_dio_rw even if the kiocb is not
 * synchronous.
 */
#define IOMAP_DIO_FORCE_WAIT	(1 << 0)

/*
 * Do not allocate blocks or zero partial blocks, but instead fall back to
 * the caller by returning -EAGAIN.  Used to optimize direct I/O writes that
 * are not aligned to the file system block size.
 */
#define IOMAP_DIO_OVERWRITE_ONLY	(1 << 1)

/*
 * When a page fault occurs, return a partial synchronous result and allow
 * the caller to retry the rest of the operation after dealing with the page
 * fault.
 */
#define IOMAP_DIO_PARTIAL		(1 << 2)
437 ssize_t
iomap_dio_rw(struct kiocb
*iocb
, struct iov_iter
*iter
,
438 const struct iomap_ops
*ops
, const struct iomap_dio_ops
*dops
,
439 unsigned int dio_flags
, void *private, size_t done_before
);
440 struct iomap_dio
*__iomap_dio_rw(struct kiocb
*iocb
, struct iov_iter
*iter
,
441 const struct iomap_ops
*ops
, const struct iomap_dio_ops
*dops
,
442 unsigned int dio_flags
, void *private, size_t done_before
);
443 ssize_t
iomap_dio_complete(struct iomap_dio
*dio
);
444 void iomap_dio_bio_end_io(struct bio
*bio
);
/*
 * Swapfile activation.  With CONFIG_SWAP the function is declared here;
 * without it, every call expands to the constant -EIO.
 */
#ifdef CONFIG_SWAP
struct swap_info_struct;

int iomap_swapfile_activate(struct swap_info_struct *sis,
		struct file *swap_file, sector_t *pagespan,
		const struct iomap_ops *ops);
#else
# define iomap_swapfile_activate(sis, swapfile, pagespan, ops)	(-EIO)
#endif /* CONFIG_SWAP */
457 #endif /* LINUX_IOMAP_H */