/* SPDX-License-Identifier: GPL-2.0 */
/*
 * include/linux/buffer_head.h
 *
 * Everything to do with buffer_heads.
 */

#ifndef _LINUX_BUFFER_HEAD_H
#define _LINUX_BUFFER_HEAD_H

#include <linux/types.h>
#include <linux/blk_types.h>
#include <linux/linkage.h>
#include <linux/pagemap.h>
#include <linux/wait.h>
#include <linux/atomic.h>
enum bh_state_bits {
	BH_Uptodate,	/* Contains valid data */
	BH_Dirty,	/* Is dirty */
	BH_Lock,	/* Is locked */
	BH_Req,		/* Has been submitted for I/O */

	BH_Mapped,	/* Has a disk mapping */
	BH_New,		/* Disk mapping was newly created by get_block */
	BH_Async_Read,	/* Is under end_buffer_async_read I/O */
	BH_Async_Write,	/* Is under end_buffer_async_write I/O */
	BH_Delay,	/* Buffer is not yet allocated on disk */
	BH_Boundary,	/* Block is followed by a discontiguity */
	BH_Write_EIO,	/* I/O error on write */
	BH_Unwritten,	/* Buffer is allocated on disk but not written */
	BH_Quiet,	/* Buffer error printks to be quiet */
	BH_Meta,	/* Buffer contains metadata */
	BH_Prio,	/* Buffer should be submitted with REQ_PRIO */
	BH_Defer_Completion, /* Defer AIO completion to workqueue */

	BH_PrivateStart,/* not a state bit, but the first bit available
			 * for private allocation by other entities
			 */
};

#define MAX_BUF_PER_PAGE	(PAGE_SIZE / 512)
struct page;
struct buffer_head;
struct address_space;
typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
/*
 * Historically, a buffer_head was used to map a single block
 * within a page, and of course as the unit of I/O through the
 * filesystem and block layers.  Nowadays the basic I/O unit
 * is the bio, and buffer_heads are used for extracting block
 * mappings (via a get_block_t call), for tracking state within
 * a folio (via a folio_mapping) and for wrapping bio submission
 * for backward compatibility reasons (e.g. submit_bh).
 */
struct buffer_head {
	unsigned long b_state;		/* buffer state bitmap (see above) */
	struct buffer_head *b_this_page;/* circular list of page's buffers */
	union {
		struct page *b_page;	/* the page this bh is mapped to */
		struct folio *b_folio;	/* the folio this bh is mapped to */
	};

	sector_t b_blocknr;		/* start block number */
	size_t b_size;			/* size of mapping */
	char *b_data;			/* pointer to data within the page */

	struct block_device *b_bdev;
	bh_end_io_t *b_end_io;		/* I/O completion */
	void *b_private;		/* reserved for b_end_io */
	struct list_head b_assoc_buffers; /* associated with another mapping */
	struct address_space *b_assoc_map;	/* mapping this buffer is
						   associated with */
	atomic_t b_count;		/* users using this buffer_head */
	spinlock_t b_uptodate_lock;	/* Used by the first bh in a page, to
					 * serialise IO completion of other
					 * buffers in the page */
};
/*
 * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
 * and buffer_foo() functions.
 * To avoid resetting buffer flags that are already set, because that causes
 * a costly cache line transition, check the flag first.
 */
#define BUFFER_FNS(bit, name)						\
static __always_inline void set_buffer_##name(struct buffer_head *bh)	\
{									\
	if (!test_bit(BH_##bit, &(bh)->b_state))			\
		set_bit(BH_##bit, &(bh)->b_state);			\
}									\
static __always_inline void clear_buffer_##name(struct buffer_head *bh) \
{									\
	clear_bit(BH_##bit, &(bh)->b_state);				\
}									\
static __always_inline int buffer_##name(const struct buffer_head *bh)	\
{									\
	return test_bit(BH_##bit, &(bh)->b_state);			\
}
/*
 * test_set_buffer_foo() and test_clear_buffer_foo()
 */
#define TAS_BUFFER_FNS(bit, name)					\
static __always_inline int test_set_buffer_##name(struct buffer_head *bh) \
{									\
	return test_and_set_bit(BH_##bit, &(bh)->b_state);		\
}									\
static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \
{									\
	return test_and_clear_bit(BH_##bit, &(bh)->b_state);		\
}
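/*
 * For illustration (these helpers are generated below, not extra API):
 * the invocation BUFFER_FNS(Dirty, dirty) expands to
 *
 *	void set_buffer_dirty(struct buffer_head *bh);
 *	void clear_buffer_dirty(struct buffer_head *bh);
 *	int buffer_dirty(const struct buffer_head *bh);
 *
 * each operating on the BH_Dirty bit of bh->b_state, and
 * TAS_BUFFER_FNS(Dirty, dirty) additionally yields
 * test_set_buffer_dirty() and test_clear_buffer_dirty().
 */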
/*
 * Emit the buffer bitops functions.  Note that there are also functions
 * of the form "mark_buffer_foo()".  These are higher-level functions which
 * do something in addition to setting a b_state bit.
 */
BUFFER_FNS(Dirty, dirty)
TAS_BUFFER_FNS(Dirty, dirty)
BUFFER_FNS(Lock, locked)
BUFFER_FNS(Req, req)
TAS_BUFFER_FNS(Req, req)
BUFFER_FNS(Mapped, mapped)
BUFFER_FNS(New, new)
BUFFER_FNS(Async_Read, async_read)
BUFFER_FNS(Async_Write, async_write)
BUFFER_FNS(Delay, delay)
BUFFER_FNS(Boundary, boundary)
BUFFER_FNS(Write_EIO, write_io_error)
BUFFER_FNS(Unwritten, unwritten)
BUFFER_FNS(Meta, meta)
BUFFER_FNS(Prio, prio)
BUFFER_FNS(Defer_Completion, defer_completion)
static __always_inline void set_buffer_uptodate(struct buffer_head *bh)
{
	/*
	 * If somebody else already set this uptodate, they will
	 * have done the memory barrier, and a reader will thus
	 * see *some* valid buffer state.
	 *
	 * Any other serialization (with IO errors or whatever that
	 * might clear the bit) has to come from other state (eg BH_Lock).
	 */
	if (test_bit(BH_Uptodate, &bh->b_state))
		return;

	/*
	 * make it consistent with folio_mark_uptodate
	 * pairs with smp_load_acquire in buffer_uptodate
	 */
	smp_mb__before_atomic();
	set_bit(BH_Uptodate, &bh->b_state);
}

static __always_inline void clear_buffer_uptodate(struct buffer_head *bh)
{
	clear_bit(BH_Uptodate, &bh->b_state);
}

static __always_inline int buffer_uptodate(const struct buffer_head *bh)
{
	/*
	 * make it consistent with folio_test_uptodate
	 * pairs with smp_mb__before_atomic in set_buffer_uptodate
	 */
	return test_bit_acquire(BH_Uptodate, &bh->b_state);
}

static inline unsigned long bh_offset(const struct buffer_head *bh)
{
	return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1);
}

/* If we *know* page->private refers to buffer_heads */
#define page_buffers(page)					\
	({							\
		BUG_ON(!PagePrivate(page));			\
		((struct buffer_head *)page_private(page));	\
	})
#define page_has_buffers(page)	PagePrivate(page)
#define folio_buffers(folio)	folio_get_private(folio)

void buffer_check_dirty_writeback(struct folio *folio,
				  bool *dirty, bool *writeback);
void mark_buffer_dirty(struct buffer_head *bh);
void mark_buffer_write_io_error(struct buffer_head *bh);
void touch_buffer(struct buffer_head *bh);
void folio_set_bh(struct buffer_head *bh, struct folio *folio,
		  unsigned long offset);
struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
					gfp_t gfp);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size);
struct buffer_head *create_empty_buffers(struct folio *folio,
		unsigned long blocksize, unsigned long b_state);
void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
void end_buffer_write_sync(struct buffer_head *bh, int uptodate);

/* Things to do with buffers at mapping->private_list */
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
				  bool datasync);
int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
			  bool datasync);
void clean_bdev_aliases(struct block_device *bdev, sector_t block,
			sector_t len);
static inline void clean_bdev_bh_alias(struct buffer_head *bh)
{
	clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1);
}

void mark_buffer_async_write(struct buffer_head *bh);
void __wait_on_buffer(struct buffer_head *);
wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
			unsigned size);
struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
		unsigned size, gfp_t gfp);
void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
void __breadahead(struct block_device *, sector_t block, unsigned int size);
struct buffer_head *__bread_gfp(struct block_device *,
				sector_t block, unsigned size, gfp_t gfp);
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head *bh);
void unlock_buffer(struct buffer_head *bh);
void __lock_buffer(struct buffer_head *bh);
int sync_dirty_buffer(struct buffer_head *bh);
int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
void submit_bh(blk_opf_t, struct buffer_head *);
void write_boundary_block(struct block_device *bdev,
			sector_t bblock, unsigned blocksize);
int bh_uptodate_or_lock(struct buffer_head *bh);
int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
void __bh_read_batch(int nr, struct buffer_head *bhs[],
		     blk_opf_t op_flags, bool force_lock);
/*
 * Generic address_space_operations implementations for buffer_head-backed
 * address_spaces.
 */
void block_invalidate_folio(struct folio *folio, size_t offset, size_t length);
int block_write_full_folio(struct folio *folio, struct writeback_control *wbc,
		void *get_block);
int __block_write_full_folio(struct inode *inode, struct folio *folio,
		get_block_t *get_block, struct writeback_control *wbc);
int block_read_full_folio(struct folio *, get_block_t *);
bool block_is_partially_uptodate(struct folio *, size_t from, size_t count);
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
		struct folio **foliop, get_block_t *get_block);
int __block_write_begin(struct folio *folio, loff_t pos, unsigned len,
		get_block_t *get_block);
int block_write_end(struct file *, struct address_space *,
				loff_t, unsigned len, unsigned copied,
				struct folio *, void *);
int generic_write_end(struct file *, struct address_space *,
				loff_t, unsigned len, unsigned copied,
				struct folio *, void *);
void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to);
int cont_write_begin(struct file *, struct address_space *, loff_t,
			unsigned, struct folio **, void **,
			get_block_t *, loff_t *);
int generic_cont_expand_simple(struct inode *inode, loff_t size);
void block_commit_write(struct page *page, unsigned int from, unsigned int to);
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
				get_block_t get_block);
sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);

#ifdef CONFIG_MIGRATION
extern int buffer_migrate_folio(struct address_space *,
		struct folio *dst, struct folio *src, enum migrate_mode);
extern int buffer_migrate_folio_norefs(struct address_space *,
		struct folio *dst, struct folio *src, enum migrate_mode);
#else
#define buffer_migrate_folio NULL
#define buffer_migrate_folio_norefs NULL
#endif

static inline void get_bh(struct buffer_head *bh)
{
	atomic_inc(&bh->b_count);
}

static inline void put_bh(struct buffer_head *bh)
{
	smp_mb__before_atomic();
	atomic_dec(&bh->b_count);
}
/**
 * brelse - Release a buffer.
 * @bh: The buffer to release.
 *
 * Decrement a buffer_head's reference count.  If @bh is NULL, this
 * function is a no-op.
 *
 * If all buffers on a folio have zero reference count, are clean
 * and unlocked, and if the folio is unlocked and not under writeback
 * then try_to_free_buffers() may strip the buffers from the folio in
 * preparation for freeing it (sometimes, rarely, buffers are removed
 * from a folio but it ends up not being freed, and buffers may later
 * be reattached).
 *
 * Context: Any context.
 */
static inline void brelse(struct buffer_head *bh)
{
	if (bh)
		__brelse(bh);
}
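/*
 * Illustrative sketch (not additional API): one common pattern for
 * modifying an on-disk block is to read it with sb_bread(), edit
 * b_data, mark the buffer dirty and drop the reference.  "sb",
 * "blocknr", "offset" and "byte" are hypothetical values supplied by
 * the calling filesystem, which is assumed to provide its own
 * higher-level locking.
 *
 *	struct buffer_head *bh = sb_bread(sb, blocknr);
 *
 *	if (!bh)
 *		return -EIO;
 *	bh->b_data[offset] = byte;
 *	mark_buffer_dirty(bh);
 *	brelse(bh);
 */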
/**
 * bforget - Discard any dirty data in a buffer.
 * @bh: The buffer to forget.
 *
 * Call this function instead of brelse() if the data written to a buffer
 * no longer needs to be written back.  It will clear the buffer's dirty
 * flag so writeback of this buffer will be skipped.
 *
 * Context: Any context.
 */
static inline void bforget(struct buffer_head *bh)
{
	if (bh)
		__bforget(bh);
}

static inline struct buffer_head *
sb_bread(struct super_block *sb, sector_t block)
{
	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
}

static inline struct buffer_head *
sb_bread_unmovable(struct super_block *sb, sector_t block)
{
	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
}

static inline void
sb_breadahead(struct super_block *sb, sector_t block)
{
	__breadahead(sb->s_bdev, block, sb->s_blocksize);
}

static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
		sector_t block, unsigned size)
{
	gfp_t gfp;

	gfp = mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS);
	gfp |= __GFP_NOFAIL;

	return bdev_getblk(bdev, block, size, gfp);
}

static inline struct buffer_head *__getblk(struct block_device *bdev,
		sector_t block, unsigned size)
{
	gfp_t gfp;

	gfp = mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS);
	gfp |= __GFP_MOVABLE | __GFP_NOFAIL;

	return bdev_getblk(bdev, block, size, gfp);
}

static inline struct buffer_head *sb_getblk(struct super_block *sb,
		sector_t block)
{
	return __getblk(sb->s_bdev, block, sb->s_blocksize);
}

static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb,
		sector_t block, gfp_t gfp)
{
	return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp);
}

static inline struct buffer_head *
sb_find_get_block(struct super_block *sb, sector_t block)
{
	return __find_get_block(sb->s_bdev, block, sb->s_blocksize);
}

static inline void
map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block)
{
	set_buffer_mapped(bh);
	bh->b_bdev = sb->s_bdev;
	bh->b_blocknr = block;
	bh->b_size = sb->s_blocksize;
}
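/*
 * Illustrative sketch (not additional API): map_bh() is what a
 * filesystem's get_block_t callback typically calls once it has
 * resolved a logical block to a physical one.  "myfs_lblk_to_pblk" is
 * a hypothetical mapping helper.
 *
 *	static int myfs_get_block(struct inode *inode, sector_t iblock,
 *				  struct buffer_head *bh, int create)
 *	{
 *		sector_t pblk = myfs_lblk_to_pblk(inode, iblock);
 *
 *		map_bh(bh, inode->i_sb, pblk);
 *		return 0;
 *	}
 */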
static inline void wait_on_buffer(struct buffer_head *bh)
{
	might_sleep();
	if (buffer_locked(bh))
		__wait_on_buffer(bh);
}

static inline int trylock_buffer(struct buffer_head *bh)
{
	return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state));
}

static inline void lock_buffer(struct buffer_head *bh)
{
	might_sleep();
	if (!trylock_buffer(bh))
		__lock_buffer(bh);
}

static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
{
	if (!buffer_uptodate(bh) && trylock_buffer(bh)) {
		if (!buffer_uptodate(bh))
			__bh_read(bh, op_flags, false);
		else
			unlock_buffer(bh);
	}
}

static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags)
{
	if (!bh_uptodate_or_lock(bh))
		__bh_read(bh, op_flags, false);
}

/* Returns 1 if the buffer was already uptodate, 0 on success, and -EIO on error. */
static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags)
{
	if (bh_uptodate_or_lock(bh))
		return 1;
	return __bh_read(bh, op_flags, true);
}
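/*
 * Illustrative sketch (not additional API): synchronously ensure a
 * previously mapped buffer contains valid data before it is used;
 * bh_read() only issues I/O when the buffer is not already uptodate.
 *
 *	if (bh_read(bh, 0) < 0)
 *		return -EIO;
 */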
static inline void bh_read_batch(int nr, struct buffer_head *bhs[])
{
	__bh_read_batch(nr, bhs, 0, true);
}

static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[],
				      blk_opf_t op_flags)
{
	__bh_read_batch(nr, bhs, op_flags, false);
}

/**
 * __bread() - Read a block.
 * @bdev: The block device to read from.
 * @block: Block number in units of block size.
 * @size: The block size of this device in bytes.
 *
 * Read a specified block, and return the buffer head that refers
 * to it.  The memory is allocated from the movable area so that it can
 * be migrated.  The returned buffer head has its refcount increased.
 * The caller should call brelse() when it has finished with the buffer.
 *
 * Context: May sleep waiting for I/O.
 * Return: NULL if the block was unreadable.
 */
static inline struct buffer_head *__bread(struct block_device *bdev,
		sector_t block, unsigned size)
{
	return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
}
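/*
 * Illustrative sketch (not additional API): reading one block straight
 * from a block device.  "bdev", "blocknr" and "do_something_with()" are
 * placeholders, and the block size is assumed to be 4096 bytes.
 *
 *	struct buffer_head *bh = __bread(bdev, blocknr, 4096);
 *
 *	if (!bh)
 *		return -EIO;
 *	do_something_with(bh->b_data);
 *	brelse(bh);
 */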
/**
 * get_nth_bh - Get a reference on the n'th buffer after this one.
 * @bh: The buffer to start counting from.
 * @count: How many buffers to skip.
 *
 * This is primarily useful for finding the nth buffer in a folio; in
 * that case you pass the head buffer and the byte offset in the folio
 * divided by the block size.  It can be used for other purposes, but
 * it will wrap at the end of the folio rather than returning NULL or
 * proceeding to the next folio for you.
 *
 * Return: The requested buffer with an elevated refcount.
 */
static inline __must_check
struct buffer_head *get_nth_bh(struct buffer_head *bh, unsigned int count)
{
	while (count--)
		bh = bh->b_this_page;
	get_bh(bh);
	return bh;
}
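/*
 * Illustrative sketch (not additional API): finding the buffer that
 * covers byte "offset" of a folio whose buffers are "blocksize" bytes
 * each (both names are placeholders).  Drop the returned reference
 * with put_bh() when done.
 *
 *	struct buffer_head *head = folio_buffers(folio);
 *	struct buffer_head *bh = get_nth_bh(head, offset / blocksize);
 */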
bool block_dirty_folio(struct address_space *mapping, struct folio *folio);

#ifdef CONFIG_BUFFER_HEAD

void buffer_init(void);
bool try_to_free_buffers(struct folio *folio);
int inode_has_buffers(struct inode *inode);
void invalidate_inode_buffers(struct inode *inode);
int remove_inode_buffers(struct inode *inode);
int sync_mapping_buffers(struct address_space *mapping);
void invalidate_bh_lrus(void);
void invalidate_bh_lrus_cpu(void);
bool has_bh_in_lru(int cpu, void *dummy);
extern int buffer_heads_over_limit;

#else /* CONFIG_BUFFER_HEAD */

static inline void buffer_init(void) {}
static inline bool try_to_free_buffers(struct folio *folio) { return true; }
static inline int inode_has_buffers(struct inode *inode) { return 0; }
static inline void invalidate_inode_buffers(struct inode *inode) {}
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
static inline void invalidate_bh_lrus(void) {}
static inline void invalidate_bh_lrus_cpu(void) {}
static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; }
#define buffer_heads_over_limit 0

#endif /* CONFIG_BUFFER_HEAD */
#endif /* _LINUX_BUFFER_HEAD_H */