// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Online fsck wants to create ephemeral ordered recordsets.  The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xmbuf mechanism uses an unlinked shmem file
 * to store our staging data.  This file is not installed in the file
 * descriptor table so that user programs cannot access the data, which means
 * that the xmbuf must be freed with xmbuf_free.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 *
 * The only supported block size is PAGE_SIZE, and we cannot use highmem.
 */
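
/*
 * For illustration only, a typical xmbuf lifecycle looks roughly like the
 * sketch below.  Error handling is elided, and "mp", "daddr", and "data"
 * are placeholders invented for this example:
 *
 *	struct xfs_buftarg	*btp;
 *	struct xfs_buf		*bp;
 *
 *	error = xmbuf_alloc(mp, "example staging file", &btp);
 *	error = xfs_buf_get(btp, daddr, XMBUF_BLOCKSIZE >> BBSHIFT, &bp);
 *	memcpy(bp->b_addr, data, XMBUF_BLOCKSIZE);  // direct pagecache access
 *	xfs_buf_relse(bp);
 *	xmbuf_free(btp);
 */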

/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace.  Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;

/*
 * Allocate a buffer cache target for a memory-backed file and set up the
 * buffer target in the given mount.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	struct xfs_buftarg	**btpp)
{
	struct file		*file;
	struct inode		*inode;
	struct xfs_buftarg	*btp;
	int			error;

	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	file = shmem_kernel_file_setup(descr, 0, 0);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_btp;
	}
	inode = file_inode(file);

	/* private file, private locking */
	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	/* ensure all writes are below EOF to avoid pagecache zeroing */
	i_size_write(inode, inode->i_sb->s_maxbytes);

	error = xfs_buf_cache_init(btp->bt_cache);
	if (error)
		goto out_file;

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_dev = (dev_t)-1U;
	btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
	btp->bt_file = file;
	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;

	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
	if (error)
		goto out_bcache;

	trace_xmbuf_create(btp);

	*btpp = btp;
	return 0;

out_bcache:
	xfs_buf_cache_destroy(btp->bt_cache);
out_file:
	fput(file);
out_free_btp:
	kfree(btp);
	return error;
}

/* Free a buffer cache target for a memory-backed buffer cache. */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	ASSERT(xfs_buftarg_is_mem(btp));
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);

	trace_xmbuf_free(btp);

	xfs_destroy_buftarg(btp);
	xfs_buf_cache_destroy(btp->bt_cache);
	fput(btp->bt_file);
	kfree(btp);
}

/* Directly map a shmem page into the buffer cache. */
int
xmbuf_map_page(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	struct folio		*folio = NULL;
	struct page		*page;
	loff_t			pos = BBTOB(xfs_buf_daddr(bp));
	int			error;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	if (bp->b_map_count != 1)
		return -ENOMEM;
	if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
		return -ENOMEM;
	if (offset_in_page(pos) != 0) {
		ASSERT(offset_in_page(pos));
		return -ENOMEM;
	}

	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
	if (error)
		return error;

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return -EIO;
	}

	page = folio_file_page(folio, pos >> PAGE_SHIFT);

	/*
	 * Mark the page dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xmbuf_unmap_page.
	 */
	set_page_dirty(page);
	unlock_page(page);

	bp->b_addr = page_address(page);
	bp->b_pages = bp->b_page_array;
	bp->b_pages[0] = page;
	bp->b_page_count = 1;
	return 0;
}
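
/*
 * Example (a sketch, not a guaranteed interface): once mapped, the buffer
 * contents alias the shmem page directly, so a caller staging a record can
 * write through b_addr and log the range with no bwrite required.  The use
 * of a btree block header and "nrecs" here is illustrative only:
 *
 *	struct xfs_btree_block	*block = bp->b_addr;
 *
 *	block->bb_numrecs = cpu_to_be16(nrecs);	// lands in the page cache
 *	xfs_trans_log_buf(tp, bp, 0, XMBUF_BLOCKSIZE - 1);
 */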

/* Unmap a shmem page that was mapped into the buffer cache. */
void
xmbuf_unmap_page(
	struct xfs_buf		*bp)
{
	struct page		*page = bp->b_pages[0];

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	put_page(page);

	bp->b_addr = NULL;
	bp->b_pages[0] = NULL;
	bp->b_pages = NULL;
	bp->b_page_count = 0;
}

/* Is this a valid daddr within the buftarg? */
bool
xmbuf_verify_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	struct inode		*inode = file_inode(btp->bt_file);

	ASSERT(xfs_buftarg_is_mem(btp));

	return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
}
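
/*
 * A worked example of the check above: daddrs count 512-byte basic blocks
 * (BBSHIFT == 9), so for a shmem superblock whose s_maxbytes were, say,
 * 16 TiB (2^44 bytes), any daddr below 2^44 >> 9 = 2^35 sectors would be
 * valid.  The specific s_maxbytes value is an assumption for illustration.
 */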

/* Discard the page backing this buffer. */
static void
xmbuf_stale(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	loff_t			pos;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	pos = BBTOB(xfs_buf_daddr(bp));
	shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
}

/*
 * Finalize a buffer -- discard the backing page if it's stale, or run the
 * write verifier to detect problems.
 */
int
xmbuf_finalize(
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;
	int			error = 0;

	if (bp->b_flags & XBF_STALE) {
		xmbuf_stale(bp);
		return 0;
	}

	/*
	 * Although this btree is ephemeral, validate the buffer structure so
	 * that we can detect memory corruption errors and software bugs.
	 */
	fa = bp->b_ops->verify_struct(bp);
	if (fa) {
		error = -EFSCORRUPTED;
		xfs_verifier_error(bp, error, fa);
	}

	return error;
}

/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
			    XFS_BLI_LOGGED | XFS_BLI_STALE);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

	/*
	 * The buffer may have been joined to the transaction more than once,
	 * so keep detaching until no log item remains.
	 */
	while (bp->b_log_item != NULL)
		xfs_trans_bdetach(tp, bp);
}
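
/*
 * Usage sketch (a hypothetical caller, not code in this file): a repair
 * transaction that staged records in an xmbuf might dispose of a dirty
 * buffer like so, since direct-mapped buffers never need to be written:
 *
 *	xmbuf_trans_bdetach(tp, bp);
 *	xfs_buf_relse(bp);
 *
 * After the detach, the shmem page still holds the staged data; it is
 * discarded only when the buffer is marked stale (see xmbuf_stale) or the
 * backing file is released by xmbuf_free.
 */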