// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>
/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory.  This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times.  In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file
 * to store our staging data.  This file is not installed in the file
 * descriptor table so that user programs cannot access the data, which means
 * that the xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 */
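/*
 * A minimal usage sketch of the lifecycle implemented below (error handling
 * condensed; the staged datum and description string are illustrative only):
 *
 *	struct xfile	*xf;
 *	u64		val = 42;	// hypothetical staged datum
 *	int		error;
 *
 *	error = xfile_create("example stage", 0, &xf);
 *	if (error)
 *		return error;
 *	error = xfile_store(xf, &val, sizeof(val), 0);	// may fail with -ENOMEM
 *	if (!error)
 *		error = xfile_load(xf, &val, sizeof(val), 0);
 *	xfile_destroy(xf);
 *	return error;
 */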
/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 */
static struct lock_class_key xfile_i_mutex_key;
/*
 * Create an xfile of the given size.  The description will be used in the
 * trace output.
 */
int
xfile_create(
	const char		*description,
	loff_t			isize,
	struct xfile		**xfilep)
{
	struct inode		*inode;
	struct xfile		*xf;
	int			error;

	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
	if (!xf)
		return -ENOMEM;

	xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
	if (IS_ERR(xf->file)) {
		error = PTR_ERR(xf->file);
		goto out_xfile;
	}

	inode = file_inode(xf->file);
	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	trace_xfile_create(xf);

	*xfilep = xf;
	return 0;
out_xfile:
	kfree(xf);
	return error;
}
/* Close the file and release all resources. */
void
xfile_destroy(
	struct xfile		*xf)
{
	struct inode		*inode = file_inode(xf->file);

	trace_xfile_destroy(xf);

	/* Restore the default lock class before handing the inode back. */
	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
	fput(xf->file);
	kfree(xf);
}
/*
 * Load an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_load(
	struct xfile		*xf,
	void			*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_load(xf, pos, count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
				SGP_READ) < 0)
			break;
		if (!folio) {
			/*
			 * No data stored at this offset, just zero the output
			 * buffer until the next page boundary.
			 */
			len = min_t(ssize_t, count,
					PAGE_SIZE - offset_in_page(pos));
			memset(buf, 0, len);
		} else {
			if (filemap_check_wb_err(inode->i_mapping, 0)) {
				folio_unlock(folio);
				folio_put(folio);
				break;
			}

			offset = offset_in_folio(folio, pos);
			len = min_t(ssize_t, count, folio_size(folio) - offset);
			memcpy(buf, folio_address(folio) + offset, len);

			folio_unlock(folio);
			folio_put(folio);
		}
		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}
/*
 * Store an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_store(
	struct xfile		*xf,
	const void		*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_store(xf, pos, count);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if (pos + count > i_size_read(inode))
		i_size_write(inode, pos + count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
				SGP_CACHE) < 0)
			break;
		if (filemap_check_wb_err(inode->i_mapping, 0)) {
			folio_unlock(folio);
			folio_put(folio);
			break;
		}

		offset = offset_in_folio(folio, pos);
		len = min_t(ssize_t, count, folio_size(folio) - offset);
		memcpy(folio_address(folio) + offset, buf, len);

		folio_mark_dirty(folio);
		folio_unlock(folio);
		folio_put(folio);

		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}
/* Find the next written area in the xfile data for a given offset. */
loff_t
xfile_seek_data(
	struct xfile		*xf,
	loff_t			pos)
{
	loff_t			ret;

	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
	trace_xfile_seek_data(xf, pos, ret);
	return ret;
}
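/*
 * A sketch (not part of this file) of how a caller might walk the written
 * regions with the helper above.  SEEK_DATA returns a negative errno (e.g.
 * -ENXIO) once there is no more data, and returns pos itself when data
 * exists at pos, so the caller must advance past what it has processed:
 *
 *	loff_t	pos = 0;
 *
 *	while ((pos = xfile_seek_data(xf, pos)) >= 0) {
 *		// ...process the staged data found at pos...
 *		pos = round_up(pos + 1, PAGE_SIZE);
 *	}
 */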
/*
 * Grab the (locked) folio for a memory object.  The object cannot span a folio
 * boundary.  Returns the locked folio if successful, NULL if there was no
 * folio or it didn't cover the range requested, or an ERR_PTR on failure.
 * Callers must drop the folio with xfile_put_folio; a usage sketch follows
 * that helper.
 */
struct folio *
xfile_get_folio(
	struct xfile		*xf,
	loff_t			pos,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = file_inode(xf->file);
	struct folio		*folio = NULL;
	unsigned int		pflags;
	int			error;

	if (inode->i_sb->s_maxbytes - pos < len)
		return ERR_PTR(-ENOMEM);

	trace_xfile_get_folio(xf, pos, len);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
		i_size_write(inode, pos + len);

	pflags = memalloc_nofs_save();
	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
	memalloc_nofs_restore(pflags);
	if (error)
		return ERR_PTR(error);

	if (!folio)
		return NULL;

	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
		folio_unlock(folio);
		folio_put(folio);
		return NULL;
	}

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return ERR_PTR(-EIO);
	}

	/*
	 * Mark the folio dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xfile_put_folio.
	 */
	if (flags & XFILE_ALLOC)
		folio_mark_dirty(folio);
	return folio;
}
/*
 * Release the (locked) folio for a memory object.
 */
void
xfile_put_folio(
	struct xfile		*xf,
	struct folio		*folio)
{
	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));

	folio_unlock(folio);
	folio_put(folio);
}
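/*
 * A sketch of the get/put pairing above for a caller that wants a direct
 * pointer to one staged object (names and the -ENODATA choice are
 * illustrative, not part of this API):
 *
 *	struct folio	*folio;
 *	void		*ptr;
 *
 *	folio = xfile_get_folio(xf, pos, obj_len, XFILE_ALLOC);
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 *	if (!folio)
 *		return -ENODATA;
 *	ptr = folio_address(folio) + offset_in_folio(folio, pos);
 *	// ...access the object through ptr while the folio is locked...
 *	xfile_put_folio(xf, folio);
 */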
/* Discard the page cache that's backing a range of the xfile. */
void
xfile_discard(
	struct xfile		*xf,
	loff_t			pos,
	u64			count)
{
	trace_xfile_discard(xf, pos, count);

	shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
}