// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>
/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory.  This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times.  In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file
 * to store our staging data.  This file is not installed in the file
 * descriptor table so that user programs cannot access the data, which means
 * that the xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 */
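/*
 * A minimal usage sketch of the lifecycle implemented below (error handling
 * condensed; the staged datum and description string are illustrative only):
 *
 *	struct xfile	*xf;
 *	u64		val = 42;	// hypothetical staged datum
 *	int		error;
 *
 *	error = xfile_create("example stage", 0, &xf);
 *	if (error)
 *		return error;
 *	error = xfile_store(xf, &val, sizeof(val), 0);	// may fail with -ENOMEM
 *	if (!error)
 *		error = xfile_load(xf, &val, sizeof(val), 0);
 *	xfile_destroy(xf);
 *	return error;
 */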
/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 */
static struct lock_class_key xfile_i_mutex_key;
/*
 * Create an xfile of the given size.  The description will be used in the
 * trace output.
 */
int
xfile_create(
	const char		*description,
	loff_t			isize,
	struct xfile		**xfilep)
{
	struct inode		*inode;
	struct xfile		*xf;
	int			error;

	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
	if (!xf)
		return -ENOMEM;

	xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
	if (IS_ERR(xf->file)) {
		error = PTR_ERR(xf->file);
		goto out_xfile;
	}

	inode = file_inode(xf->file);
	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	trace_xfile_create(xf);

	*xfilep = xf;
	return 0;
out_xfile:
	kfree(xf);
	return error;
}
/* Close the file and release all resources. */
void
xfile_destroy(
	struct xfile		*xf)
{
	struct inode		*inode = file_inode(xf->file);

	trace_xfile_destroy(xf);

	/* Restore the default lock class before handing the inode back. */
	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
	fput(xf->file);
	kfree(xf);
}
/*
 * Load an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_load(
	struct xfile		*xf,
	void			*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_load(xf, pos, count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
				SGP_READ) < 0)
			break;
		if (!folio) {
			/*
			 * No data stored at this offset, just zero the output
			 * buffer until the next page boundary.
			 */
			len = min_t(ssize_t, count,
					PAGE_SIZE - offset_in_page(pos));
			memset(buf, 0, len);
		} else {
			if (filemap_check_wb_err(inode->i_mapping, 0)) {
				folio_unlock(folio);
				folio_put(folio);
				break;
			}

			offset = offset_in_folio(folio, pos);
			len = min_t(ssize_t, count, folio_size(folio) - offset);
			memcpy(buf, folio_address(folio) + offset, len);

			folio_unlock(folio);
			folio_put(folio);
		}
		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}
/*
 * Store an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_store(
	struct xfile		*xf,
	const void		*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_store(xf, pos, count);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if (pos + count > i_size_read(inode))
		i_size_write(inode, pos + count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
				SGP_CACHE) < 0)
			break;
		if (filemap_check_wb_err(inode->i_mapping, 0)) {
			folio_unlock(folio);
			folio_put(folio);
			break;
		}

		offset = offset_in_folio(folio, pos);
		len = min_t(ssize_t, count, folio_size(folio) - offset);
		memcpy(folio_address(folio) + offset, buf, len);

		folio_mark_dirty(folio);
		folio_unlock(folio);
		folio_put(folio);

		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}
/* Find the next written area in the xfile data for a given offset. */
loff_t
xfile_seek_data(
	struct xfile		*xf,
	loff_t			pos)
{
	loff_t			ret;

	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
	trace_xfile_seek_data(xf, pos, ret);
	return ret;
}
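/*
 * A sketch (not part of this file) of how a caller might walk the written
 * regions with the helper above.  SEEK_DATA returns a negative errno (e.g.
 * -ENXIO) once there is no more data, and returns pos itself when data
 * exists at pos, so the caller must advance past what it has processed:
 *
 *	loff_t	pos = 0;
 *
 *	while ((pos = xfile_seek_data(xf, pos)) >= 0) {
 *		// ...process the staged data found at pos...
 *		pos = round_up(pos + 1, PAGE_SIZE);
 *	}
 */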
/*
 * Grab the (locked) folio for a memory object.  The object cannot span a folio
 * boundary.  Returns the locked folio if successful, NULL if there was no
 * folio or it didn't cover the range requested, or an ERR_PTR on failure.
 * Callers must drop the folio with xfile_put_folio; a usage sketch follows
 * that helper.
 */
struct folio *
xfile_get_folio(
	struct xfile		*xf,
	loff_t			pos,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = file_inode(xf->file);
	struct folio		*folio = NULL;
	unsigned int		pflags;
	int			error;

	if (inode->i_sb->s_maxbytes - pos < len)
		return ERR_PTR(-ENOMEM);

	trace_xfile_get_folio(xf, pos, len);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
		i_size_write(inode, pos + len);

	pflags = memalloc_nofs_save();
	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
	memalloc_nofs_restore(pflags);
	if (error)
		return ERR_PTR(error);

	if (!folio)
		return NULL;

	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
		folio_unlock(folio);
		folio_put(folio);
		return NULL;
	}

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return ERR_PTR(-EIO);
	}

	/*
	 * Mark the folio dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xfile_put_folio.
	 */
	if (flags & XFILE_ALLOC)
		folio_mark_dirty(folio);
	return folio;
}
/*
 * Release the (locked) folio for a memory object.
 */
void
xfile_put_folio(
	struct xfile		*xf,
	struct folio		*folio)
{
	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));

	folio_unlock(folio);
	folio_put(folio);
}
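/*
 * A sketch of the get/put pairing above for a caller that wants a direct
 * pointer to one staged object (names and the -ENODATA choice are
 * illustrative, not part of this API):
 *
 *	struct folio	*folio;
 *	void		*ptr;
 *
 *	folio = xfile_get_folio(xf, pos, obj_len, XFILE_ALLOC);
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 *	if (!folio)
 *		return -ENODATA;
 *	ptr = folio_address(folio) + offset_in_folio(folio, pos);
 *	// ...access the object through ptr while the folio is locked...
 *	xfile_put_folio(xf, folio);
 */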
/* Discard the page cache that's backing a range of the xfile. */
void
xfile_discard(
	struct xfile		*xf,
	loff_t			pos,
	u64			count)
{
	trace_xfile_discard(xf, pos, count);

	shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
}