module/os/linux/zfs/zpl_super.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or https://opensource.org/licenses/CDDL-1.0.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
  23  * Copyright (c) 2023, Datto Inc. All rights reserved.
  24  */
  25
  26
  27 #include <sys/zfs_znode.h>
  28 #include <sys/zfs_vfsops.h>
  29 #include <sys/zfs_vnops.h>
  30 #include <sys/zfs_ctldir.h>
  31 #include <sys/zpl.h>
  32
  33
  34 static struct inode *
  35 zpl_inode_alloc(struct super_block *sb)
  36 {
  37         struct inode *ip;
  38
  39         VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
  40         inode_set_iversion(ip, 1);
  41
  42         return (ip);
  43 }
  44
  45 static void
  46 zpl_inode_destroy(struct inode *ip)
  47 {
  48         ASSERT(atomic_read(&ip->i_count) == 0);
  49         zfs_inode_destroy(ip);
  50 }
  51
  52 /*
  53  * Called from __mark_inode_dirty() to reflect that something in the
  54  * inode has changed.  We use it to ensure the znode system attributes
  55  * are always strictly update to date with respect to the inode.
  56  */
  57 #ifdef HAVE_DIRTY_INODE_WITH_FLAGS
  58 static void
  59 zpl_dirty_inode(struct inode *ip, int flags)
  60 {
  61         fstrans_cookie_t cookie;
  62
  63         cookie = spl_fstrans_mark();
  64         zfs_dirty_inode(ip, flags);
  65         spl_fstrans_unmark(cookie);
  66 }
  67 #else
  68 static void
  69 zpl_dirty_inode(struct inode *ip)
  70 {
  71         fstrans_cookie_t cookie;
  72
  73         cookie = spl_fstrans_mark();
  74         zfs_dirty_inode(ip, 0);
  75         spl_fstrans_unmark(cookie);
  76 }
  77 #endif /* HAVE_DIRTY_INODE_WITH_FLAGS */
  78
  79 /*
  80  * When ->drop_inode() is called its return value indicates if the
  81  * inode should be evicted from the inode cache.  If the inode is
  82  * unhashed and has no links the default policy is to evict it
  83  * immediately.
  84  *
  85  * The ->evict_inode() callback must minimally truncate the inode pages,
  86  * and call clear_inode().  For 2.6.35 and later kernels this will
  87  * simply update the inode state, with the sync occurring before the
  88  * truncate in evict().  For earlier kernels clear_inode() maps to
  89  * end_writeback() which is responsible for completing all outstanding
  90  * write back.  In either case, once this is done it is safe to cleanup
  91  * any remaining inode specific data via zfs_inactive().
  92  * remaining filesystem specific data.
  93  */
  94 static void
  95 zpl_evict_inode(struct inode *ip)
  96 {
  97         fstrans_cookie_t cookie;
  98
  99         cookie = spl_fstrans_mark();
 100         truncate_setsize(ip, 0);
 101         clear_inode(ip);
 102         zfs_inactive(ip);
 103         spl_fstrans_unmark(cookie);
 104 }
 105
 106 static void
 107 zpl_put_super(struct super_block *sb)
 108 {
 109         fstrans_cookie_t cookie;
 110         int error;
 111
 112         cookie = spl_fstrans_mark();
 113         error = -zfs_umount(sb);
 114         spl_fstrans_unmark(cookie);
 115         ASSERT3S(error, <=, 0);
 116 }
 117
 118 static int
 119 zpl_sync_fs(struct super_block *sb, int wait)
 120 {
 121         fstrans_cookie_t cookie;
 122         cred_t *cr = CRED();
 123         int error;
 124
 125         crhold(cr);
 126         cookie = spl_fstrans_mark();
 127         error = -zfs_sync(sb, wait, cr);
 128         spl_fstrans_unmark(cookie);
 129         crfree(cr);
 130         ASSERT3S(error, <=, 0);
 131
 132         return (error);
 133 }
 134
 135 static int
 136 zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
 137 {
 138         fstrans_cookie_t cookie;
 139         int error;
 140
 141         cookie = spl_fstrans_mark();
 142         error = -zfs_statvfs(dentry->d_inode, statp);
 143         spl_fstrans_unmark(cookie);
 144         ASSERT3S(error, <=, 0);
 145
 146         /*
 147          * If required by a 32-bit system call, dynamically scale the
 148          * block size up to 16MiB and decrease the block counts.  This
 149          * allows for a maximum size of 64EiB to be reported.  The file
 150          * counts must be artificially capped at 2^32-1.
 151          */
 152         if (unlikely(zpl_is_32bit_api())) {
 153                 while (statp->f_blocks > UINT32_MAX &&
 154                     statp->f_bsize < SPA_MAXBLOCKSIZE) {
 155                         statp->f_frsize <<= 1;
 156                         statp->f_bsize <<= 1;
 157
 158                         statp->f_blocks >>= 1;
 159                         statp->f_bfree >>= 1;
 160                         statp->f_bavail >>= 1;
 161                 }
 162
 163                 uint64_t usedobjs = statp->f_files - statp->f_ffree;
 164                 statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs);
 165                 statp->f_files = statp->f_ffree + usedobjs;
 166         }
 167
 168         return (error);
 169 }
 170
 171 static int
 172 zpl_remount_fs(struct super_block *sb, int *flags, char *data)
 173 {
 174         zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data };
 175         fstrans_cookie_t cookie;
 176         int error;
 177
 178         cookie = spl_fstrans_mark();
 179         error = -zfs_remount(sb, flags, &zm);
 180         spl_fstrans_unmark(cookie);
 181         ASSERT3S(error, <=, 0);
 182
 183         return (error);
 184 }
 185
 186 static int
 187 __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
 188 {
 189         int error;
 190         if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
 191                 return (error);
 192
 193         char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 194         dmu_objset_name(zfsvfs->z_os, fsname);
 195
 196         for (int i = 0; fsname[i] != 0; i++) {
 197                 /*
 198                  * Spaces in the dataset name must be converted to their
 199                  * octal escape sequence for getmntent(3) to correctly
 200                  * parse then fsname portion of /proc/self/mounts.
 201                  */
 202                 if (fsname[i] == ' ') {
 203                         seq_puts(seq, "\\040");
 204                 } else {
 205                         seq_putc(seq, fsname[i]);
 206                 }
 207         }
 208
 209         kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
 210
 211         zpl_exit(zfsvfs, FTAG);
 212
 213         return (0);
 214 }
 215
 216 static int
 217 zpl_show_devname(struct seq_file *seq, struct dentry *root)
 218 {
 219         return (__zpl_show_devname(seq, root->d_sb->s_fs_info));
 220 }
 221
 222 static int
 223 __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
 224 {
 225         seq_printf(seq, ",%s",
 226             zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
 227
 228 #ifdef CONFIG_FS_POSIX_ACL
 229         switch (zfsvfs->z_acl_type) {
 230         case ZFS_ACLTYPE_POSIX:
 231                 seq_puts(seq, ",posixacl");
 232                 break;
 233         default:
 234                 seq_puts(seq, ",noacl");
 235                 break;
 236         }
 237 #endif /* CONFIG_FS_POSIX_ACL */
 238
 239         switch (zfsvfs->z_case) {
 240         case ZFS_CASE_SENSITIVE:
 241                 seq_puts(seq, ",casesensitive");
 242                 break;
 243         case ZFS_CASE_INSENSITIVE:
 244                 seq_puts(seq, ",caseinsensitive");
 245                 break;
 246         default:
 247                 seq_puts(seq, ",casemixed");
 248                 break;
 249         }
 250
 251         return (0);
 252 }
 253
 254 static int
 255 zpl_show_options(struct seq_file *seq, struct dentry *root)
 256 {
 257         return (__zpl_show_options(seq, root->d_sb->s_fs_info));
 258 }
 259
 260 static int
 261 zpl_fill_super(struct super_block *sb, void *data, int silent)
 262 {
 263         zfs_mnt_t *zm = (zfs_mnt_t *)data;
 264         fstrans_cookie_t cookie;
 265         int error;
 266
 267         cookie = spl_fstrans_mark();
 268         error = -zfs_domount(sb, zm, silent);
 269         spl_fstrans_unmark(cookie);
 270         ASSERT3S(error, <=, 0);
 271
 272         return (error);
 273 }
 274
 275 static int
 276 zpl_test_super(struct super_block *s, void *data)
 277 {
 278         zfsvfs_t *zfsvfs = s->s_fs_info;
 279         objset_t *os = data;
 280         /*
 281          * If the os doesn't match the z_os in the super_block, assume it is
 282          * not a match. Matching would imply a multimount of a dataset. It is
 283          * possible that during a multimount, there is a simultaneous operation
 284          * that changes the z_os, e.g., rollback, where the match will be
 285          * missed, but in that case the user will get an EBUSY.
 286          */
 287         return (zfsvfs != NULL && os == zfsvfs->z_os);
 288 }
 289
 290 static struct super_block *
 291 zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
 292 {
 293         struct super_block *s;
 294         objset_t *os;
 295         int err;
 296
 297         err = dmu_objset_hold(zm->mnt_osname, FTAG, &os);
 298         if (err)
 299                 return (ERR_PTR(-err));
 300
 301         /*
 302          * The dsl pool lock must be released prior to calling sget().
 303          * It is possible sget() may block on the lock in grab_super()
 304          * while deactivate_super() holds that same lock and waits for
 305          * a txg sync.  If the dsl_pool lock is held over sget()
 306          * this can prevent the pool sync and cause a deadlock.
 307          */
 308         dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
 309         dsl_pool_rele(dmu_objset_pool(os), FTAG);
 310
 311         s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
 312
 313         /*
 314          * Recheck with the lock held to prevent mounting the wrong dataset
 315          * since z_os can be stale when the teardown lock is held.
 316          *
 317          * We can't do this in zpl_test_super in since it's under spinlock and
 318          * also s_umount lock is not held there so it would race with
 319          * zfs_umount and zfsvfs can be freed.
 320          */
 321         if (!IS_ERR(s) && s->s_fs_info != NULL) {
 322                 zfsvfs_t *zfsvfs = s->s_fs_info;
 323                 if (zpl_enter(zfsvfs, FTAG) == 0) {
 324                         if (os != zfsvfs->z_os)
 325                                 err = -SET_ERROR(EBUSY);
 326                         zpl_exit(zfsvfs, FTAG);
 327                 } else {
 328                         err = -SET_ERROR(EBUSY);
 329                 }
 330         }
 331         dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
 332         dsl_dataset_rele(dmu_objset_ds(os), FTAG);
 333
 334         if (IS_ERR(s))
 335                 return (ERR_CAST(s));
 336
 337         if (err) {
 338                 deactivate_locked_super(s);
 339                 return (ERR_PTR(err));
 340         }
 341
 342         if (s->s_root == NULL) {
 343                 err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
 344                 if (err) {
 345                         deactivate_locked_super(s);
 346                         return (ERR_PTR(err));
 347                 }
 348                 s->s_flags |= SB_ACTIVE;
 349         } else if ((flags ^ s->s_flags) & SB_RDONLY) {
 350                 deactivate_locked_super(s);
 351                 return (ERR_PTR(-EBUSY));
 352         }
 353
 354         return (s);
 355 }
 356
 357 static struct dentry *
 358 zpl_mount(struct file_system_type *fs_type, int flags,
 359     const char *osname, void *data)
 360 {
 361         zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
 362
 363         struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm);
 364         if (IS_ERR(sb))
 365                 return (ERR_CAST(sb));
 366
 367         return (dget(sb->s_root));
 368 }
 369
 370 static void
 371 zpl_kill_sb(struct super_block *sb)
 372 {
 373         zfs_preumount(sb);
 374         kill_anon_super(sb);
 375 }
 376
 377 void
 378 zpl_prune_sb(uint64_t nr_to_scan, void *arg)
 379 {
 380         struct super_block *sb = (struct super_block *)arg;
 381         int objects = 0;
 382
 383         (void) -zfs_prune(sb, nr_to_scan, &objects);
 384 }
 385
 386 const struct super_operations zpl_super_operations = {
 387         .alloc_inode            = zpl_inode_alloc,
 388         .destroy_inode          = zpl_inode_destroy,
 389         .dirty_inode            = zpl_dirty_inode,
 390         .write_inode            = NULL,
 391         .evict_inode            = zpl_evict_inode,
 392         .put_super              = zpl_put_super,
 393         .sync_fs                = zpl_sync_fs,
 394         .statfs                 = zpl_statfs,
 395         .remount_fs             = zpl_remount_fs,
 396         .show_devname           = zpl_show_devname,
 397         .show_options           = zpl_show_options,
 398         .show_stats             = NULL,
 399 };
 400
 401 struct file_system_type zpl_fs_type = {
 402         .owner                  = THIS_MODULE,
 403         .name                   = ZFS_DRIVER,
 404 #if defined(HAVE_IDMAP_MNT_API)
 405         .fs_flags               = FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
 406 #else
 407         .fs_flags               = FS_USERNS_MOUNT,
 408 #endif
 409         .mount                  = zpl_mount,
 410         .kill_sb                = zpl_kill_sb,
 411 };