module/os/linux/zfs/zpl_file.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or https://opensource.org/licenses/CDDL-1.0.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
  23  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
  24  */
  25
  26
  27 #ifdef CONFIG_COMPAT
  28 #include <linux/compat.h>
  29 #endif
  30 #include <linux/fs.h>
  31 #include <sys/file.h>
  32 #include <sys/dmu_objset.h>
  33 #include <sys/zfs_znode.h>
  34 #include <sys/zfs_vfsops.h>
  35 #include <sys/zfs_vnops.h>
  36 #include <sys/zfs_project.h>
  37 #if defined(HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS) || \
  38     defined(HAVE_VFS_FILEMAP_DIRTY_FOLIO)
  39 #include <linux/pagemap.h>
  40 #endif
  41 #ifdef HAVE_FILE_FADVISE
  42 #include <linux/fadvise.h>
  43 #endif
  44 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
  45 #include <linux/writeback.h>
  46 #endif
  47
  48 /*
  49  * When using fallocate(2) to preallocate space, inflate the requested
  50  * capacity check by 10% to account for the required metadata blocks.
  51  */
  52 static unsigned int zfs_fallocate_reserve_percent = 110;
  53
  54 static int
  55 zpl_open(struct inode *ip, struct file *filp)
  56 {
  57         cred_t *cr = CRED();
  58         int error;
  59         fstrans_cookie_t cookie;
  60
  61         error = generic_file_open(ip, filp);
  62         if (error)
  63                 return (error);
  64
  65         crhold(cr);
  66         cookie = spl_fstrans_mark();
  67         error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
  68         spl_fstrans_unmark(cookie);
  69         crfree(cr);
  70         ASSERT3S(error, <=, 0);
  71
  72         return (error);
  73 }
  74
  75 static int
  76 zpl_release(struct inode *ip, struct file *filp)
  77 {
  78         cred_t *cr = CRED();
  79         int error;
  80         fstrans_cookie_t cookie;
  81
  82         cookie = spl_fstrans_mark();
  83         if (ITOZ(ip)->z_atime_dirty)
  84                 zfs_mark_inode_dirty(ip);
  85
  86         crhold(cr);
  87         error = -zfs_close(ip, filp->f_flags, cr);
  88         spl_fstrans_unmark(cookie);
  89         crfree(cr);
  90         ASSERT3S(error, <=, 0);
  91
  92         return (error);
  93 }
  94
  95 static int
  96 zpl_iterate(struct file *filp, zpl_dir_context_t *ctx)
  97 {
  98         cred_t *cr = CRED();
  99         int error;
 100         fstrans_cookie_t cookie;
 101
 102         crhold(cr);
 103         cookie = spl_fstrans_mark();
 104         error = -zfs_readdir(file_inode(filp), ctx, cr);
 105         spl_fstrans_unmark(cookie);
 106         crfree(cr);
 107         ASSERT3S(error, <=, 0);
 108
 109         return (error);
 110 }
 111
 112 #if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
 113 static int
 114 zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
 115 {
 116         zpl_dir_context_t ctx =
 117             ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
 118         int error;
 119
 120         error = zpl_iterate(filp, &ctx);
 121         filp->f_pos = ctx.pos;
 122
 123         return (error);
 124 }
 125 #endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
 126
 127 #if defined(HAVE_FSYNC_WITHOUT_DENTRY)
 128 /*
 129  * Linux 2.6.35 - 3.0 API,
 130  * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed
 131  * redundant.  The dentry is still accessible via filp->f_path.dentry,
 132  * and we are guaranteed that filp will never be NULL.
 133  */
 134 static int
 135 zpl_fsync(struct file *filp, int datasync)
 136 {
 137         struct inode *inode = filp->f_mapping->host;
 138         cred_t *cr = CRED();
 139         int error;
 140         fstrans_cookie_t cookie;
 141
 142         crhold(cr);
 143         cookie = spl_fstrans_mark();
 144         error = -zfs_fsync(ITOZ(inode), datasync, cr);
 145         spl_fstrans_unmark(cookie);
 146         crfree(cr);
 147         ASSERT3S(error, <=, 0);
 148
 149         return (error);
 150 }
 151
 152 #ifdef HAVE_FILE_AIO_FSYNC
 153 static int
 154 zpl_aio_fsync(struct kiocb *kiocb, int datasync)
 155 {
 156         return (zpl_fsync(kiocb->ki_filp, datasync));
 157 }
 158 #endif
 159
 160 #elif defined(HAVE_FSYNC_RANGE)
 161 /*
 162  * Linux 3.1 API,
 163  * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
 164  * been pushed down in to the .fsync() vfs hook.  Additionally, the i_mutex
 165  * lock is no longer held by the caller, for zfs we don't require the lock
 166  * to be held so we don't acquire it.
 167  */
 168 static int
 169 zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 170 {
 171         struct inode *inode = filp->f_mapping->host;
 172         znode_t *zp = ITOZ(inode);
 173         zfsvfs_t *zfsvfs = ITOZSB(inode);
 174         cred_t *cr = CRED();
 175         int error;
 176         fstrans_cookie_t cookie;
 177
 178         /*
 179          * The variables z_sync_writes_cnt and z_async_writes_cnt work in
 180          * tandem so that sync writes can detect if there are any non-sync
 181          * writes going on and vice-versa. The "vice-versa" part to this logic
 182          * is located in zfs_putpage() where non-sync writes check if there are
 183          * any ongoing sync writes. If any sync and non-sync writes overlap,
 184          * we do a commit to complete the non-sync writes since the latter can
 185          * potentially take several seconds to complete and thus block sync
 186          * writes in the upcoming call to filemap_write_and_wait_range().
 187          */
 188         atomic_inc_32(&zp->z_sync_writes_cnt);
 189         /*
 190          * If the following check does not detect an overlapping non-sync write
 191          * (say because it's just about to start), then it is guaranteed that
 192          * the non-sync write will detect this sync write. This is because we
 193          * always increment z_sync_writes_cnt / z_async_writes_cnt before doing
 194          * the check on z_async_writes_cnt / z_sync_writes_cnt here and in
 195          * zfs_putpage() respectively.
 196          */
 197         if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
 198                 if ((error = zpl_enter(zfsvfs, FTAG)) != 0) {
 199                         atomic_dec_32(&zp->z_sync_writes_cnt);
 200                         return (error);
 201                 }
 202                 zil_commit(zfsvfs->z_log, zp->z_id);
 203                 zpl_exit(zfsvfs, FTAG);
 204         }
 205
 206         error = filemap_write_and_wait_range(inode->i_mapping, start, end);
 207
 208         /*
 209          * The sync write is not complete yet but we decrement
 210          * z_sync_writes_cnt since zfs_fsync() increments and decrements
 211          * it internally. If a non-sync write starts just after the decrement
 212          * operation but before we call zfs_fsync(), it may not detect this
 213          * overlapping sync write but it does not matter since we have already
 214          * gone past filemap_write_and_wait_range() and we won't block due to
 215          * the non-sync write.
 216          */
 217         atomic_dec_32(&zp->z_sync_writes_cnt);
 218
 219         if (error)
 220                 return (error);
 221
 222         crhold(cr);
 223         cookie = spl_fstrans_mark();
 224         error = -zfs_fsync(zp, datasync, cr);
 225         spl_fstrans_unmark(cookie);
 226         crfree(cr);
 227         ASSERT3S(error, <=, 0);
 228
 229         return (error);
 230 }
 231
 232 #ifdef HAVE_FILE_AIO_FSYNC
 233 static int
 234 zpl_aio_fsync(struct kiocb *kiocb, int datasync)
 235 {
 236         return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync));
 237 }
 238 #endif
 239
 240 #else
 241 #error "Unsupported fops->fsync() implementation"
 242 #endif
 243
 244 static inline int
 245 zfs_io_flags(struct kiocb *kiocb)
 246 {
 247         int flags = 0;
 248
 249 #if defined(IOCB_DSYNC)
 250         if (kiocb->ki_flags & IOCB_DSYNC)
 251                 flags |= O_DSYNC;
 252 #endif
 253 #if defined(IOCB_SYNC)
 254         if (kiocb->ki_flags & IOCB_SYNC)
 255                 flags |= O_SYNC;
 256 #endif
 257 #if defined(IOCB_APPEND)
 258         if (kiocb->ki_flags & IOCB_APPEND)
 259                 flags |= O_APPEND;
 260 #endif
 261 #if defined(IOCB_DIRECT)
 262         if (kiocb->ki_flags & IOCB_DIRECT)
 263                 flags |= O_DIRECT;
 264 #endif
 265         return (flags);
 266 }
 267
 268 /*
 269  * If relatime is enabled, call file_accessed() if zfs_relatime_need_update()
 270  * is true.  This is needed since datasets with inherited "relatime" property
 271  * aren't necessarily mounted with the MNT_RELATIME flag (e.g. after
 272  * `zfs set relatime=...`), which is what relatime test in VFS by
 273  * relatime_need_update() is based on.
 274  */
 275 static inline void
 276 zpl_file_accessed(struct file *filp)
 277 {
 278         struct inode *ip = filp->f_mapping->host;
 279
 280         if (!IS_NOATIME(ip) && ITOZSB(ip)->z_relatime) {
 281                 if (zfs_relatime_need_update(ip))
 282                         file_accessed(filp);
 283         } else {
 284                 file_accessed(filp);
 285         }
 286 }
 287
 288 #if defined(HAVE_VFS_RW_ITERATE)
 289
 290 /*
 291  * When HAVE_VFS_IOV_ITER is defined the iov_iter structure supports
 292  * iovecs, kvevs, bvecs and pipes, plus all the required interfaces to
 293  * manipulate the iov_iter are available.  In which case the full iov_iter
 294  * can be attached to the uio and correctly handled in the lower layers.
 295  * Otherwise, for older kernels extract the iovec and pass it instead.
 296  */
 297 static void
 298 zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
 299     loff_t pos, ssize_t count, size_t skip)
 300 {
 301 #if defined(HAVE_VFS_IOV_ITER)
 302         zfs_uio_iov_iter_init(uio, to, pos, count, skip);
 303 #else
 304         zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
 305             zfs_uio_iov_iter_type(to) & ITER_KVEC ?
 306             UIO_SYSSPACE : UIO_USERSPACE,
 307             count, skip);
 308 #endif
 309 }
 310
 311 static ssize_t
 312 zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
 313 {
 314         cred_t *cr = CRED();
 315         fstrans_cookie_t cookie;
 316         struct file *filp = kiocb->ki_filp;
 317         ssize_t count = iov_iter_count(to);
 318         zfs_uio_t uio;
 319
 320         zpl_uio_init(&uio, kiocb, to, kiocb->ki_pos, count, 0);
 321
 322         crhold(cr);
 323         cookie = spl_fstrans_mark();
 324
 325         int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio,
 326             filp->f_flags | zfs_io_flags(kiocb), cr);
 327
 328         spl_fstrans_unmark(cookie);
 329         crfree(cr);
 330
 331         if (error < 0)
 332                 return (error);
 333
 334         ssize_t read = count - uio.uio_resid;
 335         kiocb->ki_pos += read;
 336
 337         zpl_file_accessed(filp);
 338
 339         return (read);
 340 }
 341
 342 static inline ssize_t
 343 zpl_generic_write_checks(struct kiocb *kiocb, struct iov_iter *from,
 344     size_t *countp)
 345 {
 346 #ifdef HAVE_GENERIC_WRITE_CHECKS_KIOCB
 347         ssize_t ret = generic_write_checks(kiocb, from);
 348         if (ret <= 0)
 349                 return (ret);
 350
 351         *countp = ret;
 352 #else
 353         struct file *file = kiocb->ki_filp;
 354         struct address_space *mapping = file->f_mapping;
 355         struct inode *ip = mapping->host;
 356         int isblk = S_ISBLK(ip->i_mode);
 357
 358         *countp = iov_iter_count(from);
 359         ssize_t ret = generic_write_checks(file, &kiocb->ki_pos, countp, isblk);
 360         if (ret)
 361                 return (ret);
 362 #endif
 363
 364         return (0);
 365 }
 366
 367 static ssize_t
 368 zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
 369 {
 370         cred_t *cr = CRED();
 371         fstrans_cookie_t cookie;
 372         struct file *filp = kiocb->ki_filp;
 373         struct inode *ip = filp->f_mapping->host;
 374         zfs_uio_t uio;
 375         size_t count = 0;
 376         ssize_t ret;
 377
 378         ret = zpl_generic_write_checks(kiocb, from, &count);
 379         if (ret)
 380                 return (ret);
 381
 382         zpl_uio_init(&uio, kiocb, from, kiocb->ki_pos, count, from->iov_offset);
 383
 384         crhold(cr);
 385         cookie = spl_fstrans_mark();
 386
 387         int error = -zfs_write(ITOZ(ip), &uio,
 388             filp->f_flags | zfs_io_flags(kiocb), cr);
 389
 390         spl_fstrans_unmark(cookie);
 391         crfree(cr);
 392
 393         if (error < 0)
 394                 return (error);
 395
 396         ssize_t wrote = count - uio.uio_resid;
 397         kiocb->ki_pos += wrote;
 398
 399         return (wrote);
 400 }
 401
 402 #else /* !HAVE_VFS_RW_ITERATE */
 403
 404 static ssize_t
 405 zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov,
 406     unsigned long nr_segs, loff_t pos)
 407 {
 408         cred_t *cr = CRED();
 409         fstrans_cookie_t cookie;
 410         struct file *filp = kiocb->ki_filp;
 411         size_t count;
 412         ssize_t ret;
 413
 414         ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
 415         if (ret)
 416                 return (ret);
 417
 418         zfs_uio_t uio;
 419         zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
 420             count, 0);
 421
 422         crhold(cr);
 423         cookie = spl_fstrans_mark();
 424
 425         int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio,
 426             filp->f_flags | zfs_io_flags(kiocb), cr);
 427
 428         spl_fstrans_unmark(cookie);
 429         crfree(cr);
 430
 431         if (error < 0)
 432                 return (error);
 433
 434         ssize_t read = count - uio.uio_resid;
 435         kiocb->ki_pos += read;
 436
 437         zpl_file_accessed(filp);
 438
 439         return (read);
 440 }
 441
 442 static ssize_t
 443 zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov,
 444     unsigned long nr_segs, loff_t pos)
 445 {
 446         cred_t *cr = CRED();
 447         fstrans_cookie_t cookie;
 448         struct file *filp = kiocb->ki_filp;
 449         struct inode *ip = filp->f_mapping->host;
 450         size_t count;
 451         ssize_t ret;
 452
 453         ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
 454         if (ret)
 455                 return (ret);
 456
 457         ret = generic_write_checks(filp, &pos, &count, S_ISBLK(ip->i_mode));
 458         if (ret)
 459                 return (ret);
 460
 461         kiocb->ki_pos = pos;
 462
 463         zfs_uio_t uio;
 464         zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
 465             count, 0);
 466
 467         crhold(cr);
 468         cookie = spl_fstrans_mark();
 469
 470         int error = -zfs_write(ITOZ(ip), &uio,
 471             filp->f_flags | zfs_io_flags(kiocb), cr);
 472
 473         spl_fstrans_unmark(cookie);
 474         crfree(cr);
 475
 476         if (error < 0)
 477                 return (error);
 478
 479         ssize_t wrote = count - uio.uio_resid;
 480         kiocb->ki_pos += wrote;
 481
 482         return (wrote);
 483 }
 484 #endif /* HAVE_VFS_RW_ITERATE */
 485
 486 #if defined(HAVE_VFS_RW_ITERATE)
 487 static ssize_t
 488 zpl_direct_IO_impl(int rw, struct kiocb *kiocb, struct iov_iter *iter)
 489 {
 490         if (rw == WRITE)
 491                 return (zpl_iter_write(kiocb, iter));
 492         else
 493                 return (zpl_iter_read(kiocb, iter));
 494 }
 495 #if defined(HAVE_VFS_DIRECT_IO_ITER)
 496 static ssize_t
 497 zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter)
 498 {
 499         return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter));
 500 }
 501 #elif defined(HAVE_VFS_DIRECT_IO_ITER_OFFSET)
 502 static ssize_t
 503 zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
 504 {
 505         ASSERT3S(pos, ==, kiocb->ki_pos);
 506         return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter));
 507 }
 508 #elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET)
 509 static ssize_t
 510 zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
 511 {
 512         ASSERT3S(pos, ==, kiocb->ki_pos);
 513         return (zpl_direct_IO_impl(rw, kiocb, iter));
 514 }
 515 #else
 516 #error "Unknown direct IO interface"
 517 #endif
 518
 519 #else /* HAVE_VFS_RW_ITERATE */
 520
 521 #if defined(HAVE_VFS_DIRECT_IO_IOVEC)
 522 static ssize_t
 523 zpl_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iov,
 524     loff_t pos, unsigned long nr_segs)
 525 {
 526         if (rw == WRITE)
 527                 return (zpl_aio_write(kiocb, iov, nr_segs, pos));
 528         else
 529                 return (zpl_aio_read(kiocb, iov, nr_segs, pos));
 530 }
 531 #elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET)
 532 static ssize_t
 533 zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
 534 {
 535         const struct iovec *iovp = iov_iter_iovec(iter);
 536         unsigned long nr_segs = iter->nr_segs;
 537
 538         ASSERT3S(pos, ==, kiocb->ki_pos);
 539         if (rw == WRITE)
 540                 return (zpl_aio_write(kiocb, iovp, nr_segs, pos));
 541         else
 542                 return (zpl_aio_read(kiocb, iovp, nr_segs, pos));
 543 }
 544 #else
 545 #error "Unknown direct IO interface"
 546 #endif
 547
 548 #endif /* HAVE_VFS_RW_ITERATE */
 549
 550 static loff_t
 551 zpl_llseek(struct file *filp, loff_t offset, int whence)
 552 {
 553 #if defined(SEEK_HOLE) && defined(SEEK_DATA)
 554         fstrans_cookie_t cookie;
 555
 556         if (whence == SEEK_DATA || whence == SEEK_HOLE) {
 557                 struct inode *ip = filp->f_mapping->host;
 558                 loff_t maxbytes = ip->i_sb->s_maxbytes;
 559                 loff_t error;
 560
 561                 spl_inode_lock_shared(ip);
 562                 cookie = spl_fstrans_mark();
 563                 error = -zfs_holey(ITOZ(ip), whence, &offset);
 564                 spl_fstrans_unmark(cookie);
 565                 if (error == 0)
 566                         error = lseek_execute(filp, ip, offset, maxbytes);
 567                 spl_inode_unlock_shared(ip);
 568
 569                 return (error);
 570         }
 571 #endif /* SEEK_HOLE && SEEK_DATA */
 572
 573         return (generic_file_llseek(filp, offset, whence));
 574 }
 575
 576 /*
 577  * It's worth taking a moment to describe how mmap is implemented
 578  * for zfs because it differs considerably from other Linux filesystems.
 579  * However, this issue is handled the same way under OpenSolaris.
 580  *
 581  * The issue is that by design zfs bypasses the Linux page cache and
 582  * leaves all caching up to the ARC.  This has been shown to work
 583  * well for the common read(2)/write(2) case.  However, mmap(2)
 584  * is problem because it relies on being tightly integrated with the
 585  * page cache.  To handle this we cache mmap'ed files twice, once in
 586  * the ARC and a second time in the page cache.  The code is careful
 587  * to keep both copies synchronized.
 588  *
 589  * When a file with an mmap'ed region is written to using write(2)
 590  * both the data in the ARC and existing pages in the page cache
 591  * are updated.  For a read(2) data will be read first from the page
 592  * cache then the ARC if needed.  Neither a write(2) or read(2) will
 593  * will ever result in new pages being added to the page cache.
 594  *
 595  * New pages are added to the page cache only via .readpage() which
 596  * is called when the vfs needs to read a page off disk to back the
 597  * virtual memory region.  These pages may be modified without
 598  * notifying the ARC and will be written out periodically via
 599  * .writepage().  This will occur due to either a sync or the usual
 600  * page aging behavior.  Note because a read(2) of a mmap'ed file
 601  * will always check the page cache first even when the ARC is out
 602  * of date correct data will still be returned.
 603  *
 604  * While this implementation ensures correct behavior it does have
 605  * have some drawbacks.  The most obvious of which is that it
 606  * increases the required memory footprint when access mmap'ed
 607  * files.  It also adds additional complexity to the code keeping
 608  * both caches synchronized.
 609  *
 610  * Longer term it may be possible to cleanly resolve this wart by
 611  * mapping page cache pages directly on to the ARC buffers.  The
 612  * Linux address space operations are flexible enough to allow
 613  * selection of which pages back a particular index.  The trick
 614  * would be working out the details of which subsystem is in
 615  * charge, the ARC, the page cache, or both.  It may also prove
 616  * helpful to move the ARC buffers to a scatter-gather lists
 617  * rather than a vmalloc'ed region.
 618  */
 619 static int
 620 zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 621 {
 622         struct inode *ip = filp->f_mapping->host;
 623         int error;
 624         fstrans_cookie_t cookie;
 625
 626         cookie = spl_fstrans_mark();
 627         error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
 628             (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
 629         spl_fstrans_unmark(cookie);
 630         if (error)
 631                 return (error);
 632
 633         error = generic_file_mmap(filp, vma);
 634         if (error)
 635                 return (error);
 636
 637 #if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 638         znode_t *zp = ITOZ(ip);
 639         mutex_enter(&zp->z_lock);
 640         zp->z_is_mapped = B_TRUE;
 641         mutex_exit(&zp->z_lock);
 642 #endif
 643
 644         return (error);
 645 }
 646
 647 /*
 648  * Populate a page with data for the Linux page cache.  This function is
 649  * only used to support mmap(2).  There will be an identical copy of the
 650  * data in the ARC which is kept up to date via .write() and .writepage().
 651  */
 652 static inline int
 653 zpl_readpage_common(struct page *pp)
 654 {
 655         fstrans_cookie_t cookie;
 656
 657         ASSERT(PageLocked(pp));
 658
 659         cookie = spl_fstrans_mark();
 660         int error = -zfs_getpage(pp->mapping->host, pp);
 661         spl_fstrans_unmark(cookie);
 662
 663         unlock_page(pp);
 664
 665         return (error);
 666 }
 667
 668 #ifdef HAVE_VFS_READ_FOLIO
 669 static int
 670 zpl_read_folio(struct file *filp, struct folio *folio)
 671 {
 672         return (zpl_readpage_common(&folio->page));
 673 }
 674 #else
 675 static int
 676 zpl_readpage(struct file *filp, struct page *pp)
 677 {
 678         return (zpl_readpage_common(pp));
 679 }
 680 #endif
 681
 682 static int
 683 zpl_readpage_filler(void *data, struct page *pp)
 684 {
 685         return (zpl_readpage_common(pp));
 686 }
 687
 688 /*
 689  * Populate a set of pages with data for the Linux page cache.  This
 690  * function will only be called for read ahead and never for demand
 691  * paging.  For simplicity, the code relies on read_cache_pages() to
 692  * correctly lock each page for IO and call zpl_readpage().
 693  */
 694 #ifdef HAVE_VFS_READPAGES
 695 static int
 696 zpl_readpages(struct file *filp, struct address_space *mapping,
 697     struct list_head *pages, unsigned nr_pages)
 698 {
 699         return (read_cache_pages(mapping, pages, zpl_readpage_filler, NULL));
 700 }
 701 #else
 702 static void
 703 zpl_readahead(struct readahead_control *ractl)
 704 {
 705         struct page *page;
 706
 707         while ((page = readahead_page(ractl)) != NULL) {
 708                 int ret;
 709
 710                 ret = zpl_readpage_filler(NULL, page);
 711                 put_page(page);
 712                 if (ret)
 713                         break;
 714         }
 715 }
 716 #endif
 717
 718 static int
 719 zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 720 {
 721         boolean_t *for_sync = data;
 722         fstrans_cookie_t cookie;
 723
 724         ASSERT(PageLocked(pp));
 725         ASSERT(!PageWriteback(pp));
 726
 727         cookie = spl_fstrans_mark();
 728         (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
 729         spl_fstrans_unmark(cookie);
 730
 731         return (0);
 732 }
 733
 734 #ifdef HAVE_WRITEPAGE_T_FOLIO
 735 static int
 736 zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
 737 {
 738         (void) zpl_putpage(&pp->page, wbc, data);
 739         return (0);
 740 }
 741 #endif
 742
 743 static inline int
 744 zpl_write_cache_pages(struct address_space *mapping,
 745     struct writeback_control *wbc, void *data)
 746 {
 747         int result;
 748
 749 #ifdef HAVE_WRITEPAGE_T_FOLIO
 750         result = write_cache_pages(mapping, wbc, zpl_putfolio, data);
 751 #else
 752         result = write_cache_pages(mapping, wbc, zpl_putpage, data);
 753 #endif
 754         return (result);
 755 }
 756
 757 static int
 758 zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 759 {
 760         znode_t         *zp = ITOZ(mapping->host);
 761         zfsvfs_t        *zfsvfs = ITOZSB(mapping->host);
 762         enum writeback_sync_modes sync_mode;
 763         int result;
 764
 765         if ((result = zpl_enter(zfsvfs, FTAG)) != 0)
 766                 return (result);
 767         if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 768                 wbc->sync_mode = WB_SYNC_ALL;
 769         zpl_exit(zfsvfs, FTAG);
 770         sync_mode = wbc->sync_mode;
 771
 772         /*
 773          * We don't want to run write_cache_pages() in SYNC mode here, because
 774          * that would make putpage() wait for a single page to be committed to
 775          * disk every single time, resulting in atrocious performance. Instead
 776          * we run it once in non-SYNC mode so that the ZIL gets all the data,
 777          * and then we commit it all in one go.
 778          */
 779         boolean_t for_sync = (sync_mode == WB_SYNC_ALL);
 780         wbc->sync_mode = WB_SYNC_NONE;
 781         result = zpl_write_cache_pages(mapping, wbc, &for_sync);
 782         if (sync_mode != wbc->sync_mode) {
 783                 if ((result = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 784                         return (result);
 785                 if (zfsvfs->z_log != NULL)
 786                         zil_commit(zfsvfs->z_log, zp->z_id);
 787                 zpl_exit(zfsvfs, FTAG);
 788
 789                 /*
 790                  * We need to call write_cache_pages() again (we can't just
 791                  * return after the commit) because the previous call in
 792                  * non-SYNC mode does not guarantee that we got all the dirty
 793                  * pages (see the implementation of write_cache_pages() for
 794                  * details). That being said, this is a no-op in most cases.
 795                  */
 796                 wbc->sync_mode = sync_mode;
 797                 result = zpl_write_cache_pages(mapping, wbc, &for_sync);
 798         }
 799         return (result);
 800 }
 801
 802 /*
 803  * Write out dirty pages to the ARC, this function is only required to
 804  * support mmap(2).  Mapped pages may be dirtied by memory operations
 805  * which never call .write().  These dirty pages are kept in sync with
 806  * the ARC buffers via this hook.
 807  */
 808 static int
 809 zpl_writepage(struct page *pp, struct writeback_control *wbc)
 810 {
 811         if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
 812                 wbc->sync_mode = WB_SYNC_ALL;
 813
 814         boolean_t for_sync = (wbc->sync_mode == WB_SYNC_ALL);
 815
 816         return (zpl_putpage(pp, wbc, &for_sync));
 817 }
 818
 819 /*
 820  * The flag combination which matches the behavior of zfs_space() is
 821  * FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.  The FALLOC_FL_PUNCH_HOLE
 822  * flag was introduced in the 2.6.38 kernel.
 823  *
 824  * The original mode=0 (allocate space) behavior can be reasonably emulated
 825  * by checking if enough space exists and creating a sparse file, as real
 826  * persistent space reservation is not possible due to COW, snapshots, etc.
 827  */
 828 static long
 829 zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
 830 {
 831         cred_t *cr = CRED();
 832         loff_t olen;
 833         fstrans_cookie_t cookie;
 834         int error = 0;
 835
 836         int test_mode = FALLOC_FL_PUNCH_HOLE;
 837 #ifdef HAVE_FALLOC_FL_ZERO_RANGE
 838         test_mode |= FALLOC_FL_ZERO_RANGE;
 839 #endif
 840
 841         if ((mode & ~(FALLOC_FL_KEEP_SIZE | test_mode)) != 0)
 842                 return (-EOPNOTSUPP);
 843
 844         if (offset < 0 || len <= 0)
 845                 return (-EINVAL);
 846
 847         spl_inode_lock(ip);
 848         olen = i_size_read(ip);
 849
 850         crhold(cr);
 851         cookie = spl_fstrans_mark();
 852         if (mode & (test_mode)) {
 853                 flock64_t bf;
 854
 855                 if (mode & FALLOC_FL_KEEP_SIZE) {
 856                         if (offset > olen)
 857                                 goto out_unmark;
 858
 859                         if (offset + len > olen)
 860                                 len = olen - offset;
 861                 }
 862                 bf.l_type = F_WRLCK;
 863                 bf.l_whence = SEEK_SET;
 864                 bf.l_start = offset;
 865                 bf.l_len = len;
 866                 bf.l_pid = 0;
 867
 868                 error = -zfs_space(ITOZ(ip), F_FREESP, &bf, O_RDWR, offset, cr);
 869         } else if ((mode & ~FALLOC_FL_KEEP_SIZE) == 0) {
 870                 unsigned int percent = zfs_fallocate_reserve_percent;
 871                 struct kstatfs statfs;
 872
 873                 /* Legacy mode, disable fallocate compatibility. */
 874                 if (percent == 0) {
 875                         error = -EOPNOTSUPP;
 876                         goto out_unmark;
 877                 }
 878
 879                 /*
 880                  * Use zfs_statvfs() instead of dmu_objset_space() since it
 881                  * also checks project quota limits, which are relevant here.
 882                  */
 883                 error = zfs_statvfs(ip, &statfs);
 884                 if (error)
 885                         goto out_unmark;
 886
 887                 /*
 888                  * Shrink available space a bit to account for overhead/races.
 889                  * We know the product previously fit into availbytes from
 890                  * dmu_objset_space(), so the smaller product will also fit.
 891                  */
 892                 if (len > statfs.f_bavail * (statfs.f_bsize * 100 / percent)) {
 893                         error = -ENOSPC;
 894                         goto out_unmark;
 895                 }
 896                 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > olen)
 897                         error = zfs_freesp(ITOZ(ip), offset + len, 0, 0, FALSE);
 898         }
 899 out_unmark:
 900         spl_fstrans_unmark(cookie);
 901         spl_inode_unlock(ip);
 902
 903         crfree(cr);
 904
 905         return (error);
 906 }
 907
 908 static long
 909 zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
 910 {
 911         return zpl_fallocate_common(file_inode(filp),
 912             mode, offset, len);
 913 }
 914
 915 static int
 916 zpl_ioctl_getversion(struct file *filp, void __user *arg)
 917 {
 918         uint32_t generation = file_inode(filp)->i_generation;
 919
 920         return (copy_to_user(arg, &generation, sizeof (generation)));
 921 }
 922
 923 #ifdef HAVE_FILE_FADVISE
 924 static int
 925 zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
 926 {
 927         struct inode *ip = file_inode(filp);
 928         znode_t *zp = ITOZ(ip);
 929         zfsvfs_t *zfsvfs = ITOZSB(ip);
 930         objset_t *os = zfsvfs->z_os;
 931         int error = 0;
 932
 933         if (S_ISFIFO(ip->i_mode))
 934                 return (-ESPIPE);
 935
 936         if (offset < 0 || len < 0)
 937                 return (-EINVAL);
 938
 939         if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 940                 return (error);
 941
 942         switch (advice) {
 943         case POSIX_FADV_SEQUENTIAL:
 944         case POSIX_FADV_WILLNEED:
 945 #ifdef HAVE_GENERIC_FADVISE
 946                 if (zn_has_cached_data(zp, offset, offset + len - 1))
 947                         error = generic_fadvise(filp, offset, len, advice);
 948 #endif
 949                 /*
 950                  * Pass on the caller's size directly, but note that
 951                  * dmu_prefetch_max will effectively cap it.  If there
 952                  * really is a larger sequential access pattern, perhaps
 953                  * dmu_zfetch will detect it.
 954                  */
 955                 if (len == 0)
 956                         len = i_size_read(ip) - offset;
 957
 958                 dmu_prefetch(os, zp->z_id, 0, offset, len,
 959                     ZIO_PRIORITY_ASYNC_READ);
 960                 break;
 961         case POSIX_FADV_NORMAL:
 962         case POSIX_FADV_RANDOM:
 963         case POSIX_FADV_DONTNEED:
 964         case POSIX_FADV_NOREUSE:
 965                 /* ignored for now */
 966                 break;
 967         default:
 968                 error = -EINVAL;
 969                 break;
 970         }
 971
 972         zfs_exit(zfsvfs, FTAG);
 973
 974         return (error);
 975 }
 976 #endif /* HAVE_FILE_FADVISE */
 977
 978 #define ZFS_FL_USER_VISIBLE     (FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL)
 979 #define ZFS_FL_USER_MODIFIABLE  (FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL)
 980
 981 static uint32_t
 982 __zpl_ioctl_getflags(struct inode *ip)
 983 {
 984         uint64_t zfs_flags = ITOZ(ip)->z_pflags;
 985         uint32_t ioctl_flags = 0;
 986
 987         if (zfs_flags & ZFS_IMMUTABLE)
 988                 ioctl_flags |= FS_IMMUTABLE_FL;
 989
 990         if (zfs_flags & ZFS_APPENDONLY)
 991                 ioctl_flags |= FS_APPEND_FL;
 992
 993         if (zfs_flags & ZFS_NODUMP)
 994                 ioctl_flags |= FS_NODUMP_FL;
 995
 996         if (zfs_flags & ZFS_PROJINHERIT)
 997                 ioctl_flags |= ZFS_PROJINHERIT_FL;
 998
 999         return (ioctl_flags & ZFS_FL_USER_VISIBLE);
1000 }
1001
1002 /*
1003  * Map zfs file z_pflags (xvattr_t) to linux file attributes. Only file
1004  * attributes common to both Linux and Solaris are mapped.
1005  */
1006 static int
1007 zpl_ioctl_getflags(struct file *filp, void __user *arg)
1008 {
1009         uint32_t flags;
1010         int err;
1011
1012         flags = __zpl_ioctl_getflags(file_inode(filp));
1013         err = copy_to_user(arg, &flags, sizeof (flags));
1014
1015         return (err);
1016 }
1017
1018 /*
1019  * fchange() is a helper macro to detect if we have been asked to change a
1020  * flag. This is ugly, but the requirement that we do this is a consequence of
1021  * how the Linux file attribute interface was designed. Another consequence is
1022  * that concurrent modification of files suffers from a TOCTOU race. Neither
1023  * are things we can fix without modifying the kernel-userland interface, which
1024  * is outside of our jurisdiction.
1025  */
1026
1027 #define fchange(f0, f1, b0, b1) (!((f0) & (b0)) != !((f1) & (b1)))
1028
1029 static int
1030 __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
1031 {
1032         uint64_t zfs_flags = ITOZ(ip)->z_pflags;
1033         xoptattr_t *xoap;
1034
1035         if (ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL |
1036             ZFS_PROJINHERIT_FL))
1037                 return (-EOPNOTSUPP);
1038
1039         if (ioctl_flags & ~ZFS_FL_USER_MODIFIABLE)
1040                 return (-EACCES);
1041
1042         if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
1043             fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) &&
1044             !capable(CAP_LINUX_IMMUTABLE))
1045                 return (-EPERM);
1046
1047         if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
1048                 return (-EACCES);
1049
1050         xva_init(xva);
1051         xoap = xva_getxoptattr(xva);
1052
1053 #define FLAG_CHANGE(iflag, zflag, xflag, xfield)        do {    \
1054         if (((ioctl_flags & (iflag)) && !(zfs_flags & (zflag))) ||      \
1055             ((zfs_flags & (zflag)) && !(ioctl_flags & (iflag)))) {      \
1056                 XVA_SET_REQ(xva, (xflag));      \
1057                 (xfield) = ((ioctl_flags & (iflag)) != 0);      \
1058         }       \
1059 } while (0)
1060
1061         FLAG_CHANGE(FS_IMMUTABLE_FL, ZFS_IMMUTABLE, XAT_IMMUTABLE,
1062             xoap->xoa_immutable);
1063         FLAG_CHANGE(FS_APPEND_FL, ZFS_APPENDONLY, XAT_APPENDONLY,
1064             xoap->xoa_appendonly);
1065         FLAG_CHANGE(FS_NODUMP_FL, ZFS_NODUMP, XAT_NODUMP,
1066             xoap->xoa_nodump);
1067         FLAG_CHANGE(ZFS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT,
1068             xoap->xoa_projinherit);
1069
1070 #undef  FLAG_CHANGE
1071
1072         return (0);
1073 }
1074
1075 static int
1076 zpl_ioctl_setflags(struct file *filp, void __user *arg)
1077 {
1078         struct inode *ip = file_inode(filp);
1079         uint32_t flags;
1080         cred_t *cr = CRED();
1081         xvattr_t xva;
1082         int err;
1083         fstrans_cookie_t cookie;
1084
1085         if (copy_from_user(&flags, arg, sizeof (flags)))
1086                 return (-EFAULT);
1087
1088         err = __zpl_ioctl_setflags(ip, flags, &xva);
1089         if (err)
1090                 return (err);
1091
1092         crhold(cr);
1093         cookie = spl_fstrans_mark();
1094         err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, zfs_init_idmap);
1095         spl_fstrans_unmark(cookie);
1096         crfree(cr);
1097
1098         return (err);
1099 }
1100
1101 static int
1102 zpl_ioctl_getxattr(struct file *filp, void __user *arg)
1103 {
1104         zfsxattr_t fsx = { 0 };
1105         struct inode *ip = file_inode(filp);
1106         int err;
1107
1108         fsx.fsx_xflags = __zpl_ioctl_getflags(ip);
1109         fsx.fsx_projid = ITOZ(ip)->z_projid;
1110         err = copy_to_user(arg, &fsx, sizeof (fsx));
1111
1112         return (err);
1113 }
1114
1115 static int
1116 zpl_ioctl_setxattr(struct file *filp, void __user *arg)
1117 {
1118         struct inode *ip = file_inode(filp);
1119         zfsxattr_t fsx;
1120         cred_t *cr = CRED();
1121         xvattr_t xva;
1122         xoptattr_t *xoap;
1123         int err;
1124         fstrans_cookie_t cookie;
1125
1126         if (copy_from_user(&fsx, arg, sizeof (fsx)))
1127                 return (-EFAULT);
1128
1129         if (!zpl_is_valid_projid(fsx.fsx_projid))
1130                 return (-EINVAL);
1131
1132         err = __zpl_ioctl_setflags(ip, fsx.fsx_xflags, &xva);
1133         if (err)
1134                 return (err);
1135
1136         xoap = xva_getxoptattr(&xva);
1137         XVA_SET_REQ(&xva, XAT_PROJID);
1138         xoap->xoa_projid = fsx.fsx_projid;
1139
1140         crhold(cr);
1141         cookie = spl_fstrans_mark();
1142         err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, zfs_init_idmap);
1143         spl_fstrans_unmark(cookie);
1144         crfree(cr);
1145
1146         return (err);
1147 }
1148
1149 /*
1150  * Expose Additional File Level Attributes of ZFS.
1151  */
1152 static int
1153 zpl_ioctl_getdosflags(struct file *filp, void __user *arg)
1154 {
1155         struct inode *ip = file_inode(filp);
1156         uint64_t dosflags = ITOZ(ip)->z_pflags;
1157         dosflags &= ZFS_DOS_FL_USER_VISIBLE;
1158         int err = copy_to_user(arg, &dosflags, sizeof (dosflags));
1159
1160         return (err);
1161 }
1162
1163 static int
1164 __zpl_ioctl_setdosflags(struct inode *ip, uint64_t ioctl_flags, xvattr_t *xva)
1165 {
1166         uint64_t zfs_flags = ITOZ(ip)->z_pflags;
1167         xoptattr_t *xoap;
1168
1169         if (ioctl_flags & (~ZFS_DOS_FL_USER_VISIBLE))
1170                 return (-EOPNOTSUPP);
1171
1172         if ((fchange(ioctl_flags, zfs_flags, ZFS_IMMUTABLE, ZFS_IMMUTABLE) ||
1173             fchange(ioctl_flags, zfs_flags, ZFS_APPENDONLY, ZFS_APPENDONLY)) &&
1174             !capable(CAP_LINUX_IMMUTABLE))
1175                 return (-EPERM);
1176
1177         if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
1178                 return (-EACCES);
1179
1180         xva_init(xva);
1181         xoap = xva_getxoptattr(xva);
1182
1183 #define FLAG_CHANGE(iflag, xflag, xfield)       do {    \
1184         if (((ioctl_flags & (iflag)) && !(zfs_flags & (iflag))) ||      \
1185             ((zfs_flags & (iflag)) && !(ioctl_flags & (iflag)))) {      \
1186                 XVA_SET_REQ(xva, (xflag));      \
1187                 (xfield) = ((ioctl_flags & (iflag)) != 0);      \
1188         }       \
1189 } while (0)
1190
1191         FLAG_CHANGE(ZFS_IMMUTABLE, XAT_IMMUTABLE, xoap->xoa_immutable);
1192         FLAG_CHANGE(ZFS_APPENDONLY, XAT_APPENDONLY, xoap->xoa_appendonly);
1193         FLAG_CHANGE(ZFS_NODUMP, XAT_NODUMP, xoap->xoa_nodump);
1194         FLAG_CHANGE(ZFS_READONLY, XAT_READONLY, xoap->xoa_readonly);
1195         FLAG_CHANGE(ZFS_HIDDEN, XAT_HIDDEN, xoap->xoa_hidden);
1196         FLAG_CHANGE(ZFS_SYSTEM, XAT_SYSTEM, xoap->xoa_system);
1197         FLAG_CHANGE(ZFS_ARCHIVE, XAT_ARCHIVE, xoap->xoa_archive);
1198         FLAG_CHANGE(ZFS_NOUNLINK, XAT_NOUNLINK, xoap->xoa_nounlink);
1199         FLAG_CHANGE(ZFS_REPARSE, XAT_REPARSE, xoap->xoa_reparse);
1200         FLAG_CHANGE(ZFS_OFFLINE, XAT_OFFLINE, xoap->xoa_offline);
1201         FLAG_CHANGE(ZFS_SPARSE, XAT_SPARSE, xoap->xoa_sparse);
1202
1203 #undef  FLAG_CHANGE
1204
1205         return (0);
1206 }
1207
1208 /*
1209  * Set Additional File Level Attributes of ZFS.
1210  */
1211 static int
1212 zpl_ioctl_setdosflags(struct file *filp, void __user *arg)
1213 {
1214         struct inode *ip = file_inode(filp);
1215         uint64_t dosflags;
1216         cred_t *cr = CRED();
1217         xvattr_t xva;
1218         int err;
1219         fstrans_cookie_t cookie;
1220
1221         if (copy_from_user(&dosflags, arg, sizeof (dosflags)))
1222                 return (-EFAULT);
1223
1224         err = __zpl_ioctl_setdosflags(ip, dosflags, &xva);
1225         if (err)
1226                 return (err);
1227
1228         crhold(cr);
1229         cookie = spl_fstrans_mark();
1230         err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, zfs_init_idmap);
1231         spl_fstrans_unmark(cookie);
1232         crfree(cr);
1233
1234         return (err);
1235 }
1236
1237 static long
1238 zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1239 {
1240         switch (cmd) {
1241         case FS_IOC_GETVERSION:
1242                 return (zpl_ioctl_getversion(filp, (void *)arg));
1243         case FS_IOC_GETFLAGS:
1244                 return (zpl_ioctl_getflags(filp, (void *)arg));
1245         case FS_IOC_SETFLAGS:
1246                 return (zpl_ioctl_setflags(filp, (void *)arg));
1247         case ZFS_IOC_FSGETXATTR:
1248                 return (zpl_ioctl_getxattr(filp, (void *)arg));
1249         case ZFS_IOC_FSSETXATTR:
1250                 return (zpl_ioctl_setxattr(filp, (void *)arg));
1251         case ZFS_IOC_GETDOSFLAGS:
1252                 return (zpl_ioctl_getdosflags(filp, (void *)arg));
1253         case ZFS_IOC_SETDOSFLAGS:
1254                 return (zpl_ioctl_setdosflags(filp, (void *)arg));
1255         case ZFS_IOC_COMPAT_FICLONE:
1256                 return (zpl_ioctl_ficlone(filp, (void *)arg));
1257         case ZFS_IOC_COMPAT_FICLONERANGE:
1258                 return (zpl_ioctl_ficlonerange(filp, (void *)arg));
1259         case ZFS_IOC_COMPAT_FIDEDUPERANGE:
1260                 return (zpl_ioctl_fideduperange(filp, (void *)arg));
1261         default:
1262                 return (-ENOTTY);
1263         }
1264 }
1265
1266 #ifdef CONFIG_COMPAT
1267 static long
1268 zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1269 {
1270         switch (cmd) {
1271         case FS_IOC32_GETVERSION:
1272                 cmd = FS_IOC_GETVERSION;
1273                 break;
1274         case FS_IOC32_GETFLAGS:
1275                 cmd = FS_IOC_GETFLAGS;
1276                 break;
1277         case FS_IOC32_SETFLAGS:
1278                 cmd = FS_IOC_SETFLAGS;
1279                 break;
1280         default:
1281                 return (-ENOTTY);
1282         }
1283         return (zpl_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)));
1284 }
1285 #endif /* CONFIG_COMPAT */
1286
1287 const struct address_space_operations zpl_address_space_operations = {
1288 #ifdef HAVE_VFS_READPAGES
1289         .readpages      = zpl_readpages,
1290 #else
1291         .readahead      = zpl_readahead,
1292 #endif
1293 #ifdef HAVE_VFS_READ_FOLIO
1294         .read_folio     = zpl_read_folio,
1295 #else
1296         .readpage       = zpl_readpage,
1297 #endif
1298         .writepage      = zpl_writepage,
1299         .writepages     = zpl_writepages,
1300         .direct_IO      = zpl_direct_IO,
1301 #ifdef HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS
1302         .set_page_dirty = __set_page_dirty_nobuffers,
1303 #endif
1304 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
1305         .dirty_folio    = filemap_dirty_folio,
1306 #endif
1307 };
1308
1309 #ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
1310 const struct file_operations_extend zpl_file_operations = {
1311         .kabi_fops = {
1312 #else
1313 const struct file_operations zpl_file_operations = {
1314 #endif
1315         .open           = zpl_open,
1316         .release        = zpl_release,
1317         .llseek         = zpl_llseek,
1318 #ifdef HAVE_VFS_RW_ITERATE
1319 #ifdef HAVE_NEW_SYNC_READ
1320         .read           = new_sync_read,
1321         .write          = new_sync_write,
1322 #endif
1323         .read_iter      = zpl_iter_read,
1324         .write_iter     = zpl_iter_write,
1325 #ifdef HAVE_VFS_IOV_ITER
1326 #ifdef HAVE_COPY_SPLICE_READ
1327         .splice_read    = copy_splice_read,
1328 #else
1329         .splice_read    = generic_file_splice_read,
1330 #endif
1331         .splice_write   = iter_file_splice_write,
1332 #endif
1333 #else
1334         .read           = do_sync_read,
1335         .write          = do_sync_write,
1336         .aio_read       = zpl_aio_read,
1337         .aio_write      = zpl_aio_write,
1338 #endif
1339         .mmap           = zpl_mmap,
1340         .fsync          = zpl_fsync,
1341 #ifdef HAVE_FILE_AIO_FSYNC
1342         .aio_fsync      = zpl_aio_fsync,
1343 #endif
1344         .fallocate      = zpl_fallocate,
1345 #ifdef HAVE_VFS_COPY_FILE_RANGE
1346         .copy_file_range        = zpl_copy_file_range,
1347 #endif
1348 #ifdef HAVE_VFS_CLONE_FILE_RANGE
1349         .clone_file_range       = zpl_clone_file_range,
1350 #endif
1351 #ifdef HAVE_VFS_REMAP_FILE_RANGE
1352         .remap_file_range       = zpl_remap_file_range,
1353 #endif
1354 #ifdef HAVE_VFS_DEDUPE_FILE_RANGE
1355         .dedupe_file_range      = zpl_dedupe_file_range,
1356 #endif
1357 #ifdef HAVE_FILE_FADVISE
1358         .fadvise        = zpl_fadvise,
1359 #endif
1360         .unlocked_ioctl = zpl_ioctl,
1361 #ifdef CONFIG_COMPAT
1362         .compat_ioctl   = zpl_compat_ioctl,
1363 #endif
1364 #ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
1365         }, /* kabi_fops */
1366         .copy_file_range        = zpl_copy_file_range,
1367         .clone_file_range       = zpl_clone_file_range,
1368 #endif
1369 };
1370
1371 const struct file_operations zpl_dir_file_operations = {
1372         .llseek         = generic_file_llseek,
1373         .read           = generic_read_dir,
1374 #if defined(HAVE_VFS_ITERATE_SHARED)
1375         .iterate_shared = zpl_iterate,
1376 #elif defined(HAVE_VFS_ITERATE)
1377         .iterate        = zpl_iterate,
1378 #else
1379         .readdir        = zpl_readdir,
1380 #endif
1381         .fsync          = zpl_fsync,
1382         .unlocked_ioctl = zpl_ioctl,
1383 #ifdef CONFIG_COMPAT
1384         .compat_ioctl   = zpl_compat_ioctl,
1385 #endif
1386 };
1387
1388 /* CSTYLED */
1389 module_param(zfs_fallocate_reserve_percent, uint, 0644);
1390 MODULE_PARM_DESC(zfs_fallocate_reserve_percent,
1391         "Percentage of length to use for the available capacity check");