/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
/*
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/uio_impl.h>
#include <sys/string.h>
#include <sys/zfs_refcount.h>
#include <sys/zfs_debug.h>
#include <linux/kmap_compat.h>
#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/mman.h>
/*
 * Move "n" bytes at byte address "p"; "rw" indicates the direction
 * of the move, and the I/O parameters are provided in "uio", which is
 * updated to reflect the data which was moved. Returns 0 on success or
 * a non-zero errno on failure.
 */
static int
zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct iovec *iov = uio->uio_iov;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		cnt = MIN(iov->iov_len - skip, n);
		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * p = kernel data pointer
			 * iov->iov_base = user data pointer
			 */
			if (rw == UIO_READ) {
				if (copy_to_user(iov->iov_base+skip, p, cnt))
					return (EFAULT);
			} else {
				unsigned long b_left = 0;
				if (uio->uio_fault_disable) {
					if (!zfs_access_ok(VERIFY_READ,
					    (iov->iov_base + skip), cnt)) {
						return (EFAULT);
					}
					pagefault_disable();
					b_left =
					    __copy_from_user_inatomic(p,
					    (iov->iov_base + skip), cnt);
					pagefault_enable();
				} else {
					b_left =
					    copy_from_user(p,
					    (iov->iov_base + skip), cnt);
				}
				if (b_left > 0) {
					unsigned long c_bytes =
					    cnt - b_left;
					uio->uio_skip += c_bytes;
					ASSERT3U(uio->uio_skip, <,
					    iov->iov_len);
					uio->uio_resid -= c_bytes;
					uio->uio_loffset += c_bytes;
					return (EFAULT);
				}
			}
			break;
		case UIO_SYSSPACE:
			if (rw == UIO_READ)
				memcpy(iov->iov_base + skip, p, cnt);
			else
				memcpy(p, iov->iov_base + skip, cnt);
			break;
		default:
			ASSERT(0);
		}
		skip += cnt;
		if (skip == iov->iov_len) {
			skip = 0;
			uio->uio_iov = (++iov);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}
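
/*
 * Move "n" bytes between the kernel buffer "p" and the bio_vec pages of
 * "uio", one bio_vec entry at a time, mapping each page with
 * zfs_kmap_local() for the duration of the copy.
 */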
static int
zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct bio_vec *bv = uio->uio_bvec;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		void *paddr;
		cnt = MIN(bv->bv_len - skip, n);

		paddr = zfs_kmap_local(bv->bv_page);
		if (rw == UIO_READ) {
			/* Copy from buffer 'p' to the bvec data */
			memcpy(paddr + bv->bv_offset + skip, p, cnt);
		} else {
			/* Copy from bvec data to buffer 'p' */
			memcpy(p, paddr + bv->bv_offset + skip, cnt);
		}
		zfs_kunmap_local(paddr);

		skip += cnt;
		if (skip == bv->bv_len) {
			skip = 0;
			uio->uio_bvec = (++bv);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}
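
/*
 * Copy "cnt" bytes between the kernel buffer "p" and a single bio_vec,
 * starting "skip" bytes into the bio_vec data.
 */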
static void
zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
    struct bio_vec *bv)
{
	void *paddr;

	paddr = zfs_kmap_local(bv->bv_page);
	if (rw == UIO_READ) {
		/* Copy from buffer 'p' to the bvec data */
		memcpy(paddr + bv->bv_offset + skip, p, cnt);
	} else {
		/* Copy from bvec data to buffer 'p' */
		memcpy(p, paddr + bv->bv_offset + skip, cnt);
	}
	zfs_kunmap_local(paddr);
}
/*
 * Copy 'n' bytes of data between the buffer p[] and the data represented
 * by the request in the uio.
 */
static int
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	struct request *rq = uio->rq;
	struct bio_vec bv;
	struct req_iterator iter;
	size_t this_seg_start;	/* logical offset */
	size_t this_seg_end;	/* logical offset */
	size_t skip_in_seg;
	size_t copy_from_seg;
	size_t orig_loffset;
	int copied = 0;

	/*
	 * Get the original logical offset of this entire request (because
	 * uio->uio_loffset will be modified over time).
	 */
	orig_loffset = io_offset(NULL, rq);
	this_seg_start = orig_loffset;

	rq_for_each_segment(bv, rq, iter) {
		/*
		 * Lookup what the logical offset of the last byte of this
		 * segment is.
		 */
		this_seg_end = this_seg_start + bv.bv_len - 1;

		/*
		 * We only need to operate on segments that have data we're
		 * copying.
		 */
		if (uio->uio_loffset >= this_seg_start &&
		    uio->uio_loffset <= this_seg_end) {
			/*
			 * Some, or all, of the data in this segment needs to
			 * be copied.
			 */

			/*
			 * We may not be copying from the first byte in the
			 * segment. Figure out how many bytes to skip copying
			 * from the beginning of this segment.
			 */
			skip_in_seg = uio->uio_loffset - this_seg_start;

			/*
			 * Calculate the total number of bytes from this
			 * segment that we will be copying.
			 */
			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);

			/* Copy the bytes */
			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
			p = ((char *)p) + copy_from_seg;

			n -= copy_from_seg;
			uio->uio_resid -= copy_from_seg;
			uio->uio_loffset += copy_from_seg;
			copied = 1;	/* We copied some data */
		}

		this_seg_start = this_seg_end + 1;
	}

	if (!copied) {
		/* Didn't copy anything */
		uio->uio_resid = 0;
	}
	return (0);
}
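
/*
 * Dispatch a bvec-backed uio: if the uio wraps a struct request, copy
 * through the request's segments; otherwise walk uio->uio_bvec directly.
 */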
static int
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	if (uio->rq != NULL)
		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
}
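
/*
 * Move data using the kernel's iov_iter interface (only built when the
 * kernel provides it). When "revert" is set the iov_iter is rolled back
 * after the copy so the caller's iterator is left unconsumed.
 */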
#if defined(HAVE_VFS_IOV_ITER)
static int
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
    boolean_t revert)
{
	size_t cnt = MIN(n, uio->uio_resid);

	if (uio->uio_skip)
		iov_iter_advance(uio->uio_iter, uio->uio_skip);

	if (rw == UIO_READ)
		cnt = copy_to_iter(p, cnt, uio->uio_iter);
	else
		cnt = copy_from_iter(p, cnt, uio->uio_iter);

	/*
	 * When operating on a full pipe no bytes are processed.
	 * In which case return EFAULT which is converted to EAGAIN
	 * by the kernel's generic_file_splice_read() function.
	 */
	if (cnt == 0)
		return (EFAULT);

	/*
	 * Revert advancing the uio_iter. This is set by zfs_uiocopy()
	 * to avoid consuming the uio and its iov_iter structure.
	 */
	if (revert)
		iov_iter_revert(uio->uio_iter, cnt);

	uio->uio_resid -= cnt;
	uio->uio_loffset += cnt;

	return (0);
}
#endif
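
/*
 * Dispatch zfs_uiomove() based on the segment type of the uio.
 */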
int
zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_BVEC)
		return (zfs_uiomove_bvec(p, n, rw, uio));
#if defined(HAVE_VFS_IOV_ITER)
	else if (uio->uio_segflg == UIO_ITER)
		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
#endif
	else
		return (zfs_uiomove_iov(p, n, rw, uio));
}
EXPORT_SYMBOL(zfs_uiomove);
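
/*
 * Illustrative sketch (not part of the original file): a caller moving
 * kernel data out to a user-supplied uio will typically prefault the
 * user pages with zfs_uio_prefaultpages() (below) before copying, e.g.
 * ("buf" and "n" are hypothetical, error handling elided):
 *
 *	if (zfs_uio_prefaultpages(MIN(n, PAGE_SIZE), uio) != 0)
 *		return (SET_ERROR(EFAULT));
 *	error = zfs_uiomove(buf, n, UIO_READ, uio);
 */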
/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified. Any
 * error will terminate the prefaulting, as this is only a best-effort
 * attempt to get the pages resident.
 */
int
zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC ||
	    (uio->uio_extflg & UIO_DIRECT)) {
		/*
		 * There's never a need to fault in kernel pages or Direct I/O
		 * write pages. Direct I/O write pages have been pinned, so a
		 * fault can never occur for these pages.
		 */
		return (0);
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		/*
		 * Since at least the Linux 4.9 kernel,
		 * iov_iter_fault_in_readable() can be relied on to fault in
		 * user pages when referenced.
		 */
		if (iov_iter_fault_in_readable(uio->uio_iter, n))
			return (EFAULT);
#endif
	} else {
		/* Fault in all user pages */
		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
		const struct iovec *iov = uio->uio_iov;
		int iovcnt = uio->uio_iovcnt;
		size_t skip = uio->uio_skip;
		uint8_t tmp;
		caddr_t p;

		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
			ulong_t cnt = MIN(iov->iov_len - skip, n);
			/* empty iov */
			if (cnt == 0)
				continue;
			n -= cnt;
			/* touch each page in this segment. */
			p = iov->iov_base + skip;
			while (cnt) {
				if (copy_from_user(&tmp, p, 1))
					return (EFAULT);
				ulong_t incr = MIN(cnt, PAGESIZE);
				p += incr;
				cnt -= incr;
			}
			/* touch the last byte in case it straddles a page. */
			p--;
			if (copy_from_user(&tmp, p, 1))
				return (EFAULT);
		}
	}

	return (0);
}
EXPORT_SYMBOL(zfs_uio_prefaultpages);
/*
 * The same as zfs_uiomove() but doesn't modify uio structure.
 * Return in cbytes how many bytes were copied.
 */
int
zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
{
	zfs_uio_t uio_copy;
	int ret;

	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));

	if (uio->uio_segflg == UIO_BVEC)
		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
#if defined(HAVE_VFS_IOV_ITER)
	else if (uio->uio_segflg == UIO_ITER)
		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
#endif
	else
		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);

	*cbytes = uio->uio_resid - uio_copy.uio_resid;

	return (ret);
}
EXPORT_SYMBOL(zfs_uiocopy);
/*
 * Drop the next n chars out of *uio.
 */
void
zfs_uioskip(zfs_uio_t *uio, size_t n)
{
	if (n > uio->uio_resid)
		return;
	/*
	 * When using a uio with a struct request, we simply
	 * use uio_loffset as a pointer to the next logical byte to
	 * copy in the request. We don't have to do any fancy
	 * accounting with uio_bvec/uio_iovcnt since we don't use
	 * them.
	 */
	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_bvec->bv_len) {
			uio->uio_skip -= uio->uio_bvec->bv_len;
			uio->uio_bvec++;
			uio->uio_iovcnt--;
		}
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		iov_iter_advance(uio->uio_iter, n);
#endif
	} else {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_iov->iov_len) {
			uio->uio_skip -= uio->uio_iov->iov_len;
			uio->uio_iov++;
			uio->uio_iovcnt--;
		}
	}

	uio->uio_loffset += n;
	uio->uio_resid -= n;
}
EXPORT_SYMBOL(zfs_uioskip);
/*
 * Check if the uio is page-aligned in memory.
 */
boolean_t
zfs_uio_page_aligned(zfs_uio_t *uio)
{
	boolean_t aligned = B_TRUE;

	if (uio->uio_segflg == UIO_USERSPACE ||
	    uio->uio_segflg == UIO_SYSSPACE) {
		const struct iovec *iov = uio->uio_iov;
		size_t skip = uio->uio_skip;

		for (int i = uio->uio_iovcnt; i > 0; iov++, i--) {
			uintptr_t addr = (uintptr_t)(iov->iov_base + skip);
			size_t size = iov->iov_len - skip;
			if ((addr & (PAGE_SIZE - 1)) ||
			    (size & (PAGE_SIZE - 1))) {
				aligned = B_FALSE;
				break;
			}
			skip = 0;
		}
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		unsigned long alignment =
		    iov_iter_alignment(uio->uio_iter);
		aligned = IS_P2ALIGNED(alignment, PAGE_SIZE);
#endif
	} else {
		/* Currently not supported */
		aligned = B_FALSE;
	}

	return (aligned);
}
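
/*
 * On kernels where ZERO_PAGE() is exported GPL-only (or on 32-bit builds,
 * where the 64-bit marker below does not fit in page_private), the
 * zero-page substitution machinery is compiled out and these helpers
 * become no-ops.
 */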
#if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64)
#define	ZFS_MARKED_PAGE		0x0
#define	IS_ZFS_MARKED_PAGE(_p)	0
#define	zfs_mark_page(_p)
#define	zfs_unmark_page(_p)
#define	IS_ZERO_PAGE(_p)	0

#else
/*
 * Mark pages to know if they were allocated to replace ZERO_PAGE() for
 * Direct I/O writes.
 */
#define	ZFS_MARKED_PAGE		0x5a465350414745 /* ASCII: ZFSPAGE */
#define	IS_ZFS_MARKED_PAGE(_p) \
	(page_private(_p) == (unsigned long)ZFS_MARKED_PAGE)
#define	IS_ZERO_PAGE(_p) ((_p) == ZERO_PAGE(0))

static inline void
zfs_mark_page(struct page *page)
{
	ASSERT3P(page, !=, NULL);
	get_page(page);
	SetPagePrivate(page);
	set_page_private(page, ZFS_MARKED_PAGE);
}

static inline void
zfs_unmark_page(struct page *page)
{
	ASSERT3P(page, !=, NULL);
	set_page_private(page, 0UL);
	ClearPagePrivate(page);
	put_page(page);
}
#endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */
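
/*
 * Scan the pinned Direct I/O write pages and replace any reference to the
 * kernel's shared zero page with a freshly allocated, marked, zero-filled
 * page, so the data being written cannot change underneath the write.
 */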
static void
zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
{
	ASSERT3P(uio->uio_dio.pages, !=, NULL);

	for (long i = 0; i < uio->uio_dio.npages; i++) {
		struct page *p = uio->uio_dio.pages[i];

		lock_page(p);
		if (IS_ZERO_PAGE(p)) {
			/*
			 * If the user page points to the kernel's ZERO_PAGE(),
			 * a new zero-filled page will just be allocated so the
			 * contents of the page cannot be changed by the user
			 * while a Direct I/O write is taking place.
			 */
			gfp_t gfp_zero_page = __GFP_NOWARN | GFP_NOIO |
			    __GFP_ZERO | GFP_KERNEL;

			ASSERT0(IS_ZFS_MARKED_PAGE(p));
			unlock_page(p);
			put_page(p);

			p = __page_cache_alloc(gfp_zero_page);
			zfs_mark_page(p);
			uio->uio_dio.pages[i] = p;
		} else {
			unlock_page(p);
		}
	}
}
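
/*
 * Release the pages that were pinned for a Direct I/O request: pages we
 * allocated ourselves (marked pages) are freed, all others are unpinned
 * with put_page(), and the page array itself is freed.
 */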
void
zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	ASSERT(uio->uio_extflg & UIO_DIRECT);
	ASSERT3P(uio->uio_dio.pages, !=, NULL);

	for (long i = 0; i < uio->uio_dio.npages; i++) {
		struct page *p = uio->uio_dio.pages[i];

		if (IS_ZFS_MARKED_PAGE(p)) {
			zfs_unmark_page(p);
			__free_page(p);
			continue;
		}

		put_page(p);
	}

	vmem_free(uio->uio_dio.pages,
	    uio->uio_dio.npages * sizeof (struct page *));
}
/*
 * zfs_uio_iov_step() is just a modified version of the STEP function of
 * Linux's iov_iter_get_pages().
 */
static int
zfs_uio_iov_step(struct iovec v, zfs_uio_rw_t rw, zfs_uio_t *uio,
    long *numpages)
{
	unsigned long addr = (unsigned long)(v.iov_base);
	size_t len = v.iov_len;
	unsigned long n = DIV_ROUND_UP(len, PAGE_SIZE);

	/*
	 * A read passing FOLL_WRITE is due to the fact that we are stating
	 * that the kernel will have write access to the user pages. So, when
	 * a Direct I/O read request is issued, the kernel must write to the
	 * user pages.
	 */
	long res = get_user_pages_unlocked(
	    P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n,
	    &uio->uio_dio.pages[uio->uio_dio.npages],
	    rw == UIO_READ ? FOLL_WRITE : 0);
	if (res < 0) {
		return (SET_ERROR(-res));
	} else if (len != (res * PAGE_SIZE)) {
		return (SET_ERROR(EFAULT));
	}

	ASSERT3S(len, ==, res * PAGE_SIZE);
	*numpages = res;
	return (0);
}
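
/*
 * Pin the user pages of an iovec-based uio for Direct I/O by stepping
 * through each iovec entry with zfs_uio_iov_step().
 */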
static int
zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	const struct iovec *iovp = uio->uio_iov;
	size_t skip = uio->uio_skip;
	size_t len = uio->uio_resid - skip;

	ASSERT(uio->uio_segflg != UIO_SYSSPACE);

	for (int i = 0; i < uio->uio_iovcnt; i++) {
		struct iovec iov;
		long numpages = 0;

		if (iovp->iov_len == 0) {
			iovp++;
			skip = 0;
			continue;
		}
		iov.iov_len = MIN(len, iovp->iov_len - skip);
		iov.iov_base = iovp->iov_base + skip;
		int error = zfs_uio_iov_step(iov, rw, uio, &numpages);

		if (error)
			return (error);

		uio->uio_dio.npages += numpages;
		len -= iov.iov_len;
		skip = 0;
		iovp++;
	}

	ASSERT0(len);

	return (0);
}
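
/*
 * Pin the user pages of an iov_iter-based uio for Direct I/O (compiled
 * only when the kernel provides the iov_iter interface). The iov_iter is
 * reverted once all pages have been obtained so the caller's iterator is
 * left unmodified.
 */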
#if defined(HAVE_VFS_IOV_ITER)
static int
zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	size_t skip = uio->uio_skip;
	size_t wanted = uio->uio_resid - uio->uio_skip;
	ssize_t rollback = 0;
	ssize_t cnt;
	unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);

	while (wanted) {
#if defined(HAVE_IOV_ITER_GET_PAGES2)
		cnt = iov_iter_get_pages2(uio->uio_iter,
		    &uio->uio_dio.pages[uio->uio_dio.npages],
		    wanted, maxpages, &skip);
#else
		cnt = iov_iter_get_pages(uio->uio_iter,
		    &uio->uio_dio.pages[uio->uio_dio.npages],
		    wanted, maxpages, &skip);
#endif
		if (cnt < 0) {
			iov_iter_revert(uio->uio_iter, rollback);
			return (SET_ERROR(-cnt));
		}
		uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE);
		rollback += cnt;
		wanted -= cnt;
		skip = 0;
#if !defined(HAVE_IOV_ITER_GET_PAGES2)
		/*
		 * iov_iter_get_pages2() advances the iov_iter on success.
		 */
		iov_iter_advance(uio->uio_iter, cnt);
#endif
	}
	ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip);
	iov_iter_revert(uio->uio_iter, rollback);

	return (0);
}
#endif /* HAVE_VFS_IOV_ITER */
/*
 * This function pins user pages. In the event that the user pages are not
 * successfully pinned an error value is returned.
 *
 * On success, 0 is returned.
 */
int
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	int error = 0;
	long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE);
	size_t size = npages * sizeof (struct page *);

	if (uio->uio_segflg == UIO_USERSPACE) {
		uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
		error = zfs_uio_get_dio_pages_iov(uio, rw);
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
		error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
#endif
	} else {
		return (SET_ERROR(EOPNOTSUPP));
	}

	ASSERT3S(uio->uio_dio.npages, >=, 0);

	if (error) {
		for (long i = 0; i < uio->uio_dio.npages; i++)
			put_page(uio->uio_dio.pages[i]);
		vmem_free(uio->uio_dio.pages, size);
		return (error);
	} else {
		ASSERT3S(uio->uio_dio.npages, ==, npages);
	}

	if (rw == UIO_WRITE) {
		zfs_uio_dio_check_for_zero_page(uio);
	}

	uio->uio_extflg |= UIO_DIRECT;