/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/signal.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/mman.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/filio.h>
#include <sys/atomic.h>

#include <sys/fssnap_if.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_lockfs.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_fsdir.h>
#include <sys/fs/ufs_quota.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_panic.h>
#include <sys/dirent.h>		/* must be AFTER <sys/fs/fsdir.h>! */
#include <sys/errno.h>

#include <sys/filio.h>		/* _FIOIO */

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/seg_kmem.h>

#include <sys/fs_subr.h>
static void	*ufs_directio_zero_buf;
static int	ufs_directio_zero_len = 8192;

int	ufs_directio_enabled = 1;	/* feature is enabled */
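
/*
 * Note: ufs_directio_enabled is only the global on/off switch for the
 * feature; whether a particular file is actually accessed directly is
 * normally governed by the directio(3C) advisory or the "forcedirectio"
 * mount option (see mount_ufs(1M)).
 */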

struct ufs_directio_kstats {
	kstat_named_t	logical_reads;
	kstat_named_t	phys_reads;
	kstat_named_t	hole_reads;
	kstat_named_t	nread;
	kstat_named_t	logical_writes;
	kstat_named_t	phys_writes;
	kstat_named_t	nwritten;
	kstat_named_t	nflushes;
} ufs_directio_kstats = {
	{ "logical_reads",	KSTAT_DATA_UINT64 },
	{ "phys_reads",		KSTAT_DATA_UINT64 },
	{ "hole_reads",		KSTAT_DATA_UINT64 },
	{ "nread",		KSTAT_DATA_UINT64 },
	{ "logical_writes",	KSTAT_DATA_UINT64 },
	{ "phys_writes",	KSTAT_DATA_UINT64 },
	{ "nwritten",		KSTAT_DATA_UINT64 },
	{ "nflushes",		KSTAT_DATA_UINT64 },
};

kstat_t	*ufs_directio_kstatsp;
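
/*
 * The counters above are exported as the named kstat ufs:0:directio
 * (created in ufs_directio_init() below) and can be inspected from
 * userland, e.g. with kstat(1M):
 *
 *	kstat -m ufs -n directio
 */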

/*
 * use kmem_cache_create for direct-physio buffers. This has shown
 * a better cache distribution compared to buffers on the
 * stack. It also avoids semaphore construction/deconstruction
 * per request.
 */
struct directio_buf {
	struct directio_buf	*next;
	char		*addr;
	size_t		nbytes;
	struct buf	buf;
};
static struct kmem_cache *directio_buf_cache;

/* ARGSUSED */
static int
directio_buf_constructor(void *dbp, void *cdrarg, int kmflags)
{
	bioinit((struct buf *)&((struct directio_buf *)dbp)->buf);
	return (0);
}

/* ARGSUSED */
static void
directio_buf_destructor(void *dbp, void *cdrarg)
{
	biofini((struct buf *)&((struct directio_buf *)dbp)->buf);
}

void
directio_bufs_init(void)
{
	directio_buf_cache = kmem_cache_create("directio_buf_cache",
	    sizeof (struct directio_buf), 0,
	    directio_buf_constructor, directio_buf_destructor,
	    NULL, NULL, NULL, 0);
}

void
ufs_directio_init(void)
{
	/*
	 * kstats
	 */
	ufs_directio_kstatsp = kstat_create("ufs", 0,
	    "directio", "ufs", KSTAT_TYPE_NAMED,
	    sizeof (ufs_directio_kstats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
	if (ufs_directio_kstatsp) {
		ufs_directio_kstatsp->ks_data = (void *)&ufs_directio_kstats;
		kstat_install(ufs_directio_kstatsp);
	}

	/*
	 * kzero is broken so we have to use a private buf of zeroes
	 */
	ufs_directio_zero_buf = kmem_zalloc(ufs_directio_zero_len, KM_SLEEP);

	directio_bufs_init();
}

/*
 * Wait for the first direct IO operation to finish
 */
static int
directio_wait_one(struct directio_buf *dbp, long *bytes_iop)
{
	buf_t	*bp;
	int	error;

	/*
	 * Wait for IO to finish
	 */
	bp = &dbp->buf;
	error = biowait(bp);

	/*
	 * bytes_io will be used to figure out a resid
	 * for the caller. The resid is approximated by reporting
	 * the bytes following the first failed IO as the residual.
	 *
	 * I am cautious about using b_resid because I
	 * am not sure how well the disk drivers maintain it.
	 */
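	/*
	 * Illustrative example: if three 8K requests were queued and only
	 * the middle one fails with nothing transferred, the tail-first
	 * walk in directio_wait() leaves *bytes_iop at 8K (the first
	 * request only), so the caller's residual covers the failed
	 * request and everything after it.
	 */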
	if (error) {
		if (bp->b_resid)
			*bytes_iop = bp->b_bcount - bp->b_resid;
		else
			*bytes_iop = 0;
	} else
		*bytes_iop += bp->b_bcount;

	/*
	 * Release direct IO resources
	 */
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
	kmem_cache_free(directio_buf_cache, dbp);
	return (error);
}

/*
 * Wait for all of the direct IO operations to finish
 */

uint32_t	ufs_directio_drop_kpri = 0;	/* enable kpri hack */
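
/*
 * Like the other debug/tuning globals in this file, this can typically be
 * enabled at boot via /etc/system (e.g. "set ufs:ufs_directio_drop_kpri = 1")
 * or patched on a live system with mdb -kw.
 */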

static int
directio_wait(struct directio_buf *tail, long *bytes_iop)
{
	int	error = 0, newerror;
	struct directio_buf	*dbp;
	uint_t	kpri_req_save;

	/*
	 * The linked list of directio buf structures is maintained
	 * in reverse order (tail->last request->penultimate request->...)
	 */
	/*
	 * This is the k_pri_req hack. Large numbers of threads
	 * sleeping with kernel priority will cause scheduler thrashing
	 * on an MP machine. This can be seen running Oracle using
	 * directio to ufs files. Sleep at normal priority here to
	 * more closely mimic physio to a device partition. This
	 * workaround is disabled by default as a niced thread could
	 * be starved from running while holding i_rwlock and i_contents.
	 */
	if (ufs_directio_drop_kpri) {
		kpri_req_save = curthread->t_kpri_req;
		curthread->t_kpri_req = 0;
	}
	while ((dbp = tail) != NULL) {
		tail = dbp->next;
		newerror = directio_wait_one(dbp, bytes_iop);
		if (error == 0)
			error = newerror;
	}
	if (ufs_directio_drop_kpri)
		curthread->t_kpri_req = kpri_req_save;
	return (error);
}

/*
 * Initiate direct IO request
 */
static void
directio_start(struct ufsvfs *ufsvfsp, struct inode *ip, size_t nbytes,
	offset_t offset, char *addr, enum seg_rw rw, struct proc *procp,
	struct directio_buf **tailp, page_t **pplist)
{
	buf_t *bp;
	struct directio_buf *dbp;

	/*
	 * Allocate a directio buf header
	 *	Note - list is maintained in reverse order.
	 *	directio_wait_one() depends on this fact when
	 *	adjusting the ``bytes_io'' param. bytes_io
	 *	is used to compute a residual in the case of error.
	 */
	dbp = kmem_cache_alloc(directio_buf_cache, KM_SLEEP);
	dbp->next = *tailp;
	*tailp = dbp;

	/*
	 * Initialize buf header
	 */
	dbp->addr = addr;
	dbp->nbytes = nbytes;

	bp = &dbp->buf;
	bp->b_edev = ip->i_dev;
	bp->b_lblkno = btodt(offset);
	bp->b_bcount = nbytes;
	bp->b_un.b_addr = addr;
	bp->b_proc = procp;
	bp->b_file = ip->i_vnode;

	/*
	 * Note that S_WRITE implies B_READ and vice versa: a read(2)
	 * will B_READ data from the filesystem and S_WRITE it into
	 * the user's buffer; a write(2) will S_READ data from the
	 * user's buffer and B_WRITE it to the filesystem.
	 */
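	/*
	 * Accordingly, ufs_directio_read() calls us with rw == S_WRITE and
	 * the request goes out as a device read (B_READ, counted in
	 * phys_reads/nread), while ufs_directio_write() passes S_READ and
	 * the request goes out as a device write (B_WRITE, counted in
	 * phys_writes/nwritten).
	 */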
	if (rw == S_WRITE) {
		bp->b_flags = B_BUSY | B_PHYS | B_READ;
		ufs_directio_kstats.phys_reads.value.ui64++;
		ufs_directio_kstats.nread.value.ui64 += nbytes;
	} else {
		bp->b_flags = B_BUSY | B_PHYS | B_WRITE;
		ufs_directio_kstats.phys_writes.value.ui64++;
		ufs_directio_kstats.nwritten.value.ui64 += nbytes;
	}

	bp->b_shadow = pplist;
	if (pplist != NULL)
		bp->b_flags |= B_SHADOW;

	ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
	if (ufsvfsp->vfs_snapshot)
		fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
	else
		(void) bdev_strategy(bp);

	if (rw == S_READ)
		lwp_stat_update(LWP_STAT_OUBLK, 1);
	else
		lwp_stat_update(LWP_STAT_INBLK, 1);
}

uint32_t	ufs_shared_writes;	/* writes done w/ lock shared */
uint32_t	ufs_cur_writes;		/* # concurrent writes */
uint32_t	ufs_maxcur_writes;	/* high water concurrent writes */
uint32_t	ufs_posix_hits;		/* writes done w/ lock excl. */

/*
 * Force POSIX synchronous data integrity on all writes for testing.
 */
uint32_t	ufs_force_posix_sdi = 0;
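
/*
 * Setting ufs_force_posix_sdi forces the POSIX synchronous-data-integrity
 * check below on every concurrent re-write, even when the file was not
 * opened with O_DSYNC; it exists purely to exercise that code path.
 */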

/*
 * Direct Write
 */
int
ufs_directio_write(struct inode *ip, uio_t *arg_uio, int ioflag, int rewrite,
	cred_t *cr, int *statusp)
{
	long		resid, bytes_written;
	uoff_t		size, uoff;
	uio_t		*uio = arg_uio;
	rlim64_t	limit = uio->uio_llimit;
	int		on, n, error, newerror, len, has_holes;
	daddr_t		bn;
	size_t		nbytes;
	struct fs	*fs;
	vnode_t		*vp;
	iovec_t		*iov;
	struct proc	*procp;
	struct as	*as;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	struct directio_buf	*tail;
	int		exclusive, ncur, bmap_peek;
	uio_t		copy_uio;
	iovec_t		copy_iov;
	char		*copy_base;
	long		copy_resid;

	/*
	 * assume that directio isn't possible (normal case)
	 */
	*statusp = DIRECTIO_FAILURE;

	if (ufs_directio_enabled == 0)
		return (0);

	/*
	 * mapped file; nevermind
	 */
	if (ip->i_mapcnt)
		return (0);

	/*
	 * CAN WE DO DIRECT IO?
	 */
	uoff = uio->uio_loffset;
	resid = uio->uio_resid;

	if (uoff + resid > limit)
		return (0);

	/*
	 * must be sector aligned
	 */
	if ((uoff & (uoff_t)(DEV_BSIZE - 1)) || (resid & (DEV_BSIZE - 1)))
		return (0);

	/*
	 * SHOULD WE DO DIRECT IO?
	 */
	size = ip->i_size;
	has_holes = -1;

	/*
	 * only on regular files; no metadata
	 */
	if (((ip->i_mode & IFMT) != IFREG) || ip->i_ufsvfs->vfs_qinod == ip)
		return (0);

	/*
	 * Synchronous, allocating writes run very slow in Direct-Mode
	 *	XXX - can be fixed with bmap_write changes for large writes!!!
	 *	XXX - can be fixed for updates to "almost-full" files
	 *	XXX - WARNING - system hangs if bmap_write() has to
	 *			allocate lots of pages since pageout
	 *			suspends on locked inode
	 */
	if (!rewrite && (ip->i_flag & ISYNC)) {
		if ((uoff + resid) > size)
			return (0);
		has_holes = bmap_has_holes(ip);
		if (has_holes)
			return (0);
	}

	/*
	 * Each iovec must be short aligned and sector aligned.  If
	 * one is not, then kmem_alloc a new buffer and copy all of
	 * the smaller buffers into the new buffer.  This new
	 * buffer will be short aligned and sector aligned.
	 */
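	/*
	 * If that bounce buffer cannot be allocated (KM_NOSLEEP below),
	 * direct I/O is simply abandoned and the caller falls back to the
	 * normal buffered write path.
	 */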
	iov = uio->uio_iov;
	nbytes = uio->uio_iovcnt;
	while (nbytes--) {
		if (((uint_t)iov->iov_len & (DEV_BSIZE - 1)) != 0 ||
		    (intptr_t)(iov->iov_base) & 1) {
			copy_resid = uio->uio_resid;
			copy_base = kmem_alloc(copy_resid, KM_NOSLEEP);
			if (copy_base == NULL)
				return (0);
			copy_iov.iov_base = copy_base;
			copy_iov.iov_len = copy_resid;
			copy_uio.uio_iov = &copy_iov;
			copy_uio.uio_iovcnt = 1;
			copy_uio.uio_segflg = UIO_SYSSPACE;
			copy_uio.uio_extflg = UIO_COPY_DEFAULT;
			copy_uio.uio_loffset = uio->uio_loffset;
			copy_uio.uio_resid = uio->uio_resid;
			copy_uio.uio_llimit = uio->uio_llimit;
			error = uiomove(copy_base, copy_resid, UIO_WRITE, uio);
			if (error) {
				kmem_free(copy_base, copy_resid);
				return (0);
			}
			uio = &copy_uio;
			break;
		}
		iov++;
	}

	/*
	 * From here on down, all error exits must go to errout and
	 * not simply return a 0.
	 */

	fs = ip->i_fs;

	/*
	 * POSIX check. If attempting a concurrent re-write, make sure
	 * that this will be a single request to the driver to meet
	 * POSIX synchronous data integrity requirements.
	 */
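	/*
	 * If the re-write cannot be issued as one contiguous request
	 * (more than one iovec, larger than the I/O cluster, or the blocks
	 * are not contiguous on disk), i_contents is upgraded to writer so
	 * the write is serialized instead; ufs_posix_hits counts these.
	 */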
	bmap_peek = 0;
	if (rewrite && ((ioflag & FDSYNC) || ufs_force_posix_sdi)) {
		int upgrade = 0;

		/* check easy conditions first */
		if (uio->uio_iovcnt != 1 || resid > ufsvfsp->vfs_ioclustsz) {
			upgrade = 1;
		} else {
			/* now look for contiguous allocation */
			len = (ssize_t)blkroundup(fs, resid);
			error = bmap_read(ip, uoff, &bn, &len);
			if (error || bn == UFS_HOLE || len == 0)
				goto errout;

			/* save a call to bmap_read later */
			bmap_peek = 1;

			if (len < resid)
				upgrade = 1;
		}

		if (upgrade) {
			rw_exit(&ip->i_contents);
			rw_enter(&ip->i_contents, RW_WRITER);
			ufs_posix_hits++;
		}
	}

	/*
	 * If attempting a re-write, there is no allocation to do.
	 * bmap_write would trip an ASSERT if i_contents is held shared.
	 */
	if (rewrite)
		goto skip_alloc;

	do {
		on = (int)blkoff(fs, uoff);
		n = (int)MIN(fs->fs_bsize - on, resid);
		if ((uoff + n) > ip->i_size) {
			error = bmap_write(ip, uoff, (int)(on + n),
			    (int)(uoff & (offset_t)MAXBOFFSET) == 0,
			    NULL, cr);
			/* Caller is responsible for updating i_seq if needed */
			if (error)
				break;
			ip->i_size = uoff + n;
			ip->i_flag |= IATTCHG;
		} else if (n == MAXBSIZE) {
			error = bmap_write(ip, uoff, (int)(on + n),
			    BI_ALLOC_ONLY, NULL, cr);
			/* Caller is responsible for updating i_seq if needed */
		} else {
			if (has_holes < 0)
				has_holes = bmap_has_holes(ip);
			if (has_holes) {
				uint_t	blk_size;
				uoff_t	offset;

				offset = uoff & (offset_t)fs->fs_bmask;
				blk_size = (int)blksize(fs, ip,
				    (daddr_t)lblkno(fs, offset));
				error = bmap_write(ip, uoff, blk_size,
				    BI_NORMAL, NULL, cr);
				/*
				 * Caller is responsible for updating
				 * i_seq if needed
				 */
			} else
				error = 0;
		}
		if (error)
			break;
		uoff += n;
		resid -= n;

		/*
		 * if file has grown larger than 2GB, set flag
		 * in superblock if not already set
		 */
		if ((ip->i_size > MAXOFF32_T) &&
		    !(fs->fs_flags & FSLARGEFILES)) {
			ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
			mutex_enter(&ufsvfsp->vfs_lock);
			fs->fs_flags |= FSLARGEFILES;
			ufs_sbwrite(ufsvfsp);
			mutex_exit(&ufsvfsp->vfs_lock);
		}
	} while (resid);

	if (error) {
		/*
		 * restore original state
		 */
		if (resid) {
			if (size == ip->i_size)
				goto errout;
			(void) ufs_itrunc(ip, size, 0, cr);
		}
		/*
		 * try non-directio path
		 */
		goto errout;
	}
skip_alloc:

	/*
	 * get rid of cached pages
	 */
	vp = ITOV(ip);
	exclusive = rw_write_held(&ip->i_contents);
	if (vn_has_cached_data(vp)) {
		if (!exclusive) {
			/*
			 * Still holding i_rwlock, so no allocations
			 * can happen after dropping contents.
			 */
			rw_exit(&ip->i_contents);
			rw_enter(&ip->i_contents, RW_WRITER);
		}
		(void) fop_putpage(vp, (offset_t)0, (size_t)0,
		    B_INVAL, cr, NULL);
		if (vn_has_cached_data(vp))
			goto errout;
		if (!exclusive)
			rw_downgrade(&ip->i_contents);
		ufs_directio_kstats.nflushes.value.ui64++;
	}

	if (!exclusive) {
		ufs_shared_writes++;
		ncur = atomic_inc_32_nv(&ufs_cur_writes);
		if (ncur > ufs_maxcur_writes)
			ufs_maxcur_writes = ncur;
	}

	/*
	 * proc and as are for VM operations in directio_start()
	 */
	if (uio->uio_segflg == UIO_USERSPACE) {
		procp = ttoproc(curthread);
		as = procp->p_as;
	} else {
		procp = NULL;
		as = &kas;
	}

	*statusp = DIRECTIO_SUCCESS;
	error = 0;
	newerror = 0;
	resid = uio->uio_resid;
	bytes_written = 0;
	ufs_directio_kstats.logical_writes.value.ui64++;
	while (error == 0 && newerror == 0 && resid && uio->uio_iovcnt) {
		size_t pglck_len, pglck_size;
		caddr_t pglck_base;
		page_t **pplist, **spplist;

		tail = NULL;

		/*
		 * Adjust number of bytes
		 */
		iov = uio->uio_iov;
		pglck_len = (size_t)MIN(iov->iov_len, resid);
		pglck_base = iov->iov_base;
		if (pglck_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}

		/*
		 * Try to lock down the largest chunk of pages possible.
		 */
		pglck_len = (size_t)MIN(pglck_len, ufsvfsp->vfs_ioclustsz);
		error = as_pagelock(as, &pplist, pglck_base, pglck_len, S_READ);
		if (error)
			break;

		pglck_size = pglck_len;
		while (pglck_len) {

			nbytes = pglck_len;
			uoff = uio->uio_loffset;

			if (!bmap_peek) {
				/*
				 * Re-adjust number of bytes to contiguous
				 * range. May have already called bmap_read
				 * in the case of a concurrent rewrite.
				 */
				len = (ssize_t)blkroundup(fs, nbytes);
				error = bmap_read(ip, uoff, &bn, &len);
				if (error)
					break;
				if (bn == UFS_HOLE || len == 0)
					break;
			}
			nbytes = (size_t)MIN(nbytes, len);
			bmap_peek = 0;

			/*
			 * Get the pagelist pointer for this offset to be
			 * passed to directio_start.
			 */
			if (pplist != NULL)
				spplist = pplist +
				    btop((uintptr_t)iov->iov_base -
				    ((uintptr_t)pglck_base & PAGEMASK));
			else
				spplist = NULL;

			/*
			 * Kick off the direct write requests
			 */
			directio_start(ufsvfsp, ip, nbytes, ldbtob(bn),
			    iov->iov_base, S_READ, procp, &tail, spplist);

			/*
			 * Adjust pointers and counters
			 */
			iov->iov_len -= nbytes;
			iov->iov_base += nbytes;
			uio->uio_loffset += nbytes;
			resid -= nbytes;
			pglck_len -= nbytes;
		}

		/*
		 * Wait for outstanding requests
		 */
		newerror = directio_wait(tail, &bytes_written);

		/*
		 * Release VM resources
		 */
		as_pageunlock(as, pplist, pglck_base, pglck_size, S_READ);
	}

	if (!exclusive) {
		atomic_dec_32(&ufs_cur_writes);
		/*
		 * If this write was done shared, readers may
		 * have pulled in unmodified pages. Get rid of
		 * these potentially stale pages.
		 */
		if (vn_has_cached_data(vp)) {
			rw_exit(&ip->i_contents);
			rw_enter(&ip->i_contents, RW_WRITER);
			(void) fop_putpage(vp, (offset_t)0, (size_t)0,
			    B_INVAL, cr, NULL);
			ufs_directio_kstats.nflushes.value.ui64++;
			rw_downgrade(&ip->i_contents);
		}
	}

	/*
	 * If error, adjust resid to begin at the first
	 * un-written byte.
	 */
	if (error == 0)
		error = newerror;
	if (error)
		resid = uio->uio_resid - bytes_written;
	arg_uio->uio_resid = resid;

	if (!rewrite) {
		ip->i_flag |= IUPD | ICHG;
		/* Caller will update i_seq */
		TRANS_INODE(ip->i_ufsvfs, ip);
	}

	/*
	 * If there is a residual; adjust the EOF if necessary
	 */
	if (resid) {
		if (size != ip->i_size) {
			if (uio->uio_loffset > size)
				size = uio->uio_loffset;
			(void) ufs_itrunc(ip, size, 0, cr);
		}
	}

	if (uio == &copy_uio)
		kmem_free(copy_base, copy_resid);

	return (error);

errout:
	if (uio == &copy_uio)
		kmem_free(copy_base, copy_resid);

	return (0);
}

/*
 * Direct read of a hole
 */
static int
directio_hole(struct uio *uio, size_t nbytes)
{
	int		error = 0, nzero;
	uio_t		phys_uio;
	iovec_t		phys_iov;

	ufs_directio_kstats.hole_reads.value.ui64++;
	ufs_directio_kstats.nread.value.ui64 += nbytes;

	phys_iov.iov_base = uio->uio_iov->iov_base;
	phys_iov.iov_len = nbytes;

	phys_uio.uio_iov = &phys_iov;
	phys_uio.uio_iovcnt = 1;
	phys_uio.uio_resid = phys_iov.iov_len;
	phys_uio.uio_segflg = uio->uio_segflg;
	phys_uio.uio_extflg = uio->uio_extflg;
	while (error == 0 && phys_uio.uio_resid) {
		nzero = (int)MIN(phys_iov.iov_len, ufs_directio_zero_len);
		error = uiomove(ufs_directio_zero_buf, nzero, UIO_READ,
		    &phys_uio);
	}
	return (error);
}

/*
 * Direct Read
 */
int
ufs_directio_read(struct inode *ip, uio_t *uio, cred_t *cr, int *statusp)
{
	ssize_t		resid, bytes_read;
	uoff_t		size, uoff;
	int		error, newerror, len;
	size_t		nbytes;
	struct fs	*fs;
	vnode_t		*vp;
	daddr_t		bn;
	iovec_t		*iov;
	struct proc	*procp;
	struct as	*as;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	struct directio_buf	*tail;

	/*
	 * assume that directio isn't possible (normal case)
	 */
	*statusp = DIRECTIO_FAILURE;

	if (ufs_directio_enabled == 0)
		return (0);

	/*
	 * mapped file; nevermind
	 */
	if (ip->i_mapcnt)
		return (0);

	/*
	 * CAN WE DO DIRECT IO?
	 */
	/*
	 * must be sector aligned
	 */
	uoff = uio->uio_loffset;
	resid = uio->uio_resid;
	if ((uoff & (uoff_t)(DEV_BSIZE - 1)) || (resid & (DEV_BSIZE - 1)))
		return (0);

	/*
	 * must be short aligned and sector aligned
	 */
	iov = uio->uio_iov;
	nbytes = uio->uio_iovcnt;
	while (nbytes--) {
		if (((size_t)iov->iov_len & (DEV_BSIZE - 1)) != 0)
			return (0);
		if ((intptr_t)(iov++->iov_base) & 1)
			return (0);
	}

	fs = ip->i_fs;

	/*
	 * don't read past EOF
	 */
	size = ip->i_size;

	/*
	 * The file offset is past EOF so bail out here; we don't want
	 * to update uio_resid and make it look like we read something.
	 * We say that direct I/O was a success to avoid having rdip()
	 * go through the same "read past EOF logic".
	 */
	if (uoff >= size) {
		*statusp = DIRECTIO_SUCCESS;
		return (0);
	}

	/*
	 * The read would extend past EOF so make it smaller.
	 */
	if ((uoff + resid) > size) {
		resid = size - uoff;
		/*
		 * recheck sector alignment
		 */
		if (resid & (DEV_BSIZE - 1))
			return (0);
	}

	/*
	 * At this point, we know there is some real work to do.
	 */
	ASSERT(resid);

	/*
	 * get rid of cached pages
	 */
	vp = ITOV(ip);
	if (vn_has_cached_data(vp)) {
		rw_exit(&ip->i_contents);
		rw_enter(&ip->i_contents, RW_WRITER);
		(void) fop_putpage(vp, (offset_t)0, (size_t)0,
		    B_INVAL, cr, NULL);
		if (vn_has_cached_data(vp))
			return (0);
		rw_downgrade(&ip->i_contents);
		ufs_directio_kstats.nflushes.value.ui64++;
	}

	/*
	 * proc and as are for VM operations in directio_start()
	 */
	if (uio->uio_segflg == UIO_USERSPACE) {
		procp = ttoproc(curthread);
		as = procp->p_as;
	} else {
		procp = NULL;
		as = &kas;
	}

	*statusp = DIRECTIO_SUCCESS;
	error = 0;
	newerror = 0;
	bytes_read = 0;
	ufs_directio_kstats.logical_reads.value.ui64++;
	while (error == 0 && newerror == 0 && resid && uio->uio_iovcnt) {
		size_t pglck_len, pglck_size;
		caddr_t pglck_base;
		page_t **pplist, **spplist;

		tail = NULL;

		/*
		 * Adjust number of bytes
		 */
		iov = uio->uio_iov;
		pglck_len = (size_t)MIN(iov->iov_len, resid);
		pglck_base = iov->iov_base;
		if (pglck_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}

		/*
		 * Try to lock down the largest chunk of pages possible.
		 */
		pglck_len = (size_t)MIN(pglck_len, ufsvfsp->vfs_ioclustsz);
		error = as_pagelock(as, &pplist, pglck_base,
		    pglck_len, S_WRITE);
		if (error)
			break;

		pglck_size = pglck_len;
		while (pglck_len) {

			nbytes = pglck_len;
			uoff = uio->uio_loffset;

			/*
			 * Re-adjust number of bytes to contiguous range
			 */
			len = (ssize_t)blkroundup(fs, nbytes);
			error = bmap_read(ip, uoff, &bn, &len);
			if (error)
				break;

			if (bn == UFS_HOLE) {
				nbytes = (size_t)MIN(fs->fs_bsize -
				    (long)blkoff(fs, uoff), nbytes);
				error = directio_hole(uio, nbytes);
				/*
				 * Hole reads are not added to the list
				 * processed by directio_wait() below so
				 * account for bytes read here.
				 */
				if (!error)
					bytes_read += nbytes;
			} else {
				nbytes = (size_t)MIN(nbytes, len);

				/*
				 * Get the pagelist pointer for this offset
				 * to be passed to directio_start.
				 */
				if (pplist != NULL)
					spplist = pplist +
					    btop((uintptr_t)iov->iov_base -
					    ((uintptr_t)pglck_base & PAGEMASK));
				else
					spplist = NULL;

				/*
				 * Kick off the direct read requests
				 */
				directio_start(ufsvfsp, ip, nbytes,
				    ldbtob(bn), iov->iov_base,
				    S_WRITE, procp, &tail, spplist);
			}

			if (error)
				break;

			/*
			 * Adjust pointers and counters
			 */
			iov->iov_len -= nbytes;
			iov->iov_base += nbytes;
			uio->uio_loffset += nbytes;
			resid -= nbytes;
			pglck_len -= nbytes;
		}

		/*
		 * Wait for outstanding requests
		 */
		newerror = directio_wait(tail, &bytes_read);

		/*
		 * Release VM resources
		 */
		as_pageunlock(as, pplist, pglck_base, pglck_size, S_WRITE);
	}

	/*
	 * If error, adjust resid to begin at the first
	 * un-read byte.
	 */
	if (error == 0)
		error = newerror;
	uio->uio_resid -= bytes_read;
	return (error);
}