/*	$NetBSD: ulfs_readwrite.c,v 1.7 2013/10/17 21:01:08 christos Exp $	*/
/*  from NetBSD: ufs_readwrite.c,v 1.105 2013/01/22 09:39:18 dholland Exp  */

/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ulfs_readwrite.c,v 1.7 2013/10/17 21:01:08 christos Exp $");

#ifdef LFS_READWRITE
#define	FS			struct lfs
#define	I_FS			i_lfs
#define	READ			lfs_read
#define	READ_S			"lfs_read"
#define	WRITE			lfs_write
#define	WRITE_S			"lfs_write"
#define	fs_bsize		lfs_bsize
#define	fs_bmask		lfs_bmask
#else
#define	FS			struct fs
#define	I_FS			i_fs
#define	READ			ffs_read
#define	READ_S			"ffs_read"
#define	WRITE			ffs_write
#define	WRITE_S			"ffs_write"
#endif
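
/*
 * Like the ufs_readwrite.c it derives from, this file is meant to be
 * #included by the filesystem's vnops file rather than compiled on its
 * own: with LFS_READWRITE defined the code becomes lfs_read()/lfs_write(),
 * otherwise it keeps the original ffs_read()/ffs_write() identity.
 */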

/*
 * Vnode op for reading.
 */
int
READ(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct buf *bp;
	FS *fs;
	vsize_t bytelen;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, ioflag;
	bool usepc = false;

	vp = ap->a_vp;
	ip = VTOI(vp);
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	error = 0;

	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		if (ip->i_size < fs->um_maxsymlinklen ||
		    (fs->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0))
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);

	fs = ip->I_FS;
	if ((u_int64_t)uio->uio_offset > fs->um_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
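
	/*
	 * In the FFS build, reads from an active snapshot (SF_SNAPSHOT set
	 * and SF_SNAPINVAL clear) take a dedicated path; LFS compiles this
	 * out.
	 */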
#ifndef LFS_READWRITE
	if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT)
		return ffs_snapshot_read(vp, uio, ioflag);
#endif /* !LFS_READWRITE */

	fstrans_start(vp->v_mount, FSTRANS_SHARED);

	if (uio->uio_offset >= ip->i_size)
		goto out;

#ifdef LFS_READWRITE
	usepc = (vp->v_type == VREG && ip->i_number != LFS_IFILE_INUM);
#else /* !LFS_READWRITE */
	usepc = vp->v_type == VREG;
#endif /* !LFS_READWRITE */
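
	/*
	 * Read regular files through the page cache.  LFS excludes the
	 * Ifile here, presumably because it is managed through the buffer
	 * cache by the segment writer.
	 */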
	if (usepc) {
		const int advice = IO_ADV_DECODE(ap->a_ioflag);

		while (uio->uio_resid > 0) {
			if (ioflag & IO_DIRECT) {
				genfs_directio(vp, uio, ioflag);
			}
			bytelen = MIN(ip->i_size - uio->uio_offset,
			    uio->uio_resid);
			if (bytelen == 0)
				break;
			error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice,
			    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
			if (error)
				break;
		}
		goto out;
	}
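
	/*
	 * Fall back to the buffer cache for everything the loop above
	 * does not handle: directories, symlinks, and (for LFS) the Ifile.
	 */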
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		bytesinfile = ip->i_size - uio->uio_offset;
		if (bytesinfile <= 0)
			break;
		lbn = lfs_lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = lfs_blksize(fs, ip, lbn);
		blkoffset = lfs_blkoff(fs, uio->uio_offset);
		xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
		    bytesinfile);
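
		/*
		 * Read the block through the buffer cache, with one block
		 * of read-ahead (breadn) unless this is the last block of
		 * the file.
		 */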
		if (lfs_lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, 0, &bp);
		else {
			int nextsize = lfs_blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, 0, &bp);
		}
		if (error)
			break;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp, 0);
	}
	if (bp != NULL)
		brelse(bp, 0);

 out:
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
		ip->i_flag |= IN_ACCESS;
		if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) {
			error = lfs_update(vp, NULL, NULL, UPDATE_WAIT);
		}
	}

	fstrans_done(vp->v_mount);
	return (error);
}

/*
 * Vnode op for writing.
 */
int
WRITE(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	FS *fs;
	kauth_cred_t cred;
	off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
	int blkoffset, error, flags, ioflag, resid, size, xfersize;
	int aflag;
	int extended = 0;
	vsize_t bytelen;
	bool async;
	bool usepc = false;
#ifdef LFS_READWRITE
	bool need_unreserve = false;
#endif

	cred = ap->a_cred;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);

	KASSERT(vp->v_size == ip->i_size);
	if (uio->uio_rw != UIO_WRITE)
		panic("%s: mode", WRITE_S);

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		if ((ioflag & IO_SYNC) == 0)
			panic("%s: nonsync dir write", WRITE_S);
		break;
248 panic("%s: type", WRITE_S
);

	fs = ip->I_FS;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->um_maxfilesize)
		return (EFBIG);
#ifdef LFS_READWRITE
	/* Disallow writes to the Ifile, even if noschg flag is removed */
	/* XXX can this go away when the Ifile is no longer in the namespace? */
	if (vp == fs->lfs_ivnode)
		return (EPERM);
#endif
	if (uio->uio_resid == 0)
		return (0);

	fstrans_start(vp->v_mount, FSTRANS_SHARED);

	flags = ioflag & IO_SYNC ? B_SYNC : 0;
	async = vp->v_mount->mnt_flag & MNT_ASYNC;
	origoff = uio->uio_offset;
	resid = uio->uio_resid;
	osize = ip->i_size;
	error = 0;

	usepc = vp->v_type == VREG;
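
	/*
	 * For LFS, throttle before dirtying anything: lfs_availwait()
	 * waits until the log has room for the write, and lfs_check()
	 * gives the segment writer and cleaner a chance to run first.
	 */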
#ifdef LFS_READWRITE
	async = true;
	lfs_availwait(fs, lfs_btofsb(fs, uio->uio_resid));
	lfs_check(vp, LFS_UNUSED_LBN, 0);
#endif /* !LFS_READWRITE */
	if (!usepc)
		goto bcache;

	preallocoff = round_page(lfs_blkroundup(fs, MAX(osize, uio->uio_offset)));
	aflag = ioflag & IO_SYNC ? B_SYNC : 0;
	nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
	endallocoff = nsize - lfs_blkoff(fs, nsize);
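
	/*
	 * [preallocoff, endallocoff) brackets the whole-block, page-aligned
	 * part of the write beyond the old end of file: blocks there will
	 * be completely overwritten, so they can be allocated without
	 * zero-filling their pages first (see the comment below).
	 */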

	/*
	 * if we're increasing the file size, deal with expanding
	 * the fragment if there is one.
	 */

	if (nsize > osize && lfs_lblkno(fs, osize) < ULFS_NDADDR &&
	    lfs_lblkno(fs, osize) != lfs_lblkno(fs, nsize) &&
	    lfs_blkroundup(fs, osize) != osize) {
		off_t eob;

		eob = lfs_blkroundup(fs, osize);
		uvm_vnp_setwritesize(vp, eob);
		error = ulfs_balloc_range(vp, osize, eob - osize, cred, aflag);
		if (error)
			goto out;
		if (flags & B_SYNC) {
			mutex_enter(vp->v_interlock);
			VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask),
			    round_page(eob),
			    PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED);
		}
	}

	while (uio->uio_resid > 0) {
		int ubc_flags = UBC_WRITE;
		bool overwrite;		/* if we overwrite a whole block */
		off_t newoff;

		if (ioflag & IO_DIRECT) {
			genfs_directio(vp, uio, ioflag | IO_JOURNALLOCKED);
		}

		oldoff = uio->uio_offset;
		blkoffset = lfs_blkoff(fs, uio->uio_offset);
		bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
		if (bytelen == 0) {
			break;
		}

		/*
		 * if we're filling in a hole, allocate the blocks now and
		 * initialize the pages first.  if we're extending the file,
		 * we can safely allocate blocks without initializing pages
		 * since the new blocks will be inaccessible until the write
		 * is complete.
		 */
		overwrite = uio->uio_offset >= preallocoff &&
		    uio->uio_offset < endallocoff;
		if (!overwrite && (vp->v_vflag & VV_MAPPED) == 0 &&
		    lfs_blkoff(fs, uio->uio_offset) == 0 &&
		    (uio->uio_offset & PAGE_MASK) == 0) {
			vsize_t len;

			len = trunc_page(bytelen);
			len -= lfs_blkoff(fs, len);
			if (len > 0) {
				overwrite = true;
				bytelen = len;
			}
		}

		newoff = oldoff + bytelen;
		if (vp->v_size < newoff) {
			uvm_vnp_setwritesize(vp, newoff);
		}

		if (!overwrite) {
			error = ulfs_balloc_range(vp, uio->uio_offset, bytelen,
			    cred, aflag);
			if (error)
				break;
		} else {
			genfs_node_wrlock(vp);
			error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
			    aflag, cred);
			genfs_node_unlock(vp);
			if (error)
				break;
			ubc_flags |= UBC_FAULTBUSY;
		}
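
		/*
		 * With UBC_FAULTBUSY, ubc_uiomove() below keeps the newly
		 * allocated pages busy while they are overwritten, so their
		 * uninitialized contents are never visible to a concurrent
		 * page fault.
		 */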

		error = ubc_uiomove(&vp->v_uobj, uio, bytelen,
		    IO_ADV_DECODE(ioflag), ubc_flags | UBC_UNMAP_FLAG(vp));

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 *
		 * we should update the size even when uiomove failed.
		 */

		if (vp->v_size < newoff) {
			uvm_vnp_setsize(vp, newoff);
			extended = 1;
		}

		if (error)
			break;

		/*
		 * flush what we just wrote if necessary.
		 * XXXUBC simplistic async flushing.
		 */

#ifndef LFS_READWRITE
		if (!async && oldoff >> 16 != uio->uio_offset >> 16) {
			mutex_enter(vp->v_interlock);
			error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16,
			    (uio->uio_offset >> 16) << 16,
			    PGO_CLEANIT | PGO_JOURNALLOCKED | PGO_LAZY);
			if (error)
				break;
		}
#endif
	}
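
	/*
	 * Note that in the LFS build the incremental flush above is
	 * compiled out; dirty pages are left for the segment writer,
	 * which batches them into whole segments.
	 */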
	if (error == 0 && ioflag & IO_SYNC) {
		mutex_enter(vp->v_interlock);
		error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask),
		    round_page(lfs_blkroundup(fs, uio->uio_offset)),
		    PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED);
	}
	goto out;

 bcache:
	mutex_enter(vp->v_interlock);
	VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid),
	    PGO_CLEANIT | PGO_FREE | PGO_SYNCIO | PGO_JOURNALLOCKED);
	while (uio->uio_resid > 0) {
		lbn = lfs_lblkno(fs, uio->uio_offset);
		blkoffset = lfs_blkoff(fs, uio->uio_offset);
		xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;
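
		/*
		 * For LFS, reserve worst-case metadata space (the block
		 * itself plus up to ULFS_NIADDR indirect blocks) before
		 * calling lfs_balloc(); the reservation is returned once
		 * the buffer has been written or when the loop exits.
		 */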
#ifdef LFS_READWRITE
		error = lfs_reserve(fs, vp, NULL,
		    lfs_btofsb(fs, (ULFS_NIADDR + 1) << fs->lfs_bshift));
		if (error)
			break;
		need_unreserve = true;
#endif
		error = lfs_balloc(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error)
			break;
		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_ASSIGN(ip, size, ip->i_size);
			uvm_vnp_setsize(vp, ip->i_size);
			extended = 1;
		}
		size = lfs_blksize(fs, ip, lbn) - bp->b_resid;
		if (xfersize > size)
			xfersize = size;

		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);

		/*
		 * if we didn't clear the block and the uiomove failed,
		 * the buf will now contain part of some other file,
		 * so we need to invalidate it.
		 */
		if (error && (flags & B_CLRBUF) == 0) {
			brelse(bp, BC_INVAL);
			break;
		}
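
		/*
		 * LFS pushes every buffer through VOP_BWRITE() so the block
		 * is scheduled for the segment writer, then returns the
		 * space reserved above; the FFS flavour instead chooses
		 * between sync, async, and delayed writes below.
		 */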
#ifdef LFS_READWRITE
		(void)VOP_BWRITE(bp->b_vp, bp);
		lfs_reserve(fs, vp, NULL,
		    -lfs_btofsb(fs, (ULFS_NIADDR + 1) << fs->lfs_bshift));
		need_unreserve = false;
#else
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize)
			bawrite(bp);
		else
			bdwrite(bp);
#endif
		if (error || xfersize == 0)
			break;
	}
#ifdef LFS_READWRITE
	if (need_unreserve) {
		lfs_reserve(fs, vp, NULL,
		    -lfs_btofsb(fs, (ULFS_NIADDR + 1) << fs->lfs_bshift));
	}
#endif

	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
out:
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if (vp->v_mount->mnt_flag & MNT_RELATIME)
		ip->i_flag |= IN_ACCESS;
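	/*
	 * kauth_authorize_vnode() returns 0 when the credential may retain
	 * the bit (e.g. superuser); a non-zero (EPERM) result means the
	 * set-id bits must be stripped.
	 */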
	if (resid > uio->uio_resid && ap->a_cred) {
		if (ip->i_mode & ISUID) {
			if (kauth_authorize_vnode(ap->a_cred,
			    KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) {
				ip->i_mode &= ~ISUID;
				DIP_ASSIGN(ip, mode, ip->i_mode);
			}
		}

		if (ip->i_mode & ISGID) {
			if (kauth_authorize_vnode(ap->a_cred,
			    KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) {
				ip->i_mode &= ~ISGID;
				DIP_ASSIGN(ip, mode, ip->i_mode);
			}
		}
	}
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		(void) lfs_truncate(vp, osize, ioflag & IO_SYNC, ap->a_cred);
		uio->uio_offset -= resid - uio->uio_resid;
		uio->uio_resid = resid;
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) {
		error = lfs_update(vp, NULL, NULL, UPDATE_WAIT);
	}
	KASSERT(vp->v_size == ip->i_size);
	fstrans_done(vp->v_mount);

	return (error);
}