/*  $NetBSD: ulfs_readwrite.c,v 1.7 2013/10/17 21:01:08 christos Exp $  */
/*  from NetBSD: ufs_readwrite.c,v 1.105 2013/01/22 09:39:18 dholland Exp  */

/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: ulfs_readwrite.c,v 1.7 2013/10/17 21:01:08 christos Exp $");

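/*
 * When LFS_READWRITE is defined, the macros below bind READ/WRITE to the
 * LFS entry points and map the generic superblock/inode field names onto
 * "struct lfs"; otherwise they bind to the FFS entry points, so the same
 * code body can serve either file system.
 */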
#ifdef LFS_READWRITE
#define FS              struct lfs
#define I_FS            i_lfs
#define READ            lfs_read
#define READ_S          "lfs_read"
#define WRITE           lfs_write
#define WRITE_S         "lfs_write"
#define fs_bsize        lfs_bsize
#define fs_bmask        lfs_bmask
#else
#define FS              struct fs
#define I_FS            i_fs
#define READ            ffs_read
#define READ_S          "ffs_read"
#define WRITE           ffs_write
#define WRITE_S         "ffs_write"
#endif

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
int
READ(void *v)
{
        struct vop_read_args /* {
                struct vnode *a_vp;
                struct uio *a_uio;
                int a_ioflag;
                kauth_cred_t a_cred;
        } */ *ap = v;
        struct vnode *vp;
        struct inode *ip;
        struct uio *uio;
        struct buf *bp;
        FS *fs;
        vsize_t bytelen;
        daddr_t lbn, nextlbn;
        off_t bytesinfile;
        long size, xfersize, blkoffset;
        int error, ioflag;
        bool usepc = false;

        vp = ap->a_vp;
        ip = VTOI(vp);
        fs = ip->I_FS;
        uio = ap->a_uio;
        ioflag = ap->a_ioflag;
        error = 0;

#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_READ)
                panic("%s: mode", READ_S);

        if (vp->v_type == VLNK) {
                if (ip->i_size < fs->um_maxsymlinklen ||
                    (fs->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0))
                        panic("%s: short symlink", READ_S);
        } else if (vp->v_type != VREG && vp->v_type != VDIR)
                panic("%s: type %d", READ_S, vp->v_type);
#endif
        if ((u_int64_t)uio->uio_offset > fs->um_maxfilesize)
                return (EFBIG);
        if (uio->uio_resid == 0)
                return (0);

#ifndef LFS_READWRITE
        if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT)
                return ffs_snapshot_read(vp, uio, ioflag);
#endif /* !LFS_READWRITE */

        fstrans_start(vp->v_mount, FSTRANS_SHARED);

        if (uio->uio_offset >= ip->i_size)
                goto out;

#ifdef LFS_READWRITE
        usepc = (vp->v_type == VREG && ip->i_number != LFS_IFILE_INUM);
#else /* !LFS_READWRITE */
        usepc = vp->v_type == VREG;
#endif /* !LFS_READWRITE */
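        /*
         * Regular files (other than the LFS Ifile) are read through the
         * page cache with ubc_uiomove(); everything else falls through to
         * the traditional buffer-cache loop below.
         */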
        if (usepc) {
                const int advice = IO_ADV_DECODE(ap->a_ioflag);

                while (uio->uio_resid > 0) {
                        if (ioflag & IO_DIRECT) {
                                genfs_directio(vp, uio, ioflag);
                        }
                        bytelen = MIN(ip->i_size - uio->uio_offset,
                            uio->uio_resid);
                        if (bytelen == 0)
                                break;
                        error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice,
                            UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
                        if (error)
                                break;
                }
                goto out;
        }

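        /*
         * Buffer-cache path: read one block at a time, starting read-ahead
         * of the next block with breadn() while more of the file remains.
         */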
        for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
                bytesinfile = ip->i_size - uio->uio_offset;
                if (bytesinfile <= 0)
                        break;
                lbn = lfs_lblkno(fs, uio->uio_offset);
                nextlbn = lbn + 1;
                size = lfs_blksize(fs, ip, lbn);
                blkoffset = lfs_blkoff(fs, uio->uio_offset);
                xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
                    bytesinfile);

                if (lfs_lblktosize(fs, nextlbn) >= ip->i_size)
                        error = bread(vp, lbn, size, NOCRED, 0, &bp);
                else {
                        int nextsize = lfs_blksize(fs, ip, nextlbn);
                        error = breadn(vp, lbn,
                            size, &nextlbn, &nextsize, 1, NOCRED, 0, &bp);
                }
                if (error)
                        break;

                /*
                 * We should only get non-zero b_resid when an I/O error
                 * has occurred, which should cause us to break above.
                 * However, if the short read did not cause an error,
                 * then we want to ensure that we do not uiomove bad
                 * or uninitialized data.
                 */
                size -= bp->b_resid;
                if (size < xfersize) {
                        if (size == 0)
                                break;
                        xfersize = size;
                }
                error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
                if (error)
                        break;
                brelse(bp, 0);
        }
        if (bp != NULL)
                brelse(bp, 0);

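        /*
         * Mark the inode for an access-time update unless the file system
         * is mounted noatime; for IO_SYNC reads, push the update to disk
         * immediately.
         */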
 out:
        if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
                ip->i_flag |= IN_ACCESS;
                if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) {
                        error = lfs_update(vp, NULL, NULL, UPDATE_WAIT);
                }
        }

        fstrans_done(vp->v_mount);
        return (error);
}

/*
 * Vnode op for writing.
 */
int
WRITE(void *v)
{
        struct vop_write_args /* {
                struct vnode *a_vp;
                struct uio *a_uio;
                int a_ioflag;
                kauth_cred_t a_cred;
        } */ *ap = v;
        struct vnode *vp;
        struct uio *uio;
        struct inode *ip;
        FS *fs;
        struct buf *bp;
        kauth_cred_t cred;
        daddr_t lbn;
        off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
        int blkoffset, error, flags, ioflag, resid, size, xfersize;
        int aflag;
        int extended=0;
        vsize_t bytelen;
        bool async;
        bool usepc = false;
#ifdef LFS_READWRITE
        bool need_unreserve = false;
#endif

        cred = ap->a_cred;
        ioflag = ap->a_ioflag;
        uio = ap->a_uio;
        vp = ap->a_vp;
        ip = VTOI(vp);

        KASSERT(vp->v_size == ip->i_size);
#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_WRITE)
                panic("%s: mode", WRITE_S);
#endif
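
        /*
         * Writes are only expected for regular files, symlinks and
         * (synchronous) directory writes; IO_APPEND positions the write at
         * the current end of file.
         */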
        switch (vp->v_type) {
        case VREG:
                if (ioflag & IO_APPEND)
                        uio->uio_offset = ip->i_size;
                if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
                        return (EPERM);
                /* FALLTHROUGH */
        case VLNK:
                break;
        case VDIR:
                if ((ioflag & IO_SYNC) == 0)
                        panic("%s: nonsync dir write", WRITE_S);
                break;
        default:
                panic("%s: type", WRITE_S);
        }

        fs = ip->I_FS;
        if (uio->uio_offset < 0 ||
            (u_int64_t)uio->uio_offset + uio->uio_resid > fs->um_maxfilesize)
                return (EFBIG);
#ifdef LFS_READWRITE
        /* Disallow writes to the Ifile, even if noschg flag is removed */
        /* XXX can this go away when the Ifile is no longer in the namespace? */
        if (vp == fs->lfs_ivnode)
                return (EPERM);
#endif
        if (uio->uio_resid == 0)
                return (0);

        fstrans_start(vp->v_mount, FSTRANS_SHARED);

        flags = ioflag & IO_SYNC ? B_SYNC : 0;
        async = vp->v_mount->mnt_flag & MNT_ASYNC;
        origoff = uio->uio_offset;
        resid = uio->uio_resid;
        osize = ip->i_size;
        error = 0;

        usepc = vp->v_type == VREG;

#ifdef LFS_READWRITE
        async = true;
        lfs_availwait(fs, lfs_btofsb(fs, uio->uio_resid));
        lfs_check(vp, LFS_UNUSED_LBN, 0);
#endif /* !LFS_READWRITE */
        if (!usepc)
                goto bcache;

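        /*
         * Page-cache write path.  [preallocoff, endallocoff) brackets the
         * whole blocks that lie beyond the old end of file and are covered
         * entirely by this write; those can be allocated without first
         * initializing their pages (see the "overwrite" handling in the
         * loop below).
         */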
        preallocoff = round_page(lfs_blkroundup(fs, MAX(osize, uio->uio_offset)));
        aflag = ioflag & IO_SYNC ? B_SYNC : 0;
        nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
        endallocoff = nsize - lfs_blkoff(fs, nsize);

        /*
         * if we're increasing the file size, deal with expanding
         * the fragment if there is one.
         */

        if (nsize > osize && lfs_lblkno(fs, osize) < ULFS_NDADDR &&
            lfs_lblkno(fs, osize) != lfs_lblkno(fs, nsize) &&
            lfs_blkroundup(fs, osize) != osize) {
                off_t eob;

                eob = lfs_blkroundup(fs, osize);
                uvm_vnp_setwritesize(vp, eob);
                error = ulfs_balloc_range(vp, osize, eob - osize, cred, aflag);
                if (error)
                        goto out;
                if (flags & B_SYNC) {
                        mutex_enter(vp->v_interlock);
                        VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask),
                            round_page(eob),
                            PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED);
                }
        }

        while (uio->uio_resid > 0) {
                int ubc_flags = UBC_WRITE;
                bool overwrite; /* if we're overwriting a whole block */
                off_t newoff;

                if (ioflag & IO_DIRECT) {
                        genfs_directio(vp, uio, ioflag | IO_JOURNALLOCKED);
                }

                oldoff = uio->uio_offset;
                blkoffset = lfs_blkoff(fs, uio->uio_offset);
                bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
                if (bytelen == 0) {
                        break;
                }

                /*
                 * if we're filling in a hole, allocate the blocks now and
                 * initialize the pages first.  if we're extending the file,
                 * we can safely allocate blocks without initializing pages
                 * since the new blocks will be inaccessible until the write
                 * is complete.
                 */
                overwrite = uio->uio_offset >= preallocoff &&
                    uio->uio_offset < endallocoff;
                if (!overwrite && (vp->v_vflag & VV_MAPPED) == 0 &&
                    lfs_blkoff(fs, uio->uio_offset) == 0 &&
                    (uio->uio_offset & PAGE_MASK) == 0) {
                        vsize_t len;

                        len = trunc_page(bytelen);
                        len -= lfs_blkoff(fs, len);
                        if (len > 0) {
                                overwrite = true;
                                bytelen = len;
                        }
                }

                newoff = oldoff + bytelen;
                if (vp->v_size < newoff) {
                        uvm_vnp_setwritesize(vp, newoff);
                }

                if (!overwrite) {
                        error = ulfs_balloc_range(vp, uio->uio_offset, bytelen,
                            cred, aflag);
                        if (error)
                                break;
                } else {
                        genfs_node_wrlock(vp);
                        error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
                            aflag, cred);
                        genfs_node_unlock(vp);
                        if (error)
                                break;
                        ubc_flags |= UBC_FAULTBUSY;
                }

                /*
                 * copy the data.
                 */

                error = ubc_uiomove(&vp->v_uobj, uio, bytelen,
                    IO_ADV_DECODE(ioflag), ubc_flags | UBC_UNMAP_FLAG(vp));

                /*
                 * update UVM's notion of the size now that we've
                 * copied the data into the vnode's pages.
                 *
                 * we should update the size even when uiomove failed.
                 */

                if (vp->v_size < newoff) {
                        uvm_vnp_setsize(vp, newoff);
                        extended = 1;
                }

                if (error)
                        break;

                /*
                 * flush what we just wrote if necessary.
                 * XXXUBC simplistic async flushing.
                 */

#ifndef LFS_READWRITE
                if (!async && oldoff >> 16 != uio->uio_offset >> 16) {
                        mutex_enter(vp->v_interlock);
                        error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16,
                            (uio->uio_offset >> 16) << 16,
                            PGO_CLEANIT | PGO_JOURNALLOCKED | PGO_LAZY);
                        if (error)
                                break;
                }
#else
                __USE(async);
#endif
        }
        if (error == 0 && ioflag & IO_SYNC) {
                mutex_enter(vp->v_interlock);
                error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask),
                    round_page(lfs_blkroundup(fs, uio->uio_offset)),
                    PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED);
        }
        goto out;

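        /*
         * Buffer-cache write path, used for directories and symlinks.
         * Flush (and free) any cached pages over the affected range first
         * so the page cache and buffer cache do not end up with conflicting
         * copies of the same blocks.
         */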
 bcache:
        mutex_enter(vp->v_interlock);
        VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid),
            PGO_CLEANIT | PGO_FREE | PGO_SYNCIO | PGO_JOURNALLOCKED);
        while (uio->uio_resid > 0) {
                lbn = lfs_lblkno(fs, uio->uio_offset);
                blkoffset = lfs_blkoff(fs, uio->uio_offset);
                xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
                if (fs->fs_bsize > xfersize)
                        flags |= B_CLRBUF;
                else
                        flags &= ~B_CLRBUF;

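                /*
                 * For LFS, reserve room for this block plus a worst-case
                 * chain of indirect blocks ((ULFS_NIADDR + 1) file-system
                 * blocks) before allocating; the reservation is released
                 * after the block is written, or in the cleanup below if
                 * the loop exits early.
                 */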
#ifdef LFS_READWRITE
                error = lfs_reserve(fs, vp, NULL,
                    lfs_btofsb(fs, (ULFS_NIADDR + 1) << fs->lfs_bshift));
                if (error)
                        break;
                need_unreserve = true;
#endif
                error = lfs_balloc(vp, uio->uio_offset, xfersize,
                    ap->a_cred, flags, &bp);

                if (error)
                        break;
                if (uio->uio_offset + xfersize > ip->i_size) {
                        ip->i_size = uio->uio_offset + xfersize;
                        DIP_ASSIGN(ip, size, ip->i_size);
                        uvm_vnp_setsize(vp, ip->i_size);
                        extended = 1;
                }
                size = lfs_blksize(fs, ip, lbn) - bp->b_resid;
                if (xfersize > size)
                        xfersize = size;

                error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);

                /*
                 * if we didn't clear the block and the uiomove failed,
                 * the buf will now contain part of some other file,
                 * so we need to invalidate it.
                 */
                if (error && (flags & B_CLRBUF) == 0) {
                        brelse(bp, BC_INVAL);
                        break;
                }
#ifdef LFS_READWRITE
                (void)VOP_BWRITE(bp->b_vp, bp);
                lfs_reserve(fs, vp, NULL,
                    -lfs_btofsb(fs, (ULFS_NIADDR + 1) << fs->lfs_bshift));
                need_unreserve = false;
#else
                if (ioflag & IO_SYNC)
                        (void)bwrite(bp);
                else if (xfersize + blkoffset == fs->fs_bsize)
                        bawrite(bp);
                else
                        bdwrite(bp);
#endif
                if (error || xfersize == 0)
                        break;
        }
#ifdef LFS_READWRITE
        if (need_unreserve) {
                lfs_reserve(fs, vp, NULL,
                    -lfs_btofsb(fs, (ULFS_NIADDR + 1) << fs->lfs_bshift));
        }
#endif

        /*
         * If we successfully wrote any data, and we are not the superuser
         * we clear the setuid and setgid bits as a precaution against
         * tampering.
         */
 out:
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
        if (vp->v_mount->mnt_flag & MNT_RELATIME)
                ip->i_flag |= IN_ACCESS;
        if (resid > uio->uio_resid && ap->a_cred) {
                if (ip->i_mode & ISUID) {
                        if (kauth_authorize_vnode(ap->a_cred,
                            KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) {
                                ip->i_mode &= ~ISUID;
                                DIP_ASSIGN(ip, mode, ip->i_mode);
                        }
                }

                if (ip->i_mode & ISGID) {
                        if (kauth_authorize_vnode(ap->a_cred,
                            KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) {
                                ip->i_mode &= ~ISGID;
                                DIP_ASSIGN(ip, mode, ip->i_mode);
                        }
                }
        }
        if (resid > uio->uio_resid)
                VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
        if (error) {
                (void) lfs_truncate(vp, osize, ioflag & IO_SYNC, ap->a_cred);
                uio->uio_offset -= resid - uio->uio_resid;
                uio->uio_resid = resid;
        } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) {
                error = lfs_update(vp, NULL, NULL, UPDATE_WAIT);
        } else {
                /* nothing */
        }
        KASSERT(vp->v_size == ip->i_size);
        fstrans_done(vp->v_mount);

        return (error);
}