dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / ufs / ufs_extvnops.c
blob1c22b1e4bdfa33605a1ddcb0d25631c066930ebd
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/conf.h>
32 #include <sys/fssnap_if.h>
33 #include <sys/fs/ufs_inode.h>
34 #include <sys/fs/ufs_lockfs.h>
35 #include <sys/fs/ufs_log.h>
36 #include <sys/fs/ufs_trans.h>
37 #include <sys/cmn_err.h>
38 #include <vm/pvn.h>
39 #include <vm/seg_map.h>
40 #include <sys/fdbuffer.h>
#ifdef DEBUG
/* Non-zero turns on the DEBUGF() trace messages used in this file. */
int evn_ufs_debug = 0;
/*
 * DEBUGF((level, fmt, ...)) hands its parenthesised argument list to
 * cmn_err() when evn_ufs_debug is set.  The do/while (0) wrapper makes the
 * expansion a single statement, so "DEBUGF(...);" is safe as the body of an
 * unbraced if/else.
 */
#define	DEBUGF(args)	do { if (evn_ufs_debug) cmn_err args; } while (0)
#else
/* Non-DEBUG builds: DEBUGF() evaluates nothing. */
#define	DEBUGF(args)	((void)0)
#endif
50 * ufs_rdwr_data - supports reading or writing data when
51 * no changes are permitted in file size or space allocation.
53 * Inputs:
54 * fdb - The mandatory fdbuffer supports
55 * the read or write operation.
56 * flags - defaults (zero value) to synchronous write
57 * B_READ - indicates read operation
58 * B_ASYNC - indicates perform operation asynchronously
60 /*ARGSUSED*/
61 int
62 ufs_rdwr_data(
63 vnode_t *vnodep,
64 uoff_t offset,
65 size_t len,
66 fdbuffer_t *fdbp,
67 int flags,
68 cred_t *credp)
70 struct inode *ip = VTOI(vnodep);
71 struct fs *fs;
72 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
73 struct buf *bp;
74 krw_t rwtype = RW_READER;
75 uoff_t offset1 = offset; /* Initial offset */
76 size_t iolen;
77 int curlen = 0;
78 int pplen;
79 daddr_t bn;
80 int contig = 0;
81 int error = 0;
82 int nbytes; /* Number bytes this IO */
83 int offsetn; /* Start point this IO */
84 int iswrite = flags & B_WRITE;
85 int io_started = 0; /* No IO started */
86 struct ulockfs *ulp;
87 uint_t protp = PROT_ALL;
89 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, !iswrite,
90 &protp);
91 if (error) {
92 if (flags & B_ASYNC) {
93 fdb_ioerrdone(fdbp, error);
95 return (error);
97 fs = ufsvfsp->vfs_fs;
98 iolen = len;
100 DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p off %llx len %lx"
101 " isize: %llx fdb: %p\n",
102 flags & B_READ ? "READ" : "WRITE", (void *)vnodep,
103 offset1, iolen, ip->i_size, (void *)fdbp));
105 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
106 rw_enter(&ip->i_contents, rwtype);
108 ASSERT(offset1 < ip->i_size);
110 if ((offset1 + iolen) > ip->i_size) {
111 iolen = ip->i_size - offset1;
113 while (!error && curlen < iolen) {
115 contig = 0;
117 if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) {
118 break;
120 ASSERT(!(bn == UFS_HOLE && iswrite));
121 if (bn == UFS_HOLE) {
123 * If the above assertion is true,
124 * then the following if statement can never be true.
126 if (iswrite && (rwtype == RW_READER)) {
127 rwtype = RW_WRITER;
128 if (!rw_tryupgrade(&ip->i_contents)) {
129 rw_exit(&ip->i_contents);
130 rw_enter(&ip->i_contents, rwtype);
131 continue;
134 offsetn = blkoff(fs, offset1);
135 pplen = P2ROUNDUP(len, PAGESIZE);
136 nbytes = MIN((pplen - curlen),
137 (fs->fs_bsize - offsetn));
138 ASSERT(nbytes > 0);
141 * We may be reading or writing.
143 DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n",
144 offset1, (iolen - curlen)));
146 if (iswrite) {
147 printf("**WARNING: ignoring hole in write\n");
148 error = ENOSPC;
149 } else {
150 fdb_add_hole(fdbp, offset1 - offset, nbytes);
152 offset1 += nbytes;
153 curlen += nbytes;
154 continue;
157 ASSERT(contig > 0);
158 pplen = P2ROUNDUP(len, PAGESIZE);
160 contig = MIN(contig, len - curlen);
161 contig = P2ROUNDUP(contig, DEV_BSIZE);
163 bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags);
165 bp->b_edev = ip->i_dev;
166 bp->b_dev = cmpdev(ip->i_dev);
167 bp->b_blkno = bn;
168 bp->b_file = ip->i_vnode;
169 bp->b_offset = (offset_t)offset1;
171 if (ufsvfsp->vfs_snapshot) {
172 fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
173 } else {
174 (void) bdev_strategy(bp);
176 io_started = 1;
178 offset1 += contig;
179 curlen += contig;
180 if (iswrite)
181 lwp_stat_update(LWP_STAT_OUBLK, 1);
182 else
183 lwp_stat_update(LWP_STAT_INBLK, 1);
185 if ((flags & B_ASYNC) == 0) {
186 error = biowait(bp);
187 fdb_iodone(bp);
190 DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n",
191 offset1, (iolen - curlen)));
194 DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx ------\n",
195 offset1, (iolen - curlen)));
197 rw_exit(&ip->i_contents);
198 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
200 if (flags & B_ASYNC) {
202 * Show that no more asynchronous IO will be added
204 fdb_ioerrdone(fdbp, error);
206 if (ulp) {
207 ufs_lockfs_end(ulp);
209 if (io_started && flags & B_ASYNC) {
210 return (0);
211 } else {
212 return (error);
217 * ufs_alloc_data - supports allocating space and reads or writes
218 * that involve changes to file length or space allocation.
220 * This function is more expensive, because of the UFS log transaction,
221 * so ufs_rdwr_data() should be used when space or file length changes
222 * will not occur.
224 * Inputs:
225 * fdb - A null pointer instructs this function to only allocate
226 * space for the specified offset and length.
227 * An actual fdbuffer instructs this function to perform
228 * the read or write operation.
229 * flags - defaults (zero value) to synchronous write
230 * B_READ - indicates read operation
231 * B_ASYNC - indicates perform operation asynchronously
234 ufs_alloc_data(
235 vnode_t *vnodep,
236 uoff_t offset,
237 size_t *len,
238 fdbuffer_t *fdbp,
239 int flags,
240 cred_t *credp)
242 struct inode *ip = VTOI(vnodep);
243 size_t done_len, io_len;
244 int contig;
245 uoff_t uoff, io_off;
246 int error = 0; /* No error occurred */
247 int offsetn; /* Start point this IO */
248 int nbytes; /* Number bytes in this IO */
249 daddr_t bn;
250 struct fs *fs;
251 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
252 int i_size_changed = 0;
253 uoff_t old_i_size;
254 struct ulockfs *ulp;
255 int trans_size;
256 int issync; /* UFS Log transaction */
257 /* synchronous when non-zero */
259 int io_started = 0; /* No IO started */
260 uint_t protp = PROT_ALL;
262 ASSERT((flags & B_WRITE) == 0);
265 * Obey the lockfs protocol
267 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
268 if (error) {
269 if ((fdbp != NULL) && (flags & B_ASYNC)) {
270 fdb_ioerrdone(fdbp, error);
272 return (error);
274 if (ulp) {
276 * Try to begin a UFS log transaction
278 trans_size = TOP_GETPAGE_SIZE(ip);
279 TRANS_TRY_BEGIN_CSYNC(ufsvfsp, &issync, TOP_GETPAGE,
280 trans_size, &error);
281 if (error == EWOULDBLOCK) {
282 ufs_lockfs_end(ulp);
283 if ((fdbp != NULL) && (flags & B_ASYNC)) {
284 fdb_ioerrdone(fdbp, EDEADLK);
286 return (EDEADLK);
290 uoff = offset;
291 io_off = offset;
292 io_len = *len;
293 done_len = 0;
295 DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
296 uoff, (io_len - done_len), ip->i_size, (void *)fdbp));
298 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
299 rw_enter(&ip->i_contents, RW_WRITER);
301 ASSERT((ip->i_mode & IFMT) == IFREG);
303 fs = ip->i_fs;
305 while (error == 0 && done_len < io_len) {
306 uoff = (uoff_t)(io_off + done_len);
307 offsetn = (int)blkoff(fs, uoff);
308 nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);
310 DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
311 uoff, nbytes));
313 if (uoff + nbytes > ip->i_size) {
315 * We are extending the length of the file.
316 * bmap is used so that we are sure that
317 * if we need to allocate new blocks, that it
318 * is done here before we up the file size.
320 DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
321 ip->i_size, uoff + nbytes));
323 error = bmap_write(ip, uoff, (offsetn + nbytes),
324 BI_ALLOC_ONLY, NULL, credp);
325 if (ip->i_flag & (ICHG|IUPD))
326 ip->i_seq++;
327 if (error) {
328 DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
329 "failed err: %d\n", error));
330 break;
332 if (fdbp != NULL) {
333 if (uoff >= ip->i_size) {
335 * Desired offset is past end of bytes
336 * in file, so we have a hole.
338 fdb_add_hole(fdbp, uoff - offset,
339 nbytes);
340 } else {
341 int contig;
342 buf_t *bp;
344 error = bmap_read(ip, uoff, &bn,
345 &contig);
346 if (error) {
347 break;
350 contig = ip->i_size - uoff;
351 contig = P2ROUNDUP(contig, DEV_BSIZE);
353 bp = fdb_iosetup(fdbp, uoff - offset,
354 contig, vnodep, flags);
356 bp->b_edev = ip->i_dev;
357 bp->b_dev = cmpdev(ip->i_dev);
358 bp->b_blkno = bn;
359 bp->b_file = ip->i_vnode;
360 bp->b_offset = (offset_t)uoff;
362 if (ufsvfsp->vfs_snapshot) {
363 fssnap_strategy(
364 &ufsvfsp->vfs_snapshot, bp);
365 } else {
366 (void) bdev_strategy(bp);
368 io_started = 1;
370 lwp_stat_update(LWP_STAT_OUBLK, 1);
372 if ((flags & B_ASYNC) == 0) {
373 error = biowait(bp);
374 fdb_iodone(bp);
375 if (error) {
376 break;
379 if (contig > (ip->i_size - uoff)) {
380 contig -= ip->i_size - uoff;
382 fdb_add_hole(fdbp,
383 ip->i_size - offset,
384 contig);
389 i_size_changed = 1;
390 old_i_size = ip->i_size;
391 UFS_SET_ISIZE(uoff + nbytes, ip);
392 TRANS_INODE(ip->i_ufsvfs, ip);
394 * file has grown larger than 2GB. Set flag
395 * in superblock to indicate this, if it
396 * is not already set.
398 if ((ip->i_size > MAXOFF32_T) &&
399 !(fs->fs_flags & FSLARGEFILES)) {
400 ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
401 mutex_enter(&ufsvfsp->vfs_lock);
402 fs->fs_flags |= FSLARGEFILES;
403 ufs_sbwrite(ufsvfsp);
404 mutex_exit(&ufsvfsp->vfs_lock);
406 } else {
408 * The file length is not being extended.
410 error = bmap_read(ip, uoff, &bn, &contig);
411 if (error) {
412 DEBUGF((CE_CONT, "?ufs_alloc_data: "
413 "bmap_read err: %d\n", error));
414 break;
417 if (bn != UFS_HOLE) {
419 * Did not map a hole in the file
421 int contig = P2ROUNDUP(nbytes, DEV_BSIZE);
422 buf_t *bp;
424 if (fdbp != NULL) {
425 bp = fdb_iosetup(fdbp, uoff - offset,
426 contig, vnodep, flags);
428 bp->b_edev = ip->i_dev;
429 bp->b_dev = cmpdev(ip->i_dev);
430 bp->b_blkno = bn;
431 bp->b_file = ip->i_vnode;
432 bp->b_offset = (offset_t)uoff;
434 if (ufsvfsp->vfs_snapshot) {
435 fssnap_strategy(
436 &ufsvfsp->vfs_snapshot, bp);
437 } else {
438 (void) bdev_strategy(bp);
440 io_started = 1;
442 lwp_stat_update(LWP_STAT_OUBLK, 1);
444 if ((flags & B_ASYNC) == 0) {
445 error = biowait(bp);
446 fdb_iodone(bp);
447 if (error) {
448 break;
452 } else {
454 * We read a hole in the file.
455 * We have to allocate blocks for the hole.
457 error = bmap_write(ip, uoff, (offsetn + nbytes),
458 BI_ALLOC_ONLY, NULL, credp);
459 if (ip->i_flag & (ICHG|IUPD))
460 ip->i_seq++;
461 if (error) {
462 DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
463 " hole failed error: %d\n", error));
464 break;
466 if (fdbp != NULL) {
467 fdb_add_hole(fdbp, uoff - offset,
468 nbytes);
472 done_len += nbytes;
475 if (error) {
476 if (i_size_changed) {
478 * Allocation of the blocks for the file failed.
479 * So truncate the file size back to its original size.
481 (void) ufs_itrunc(ip, old_i_size, 0, credp);
485 DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
486 uoff, (io_len - done_len)));
488 if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
489 *len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
490 } else {
491 *len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
495 * Flush cached pages.
497 * XXX - There should be no pages involved, since the I/O was performed
498 * through the device strategy routine and the page cache was bypassed.
499 * However, testing has demonstrated that this fop_putpage is
500 * necessary. Without this, data might not always be read back as it
501 * was written.
504 (void) fop_putpage(vnodep, 0, 0, B_INVAL, credp, NULL);
506 rw_exit(&ip->i_contents);
507 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
509 if ((fdbp != NULL) && (flags & B_ASYNC)) {
511 * Show that no more asynchronous IO will be added
513 fdb_ioerrdone(fdbp, error);
515 if (ulp) {
517 * End the UFS Log transaction
519 TRANS_END_CSYNC(ufsvfsp, &error, issync, TOP_GETPAGE,
520 trans_size);
521 ufs_lockfs_end(ulp);
523 if (io_started && (flags & B_ASYNC)) {
524 return (0);
525 } else {
526 return (error);