4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
32 #include <sys/fssnap_if.h>
33 #include <sys/fs/ufs_inode.h>
34 #include <sys/fs/ufs_lockfs.h>
35 #include <sys/fs/ufs_log.h>
36 #include <sys/fs/ufs_trans.h>
37 #include <sys/cmn_err.h>
39 #include <vm/seg_map.h>
40 #include <sys/fdbuffer.h>
/*
 * Debug switch for this file: when non-zero, DEBUGF() forwards its
 * argument list to cmn_err(); when zero, DEBUGF() does nothing.
 */
int evn_ufs_debug = 0;

/*
 * DEBUGF((level, fmt, ...)) - conditional cmn_err().
 *
 * The argument list is passed with its own parentheses so that it can be
 * pasted directly after cmn_err.  The body is wrapped in do/while (0) so
 * the macro acts as exactly one statement and requires the trailing ';'
 * at the call site; a bare { } block followed by ';' would terminate an
 * enclosing "if" early and orphan its "else".
 */
#define	DEBUGF(args) do { if (evn_ufs_debug) cmn_err args; } while (0)
50 * ufs_rdwr_data - supports reading or writing data when
51 * no changes are permitted in file size or space allocation.
54 * fdbp - The mandatory fdbuffer that carries
55 * the read or write operation.
56 * flags - defaults (zero value) to synchronous write
57 * B_READ - indicates read operation
58 * B_ASYNC - indicates perform operation asynchronously
70 struct inode
*ip
= VTOI(vnodep
);
72 struct ufsvfs
*ufsvfsp
= ip
->i_ufsvfs
;
74 krw_t rwtype
= RW_READER
;
75 uoff_t offset1
= offset
; /* Initial offset */
82 int nbytes
; /* Number bytes this IO */
83 int offsetn
; /* Start point this IO */
84 int iswrite
= flags
& B_WRITE
;
85 int io_started
= 0; /* No IO started */
87 uint_t protp
= PROT_ALL
;
89 error
= ufs_lockfs_begin_getpage(ufsvfsp
, &ulp
, segkmap
, !iswrite
,
92 if (flags
& B_ASYNC
) {
93 fdb_ioerrdone(fdbp
, error
);
100 DEBUGF((CE_CONT
, "?ufs_rdwr: %s vp: %p off %llx len %lx"
101 " isize: %llx fdb: %p\n",
102 flags
& B_READ
? "READ" : "WRITE", (void *)vnodep
,
103 offset1
, iolen
, ip
->i_size
, (void *)fdbp
));
105 rw_enter(&ip
->i_ufsvfs
->vfs_dqrwlock
, RW_READER
);
106 rw_enter(&ip
->i_contents
, rwtype
);
108 ASSERT(offset1
< ip
->i_size
);
110 if ((offset1
+ iolen
) > ip
->i_size
) {
111 iolen
= ip
->i_size
- offset1
;
113 while (!error
&& curlen
< iolen
) {
117 if ((error
= bmap_read(ip
, offset1
, &bn
, &contig
)) != 0) {
120 ASSERT(!(bn
== UFS_HOLE
&& iswrite
));
121 if (bn
== UFS_HOLE
) {
123 * If the above assertion is true,
124 * then the following if statement can never be true.
126 if (iswrite
&& (rwtype
== RW_READER
)) {
128 if (!rw_tryupgrade(&ip
->i_contents
)) {
129 rw_exit(&ip
->i_contents
);
130 rw_enter(&ip
->i_contents
, rwtype
);
134 offsetn
= blkoff(fs
, offset1
);
135 pplen
= P2ROUNDUP(len
, PAGESIZE
);
136 nbytes
= MIN((pplen
- curlen
),
137 (fs
->fs_bsize
- offsetn
));
141 * We may be reading or writing.
143 DEBUGF((CE_CONT
, "?ufs_rdwr_data: hole %llx - %lx\n",
144 offset1
, (iolen
- curlen
)));
147 printf("**WARNING: ignoring hole in write\n");
150 fdb_add_hole(fdbp
, offset1
- offset
, nbytes
);
158 pplen
= P2ROUNDUP(len
, PAGESIZE
);
160 contig
= MIN(contig
, len
- curlen
);
161 contig
= P2ROUNDUP(contig
, DEV_BSIZE
);
163 bp
= fdb_iosetup(fdbp
, offset1
- offset
, contig
, vnodep
, flags
);
165 bp
->b_edev
= ip
->i_dev
;
166 bp
->b_dev
= cmpdev(ip
->i_dev
);
168 bp
->b_file
= ip
->i_vnode
;
169 bp
->b_offset
= (offset_t
)offset1
;
171 if (ufsvfsp
->vfs_snapshot
) {
172 fssnap_strategy(&ufsvfsp
->vfs_snapshot
, bp
);
174 (void) bdev_strategy(bp
);
181 lwp_stat_update(LWP_STAT_OUBLK
, 1);
183 lwp_stat_update(LWP_STAT_INBLK
, 1);
185 if ((flags
& B_ASYNC
) == 0) {
190 DEBUGF((CE_CONT
, "?loop ufs_rdwr_data.. off %llx len %lx\n",
191 offset1
, (iolen
- curlen
)));
194 DEBUGF((CE_CONT
, "?ufs_rdwr_data: off %llx len %lx ------\n",
195 offset1
, (iolen
- curlen
)));
197 rw_exit(&ip
->i_contents
);
198 rw_exit(&ip
->i_ufsvfs
->vfs_dqrwlock
);
200 if (flags
& B_ASYNC
) {
202 * Show that no more asynchronous IO will be added
204 fdb_ioerrdone(fdbp
, error
);
209 if (io_started
&& flags
& B_ASYNC
) {
217 * ufs_alloc_data - supports allocating space and reads or writes
218 * that involve changes to file length or space allocation.
220 * This function is more expensive, because of the UFS log transaction,
221 * so ufs_rdwr_data() should be used when space or file length changes
225 * fdbp - A null pointer instructs this function to only allocate
226 * space for the specified offset and length.
227 * An actual fdbuffer instructs this function to perform
228 * the read or write operation.
229 * flags - defaults (zero value) to synchronous write
230 * B_READ - indicates read operation
231 * B_ASYNC - indicates perform operation asynchronously
242 struct inode
*ip
= VTOI(vnodep
);
243 size_t done_len
, io_len
;
246 int error
= 0; /* No error occurred */
247 int offsetn
; /* Start point this IO */
248 int nbytes
; /* Number bytes in this IO */
251 struct ufsvfs
*ufsvfsp
= ip
->i_ufsvfs
;
252 int i_size_changed
= 0;
256 int issync
; /* UFS Log transaction */
257 /* synchronous when non-zero */
259 int io_started
= 0; /* No IO started */
260 uint_t protp
= PROT_ALL
;
262 ASSERT((flags
& B_WRITE
) == 0);
265 * Obey the lockfs protocol
267 error
= ufs_lockfs_begin_getpage(ufsvfsp
, &ulp
, segkmap
, 0, &protp
);
269 if ((fdbp
!= NULL
) && (flags
& B_ASYNC
)) {
270 fdb_ioerrdone(fdbp
, error
);
276 * Try to begin a UFS log transaction
278 trans_size
= TOP_GETPAGE_SIZE(ip
);
279 TRANS_TRY_BEGIN_CSYNC(ufsvfsp
, &issync
, TOP_GETPAGE
,
281 if (error
== EWOULDBLOCK
) {
283 if ((fdbp
!= NULL
) && (flags
& B_ASYNC
)) {
284 fdb_ioerrdone(fdbp
, EDEADLK
);
295 DEBUGF((CE_CONT
, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
296 uoff
, (io_len
- done_len
), ip
->i_size
, (void *)fdbp
));
298 rw_enter(&ip
->i_ufsvfs
->vfs_dqrwlock
, RW_READER
);
299 rw_enter(&ip
->i_contents
, RW_WRITER
);
301 ASSERT((ip
->i_mode
& IFMT
) == IFREG
);
305 while (error
== 0 && done_len
< io_len
) {
306 uoff
= (uoff_t
)(io_off
+ done_len
);
307 offsetn
= (int)blkoff(fs
, uoff
);
308 nbytes
= (int)MIN(fs
->fs_bsize
- offsetn
, io_len
- done_len
);
310 DEBUGF((CE_CONT
, "?ufs_alloc_data: offset: %llx len %x\n",
313 if (uoff
+ nbytes
> ip
->i_size
) {
315 * We are extending the length of the file.
316 * bmap is used so that we are sure that
317 * if we need to allocate new blocks, that it
318 * is done here before we up the file size.
320 DEBUGF((CE_CONT
, "?ufs_alloc_data: grow %llx -> %llx\n",
321 ip
->i_size
, uoff
+ nbytes
));
323 error
= bmap_write(ip
, uoff
, (offsetn
+ nbytes
),
324 BI_ALLOC_ONLY
, NULL
, credp
);
325 if (ip
->i_flag
& (ICHG
|IUPD
))
328 DEBUGF((CE_CONT
, "?ufs_alloc_data: grow "
329 "failed err: %d\n", error
));
333 if (uoff
>= ip
->i_size
) {
335 * Desired offset is past end of bytes
336 * in file, so we have a hole.
338 fdb_add_hole(fdbp
, uoff
- offset
,
344 error
= bmap_read(ip
, uoff
, &bn
,
350 contig
= ip
->i_size
- uoff
;
351 contig
= P2ROUNDUP(contig
, DEV_BSIZE
);
353 bp
= fdb_iosetup(fdbp
, uoff
- offset
,
354 contig
, vnodep
, flags
);
356 bp
->b_edev
= ip
->i_dev
;
357 bp
->b_dev
= cmpdev(ip
->i_dev
);
359 bp
->b_file
= ip
->i_vnode
;
360 bp
->b_offset
= (offset_t
)uoff
;
362 if (ufsvfsp
->vfs_snapshot
) {
364 &ufsvfsp
->vfs_snapshot
, bp
);
366 (void) bdev_strategy(bp
);
370 lwp_stat_update(LWP_STAT_OUBLK
, 1);
372 if ((flags
& B_ASYNC
) == 0) {
379 if (contig
> (ip
->i_size
- uoff
)) {
380 contig
-= ip
->i_size
- uoff
;
390 old_i_size
= ip
->i_size
;
391 UFS_SET_ISIZE(uoff
+ nbytes
, ip
);
392 TRANS_INODE(ip
->i_ufsvfs
, ip
);
394 * file has grown larger than 2GB. Set flag
395 * in superblock to indicate this, if it
396 * is not already set.
398 if ((ip
->i_size
> MAXOFF32_T
) &&
399 !(fs
->fs_flags
& FSLARGEFILES
)) {
400 ASSERT(ufsvfsp
->vfs_lfflags
& UFS_LARGEFILES
);
401 mutex_enter(&ufsvfsp
->vfs_lock
);
402 fs
->fs_flags
|= FSLARGEFILES
;
403 ufs_sbwrite(ufsvfsp
);
404 mutex_exit(&ufsvfsp
->vfs_lock
);
408 * The file length is not being extended.
410 error
= bmap_read(ip
, uoff
, &bn
, &contig
);
412 DEBUGF((CE_CONT
, "?ufs_alloc_data: "
413 "bmap_read err: %d\n", error
));
417 if (bn
!= UFS_HOLE
) {
419 * Did not map a hole in the file
421 int contig
= P2ROUNDUP(nbytes
, DEV_BSIZE
);
425 bp
= fdb_iosetup(fdbp
, uoff
- offset
,
426 contig
, vnodep
, flags
);
428 bp
->b_edev
= ip
->i_dev
;
429 bp
->b_dev
= cmpdev(ip
->i_dev
);
431 bp
->b_file
= ip
->i_vnode
;
432 bp
->b_offset
= (offset_t
)uoff
;
434 if (ufsvfsp
->vfs_snapshot
) {
436 &ufsvfsp
->vfs_snapshot
, bp
);
438 (void) bdev_strategy(bp
);
442 lwp_stat_update(LWP_STAT_OUBLK
, 1);
444 if ((flags
& B_ASYNC
) == 0) {
454 * We read a hole in the file.
455 * We have to allocate blocks for the hole.
457 error
= bmap_write(ip
, uoff
, (offsetn
+ nbytes
),
458 BI_ALLOC_ONLY
, NULL
, credp
);
459 if (ip
->i_flag
& (ICHG
|IUPD
))
462 DEBUGF((CE_CONT
, "?ufs_alloc_data: fill"
463 " hole failed error: %d\n", error
));
467 fdb_add_hole(fdbp
, uoff
- offset
,
476 if (i_size_changed
) {
478 * Allocation of the blocks for the file failed.
479 * So truncate the file size back to its original size.
481 (void) ufs_itrunc(ip
, old_i_size
, 0, credp
);
485 DEBUGF((CE_CONT
, "?ufs_alloc: uoff %llx len %lx\n",
486 uoff
, (io_len
- done_len
)));
488 if ((offset
+ *len
) < (NDADDR
* fs
->fs_bsize
)) {
489 *len
= (size_t)(roundup(offset
+ *len
, fs
->fs_fsize
) - offset
);
491 *len
= (size_t)(roundup(offset
+ *len
, fs
->fs_bsize
) - offset
);
495 * Flush cached pages.
497 * XXX - There should be no pages involved, since the I/O was performed
498 * through the device strategy routine and the page cache was bypassed.
499 * However, testing has demonstrated that this fop_putpage is
500 * necessary. Without this, data might not always be read back as it
504 (void) fop_putpage(vnodep
, 0, 0, B_INVAL
, credp
, NULL
);
506 rw_exit(&ip
->i_contents
);
507 rw_exit(&ip
->i_ufsvfs
->vfs_dqrwlock
);
509 if ((fdbp
!= NULL
) && (flags
& B_ASYNC
)) {
511 * Show that no more asynchronous IO will be added
513 fdb_ioerrdone(fdbp
, error
);
517 * End the UFS Log transaction
519 TRANS_END_CSYNC(ufsvfsp
, &error
, issync
, TOP_GETPAGE
,
523 if (io_started
&& (flags
& B_ASYNC
)) {