/*	$NetBSD: lfs_pages.c,v 1.7 2015/08/12 18:26:27 dholland Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1986, 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_vnops.c	8.13 (Berkeley) 6/10/95
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.7 2015/08/12 18:26:27 dholland Exp $");

#include "opt_compat_netbsd.h"
#include "opt_uvm_page_trkown.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/syslog.h>
#include <sys/fstrans.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_bswap.h>
#include <ufs/lfs/ulfs_extern.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pmap.h>
#include <uvm/uvm_stat.h>
#include <uvm/uvm_pager.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>
extern pid_t lfs_writer_daemon;

static int check_dirty(struct lfs *, struct vnode *, off_t, off_t,
    off_t, int, int, struct vm_page **);
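
/*
 * A rough map of the locking in this file, as reflected in the
 * assertions below: vp->v_interlock protects the vnode's page queues
 * and page flags, and is the lock named in the entry/exit contracts of
 * the functions here; lfs_lock protects the inode flags (e.g.
 * IN_PAGING, IN_MODIFIED) and the paging chain lfs_pchainhd; and the
 * segment lock (lfs_seglock/lfs_segunlock) serializes construction of
 * a partial segment.  See the XXX comment in lfs_putpages() for one
 * known ordering hazard between the interlock and lfs_lock.
 */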
int
lfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM &&
	    (ap->a_access_type & VM_PROT_WRITE) != 0) {
		return EPERM;
	}
	if ((ap->a_access_type & VM_PROT_WRITE) != 0) {
		mutex_enter(&lfs_lock);
		LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED);
		mutex_exit(&lfs_lock);
	}

	/*
	 * We're relying on the fact that genfs_getpages() always reads in
	 * entire filesystem blocks.
	 */
	return genfs_getpages(v);
}
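
/*
 * For example, with an 8KB filesystem block and 4KB pages, each block
 * spans two pages (pages_per_block == 2 in check_dirty() below).
 * Because genfs_getpages() faults whole blocks in, both pages of a
 * block are resident or neither is, which is the invariant that
 * check_dirty() depends on.
 */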
/*
 * Wait for a page to become unbusy, possibly printing diagnostic messages
 * as well.
 *
 * Called with vp->v_interlock held; return with it held.
 */
static void
wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label)
{
	KASSERT(mutex_owned(vp->v_interlock));
	if ((pg->flags & PG_BUSY) == 0)
		return;		/* Nothing to wait for! */

#if defined(DEBUG) && defined(UVM_PAGE_TRKOWN)
	static struct vm_page *lastpg;

	if (label != NULL && pg != lastpg) {
		if (pg->owner_tag) {
			printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n",
			       curproc->p_pid, curlwp->l_lid, label,
			       pg, pg->owner, pg->lowner, pg->owner_tag);
		} else {
			printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n",
			       curproc->p_pid, curlwp->l_lid, label, pg);
		}
	}
	lastpg = pg;
#endif

	pg->flags |= PG_WANTED;
	UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, "lfsput", 0);
	mutex_enter(vp->v_interlock);
}
/*
 * This routine is called by lfs_putpages() when it can't complete the
 * write because a page is busy.  This means that either (1) someone,
 * possibly the pagedaemon, is looking at this page, and will give it up
 * presently; or (2) we ourselves are holding the page busy in the
 * process of being written (either gathered or actually on its way to
 * disk).  We don't need to give up the segment lock, but we might need
 * to call lfs_writeseg() to expedite the page's journey to disk.
 *
 * Called with vp->v_interlock held; return with it held.
 */
/* #define BUSYWAIT */
static void
write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg,
    int seglocked, const char *label)
{
	KASSERT(mutex_owned(vp->v_interlock));
#ifndef BUSYWAIT
	struct inode *ip = VTOI(vp);
	struct segment *sp = fs->lfs_sp;
	int count = 0;

	if (pg == NULL)
		return;		/* Nothing to do */

	while (pg->flags & PG_BUSY &&
	    pg->uobject == &vp->v_uobj) {
		mutex_exit(vp->v_interlock);
		if (sp->cbpp - sp->bpp > 1) {
			/* Write gathered pages */
			lfs_updatemeta(sp);
			lfs_release_finfo(fs);
			(void) lfs_writeseg(fs, sp);

			/*
			 * Reinitialize FIP
			 */
			KASSERT(sp->vp == vp);
			lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
		}
		++count;
		mutex_enter(vp->v_interlock);
		wait_for_page(vp, pg, label);
	}
	if (label != NULL && count > 1) {
		DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n",
		      curproc->p_pid, label, (count > 0 ? "looping, " : ""),
		      count));
	}
#else
	preempt();
#endif
	KASSERT(mutex_owned(vp->v_interlock));
}
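
/*
 * Note that flushing mid-vnode this way splits the file's blocks across
 * more than one FINFO: lfs_release_finfo()/lfs_writeseg() close out the
 * entry being accumulated, and lfs_acquire_finfo() opens a fresh one
 * (same inode number and generation) for the blocks still to come.
 */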
/*
 * Make sure that for all pages in every block in the given range,
 * either all are dirty or all are clean.  If any of the pages
 * we've seen so far are dirty, put the vnode on the paging chain,
 * and mark it IN_PAGING.
 *
 * If checkfirst != 0, don't check all the pages but return at the
 * first dirty page.
 */
static int
check_dirty(struct lfs *fs, struct vnode *vp,
    off_t startoffset, off_t endoffset, off_t blkeof,
    int flags, int checkfirst, struct vm_page **pgp)
{
	int by_list;
	struct vm_page *curpg = NULL; /* XXX: gcc */
	struct vm_page *pgs[MAXBSIZE / PAGE_SIZE], *pg;
	off_t soff = 0; /* XXX: gcc */
	voff_t off;
	int i;
	int nonexistent;
	int any_dirty;	/* number of dirty pages */
	int dirty;	/* number of dirty pages in a block */
	int tdirty;
	int pages_per_block = lfs_sb_getbsize(fs) >> PAGE_SHIFT;
	int pagedaemon = (curlwp == uvm.pagedaemon_lwp);

	KASSERT(mutex_owned(vp->v_interlock));
	ASSERT_MAYBE_SEGLOCK(fs);
 top:
	/*
	 * Walk the object's page list directly if that is likely to be
	 * cheaper than looking each page up in the tree.
	 */
	by_list = (vp->v_uobj.uo_npages <=
	    ((endoffset - startoffset) >> PAGE_SHIFT) *
	    UVM_PAGE_TREE_PENALTY);
	any_dirty = 0;

	if (by_list) {
		curpg = TAILQ_FIRST(&vp->v_uobj.memq);
	} else {
		soff = startoffset;
	}
	while (by_list || soff < MIN(blkeof, endoffset)) {
		if (by_list) {
			/*
			 * Find the first page in a block.  Skip
			 * blocks outside our area of interest or beyond
			 * the end of file.
			 */
			KASSERT(curpg == NULL
			    || (curpg->flags & PG_MARKER) == 0);
			if (pages_per_block > 1) {
				while (curpg &&
				    ((curpg->offset & lfs_sb_getbmask(fs)) ||
				     curpg->offset >= vp->v_size ||
				     curpg->offset >= endoffset)) {
					curpg = TAILQ_NEXT(curpg, listq.queue);
					KASSERT(curpg == NULL ||
					    (curpg->flags & PG_MARKER) == 0);
				}
			}
			if (curpg == NULL)
				break;
			soff = curpg->offset;
		}

		/*
		 * Mark all pages in extended range busy; find out if any
		 * of them are dirty.
		 */
		nonexistent = dirty = 0;
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			KASSERT(mutex_owned(vp->v_interlock));
			if (by_list && pages_per_block <= 1) {
				pgs[i] = pg = curpg;
			} else {
				off = soff + (i << PAGE_SHIFT);
				pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off);
				if (pg == NULL) {
					++nonexistent;
					continue;
				}
			}
			KASSERT(pg != NULL);

			/*
			 * If we're holding the segment lock, we can deadlock
			 * against a process that has our page and is waiting
			 * for the cleaner, while the cleaner waits for the
			 * segment lock.  Just bail in that case.
			 */
			if ((pg->flags & PG_BUSY) &&
			    (pagedaemon || LFS_SEGLOCK_HELD(fs))) {
				if (i > 0)
					uvm_page_unbusy(pgs, i);
				DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n"));
				if (pgp)
					*pgp = pg;
				KASSERT(mutex_owned(vp->v_interlock));
				return -1;
			}

			while (pg->flags & PG_BUSY) {
				wait_for_page(vp, pg, NULL);
				KASSERT(mutex_owned(vp->v_interlock));
				if (i > 0)
					uvm_page_unbusy(pgs, i);
				KASSERT(mutex_owned(vp->v_interlock));
				goto top;
			}
			pg->flags |= PG_BUSY;
			UVM_PAGE_OWN(pg, "lfs_putpages");

			pmap_page_protect(pg, VM_PROT_NONE);
			tdirty = (pmap_clear_modify(pg) ||
			    (pg->flags & PG_CLEAN) == 0);
			dirty += tdirty;
		}
		if (pages_per_block > 0 && nonexistent >= pages_per_block) {
			if (by_list) {
				curpg = TAILQ_NEXT(curpg, listq.queue);
			} else {
				soff += lfs_sb_getbsize(fs);
			}
			continue;
		}

		any_dirty += dirty;
		KASSERT(nonexistent == 0);
		KASSERT(mutex_owned(vp->v_interlock));

		/*
		 * If any are dirty make all dirty; unbusy them,
		 * but if we were asked to clean, wire them so that
		 * the pagedaemon doesn't bother us about them while
		 * they're on their way to disk.
		 */
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			KASSERT(mutex_owned(vp->v_interlock));
			pg = pgs[i];
			KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI)));
			KASSERT(pg->flags & PG_BUSY);
			if (dirty) {
				pg->flags &= ~PG_CLEAN;
				if (flags & PGO_FREE) {
					/*
					 * Wire the page so that
					 * pdaemon doesn't see it again.
					 */
					mutex_enter(&uvm_pageqlock);
					uvm_pagewire(pg);
					mutex_exit(&uvm_pageqlock);

					/* Suspended write flag */
					pg->flags |= PG_DELWRI;
				}
			}
			if (pg->flags & PG_WANTED)
				wakeup(pg);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}

		if (checkfirst && any_dirty)
			break;

		if (by_list) {
			curpg = TAILQ_NEXT(curpg, listq.queue);
		} else {
			soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs));
		}
	}

	KASSERT(mutex_owned(vp->v_interlock));
	return any_dirty;
}
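
/*
 * check_dirty() thus returns -1 if it bailed out on a busy page (the
 * page is handed back in *pgp when pgp is non-NULL), and otherwise the
 * number of dirty pages it found; lfs_putpages() below treats -1 as
 * "retry or fail" and 0 as "nothing needs cleaning".
 */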
/*
 * lfs_putpages functions like genfs_putpages except that
 *
 * (1) It needs to bounds-check the incoming requests to ensure that
 *     they are block-aligned; if they are not, expand the range and
 *     do the right thing in case, e.g., the requested range is clean
 *     but the expanded range is dirty.
 *
 * (2) It needs to explicitly send blocks to be written when it is done.
 *     If VOP_PUTPAGES is called without the seglock held, we simply take
 *     the seglock and let lfs_segunlock wait for us.
 *     XXX There might be a bad situation if we have to flush a vnode while
 *     XXX lfs_markv is in operation.  As of this writing we panic in this
 *     XXX case.
 *
 * Assumptions:
 *
 * (1) The caller does not hold any pages in this vnode busy.  If it does,
 *     there is a danger that when we expand the page range and busy the
 *     pages we will deadlock.
 *
 * (2) We are called with vp->v_interlock held; we must return with it
 *     released.
 *
 * (3) We don't absolutely have to free pages right away, provided that
 *     the request does not have PGO_SYNCIO.  When the pagedaemon gives
 *     us a request with PGO_FREE, we take the pages out of the paging
 *     queue and wake up the writer, which will handle freeing them for us.
 *
 *     We ensure that for any filesystem block, all pages for that
 *     block are either resident or not, even if those pages are higher
 *     than EOF; that means that we will be getting requests to free
 *     "unused" pages above EOF all the time, and should ignore them.
 *
 * (4) If we are called with PGO_LOCKED, the finfo array we are to write
 *     into has been set up for us by lfs_writefile.  If not, we will
 *     have to handle allocating and/or freeing an finfo entry.
 *
 * XXX note that we're (ab)using PGO_LOCKED as "seglock held".
 */

/* How many times to loop before we should start to worry */
#define TOOMANY 4
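
/*
 * In practice the PGO_LOCKED caller is lfs_writefile(), which has
 * already taken the seglock and set up the finfo for us; an unlocked
 * caller (e.g. the syncer or the pagedaemon) passes PGO_CLEANIT without
 * PGO_LOCKED, and we do the seglock/finfo setup ourselves below.
 */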
int
lfs_putpages(void *v)
{
	int error;
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	off_t origoffset, startoffset, endoffset, origendoffset, blkeof;
	off_t off, max_endoffset;
	bool seglocked, sync, pagedaemon, reclaim;
	struct vm_page *pg, *busypg;
	UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist);
	int oreclaim = 0;
	int donewriting = 0;
#ifdef DEBUG
	int debug_n_again, debug_n_dirtyclean;
#endif

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;
	sync = (ap->a_flags & PGO_SYNCIO) != 0;
	reclaim = (ap->a_flags & PGO_RECLAIM) != 0;
	pagedaemon = (curlwp == uvm.pagedaemon_lwp);

	KASSERT(mutex_owned(vp->v_interlock));
	/* Putpages does nothing for metadata. */
	if (vp == fs->lfs_ivnode || vp->v_type != VREG) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/*
	 * If there are no pages, don't do anything.
	 */
	if (vp->v_uobj.uo_npages == 0) {
		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
		    (vp->v_iflag & VI_ONWORKLST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_iflag &= ~VI_WRMAPDIRTY;
			vn_syncer_remove_from_worklist(vp);
		}
		mutex_exit(vp->v_interlock);

		/* Remove us from paging queue, if we were on it */
		mutex_enter(&lfs_lock);
		if (ip->i_flags & IN_PAGING) {
			ip->i_flags &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);

		KASSERT(!mutex_owned(vp->v_interlock));
		return 0;
	}
	blkeof = lfs_blkroundup(fs, ip->i_size);

	/*
	 * Ignore requests to free pages past EOF but in the same block
	 * as EOF, unless the vnode is being reclaimed or the request
	 * is synchronous.  (If the request is sync, it comes from a
	 * path, such as lfs_fsync, that must write everything it is
	 * given.)
	 *
	 * To avoid being flooded with this request, make these pages
	 * look "active".
	 */
	if (!sync && !reclaim &&
	    ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) {
		origoffset = ap->a_offlo;
		for (off = origoffset; off < blkeof; off += lfs_sb_getbsize(fs)) {
			pg = uvm_pagelookup(&vp->v_uobj, off);
			KASSERT(pg != NULL);
			while (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0,
				    "lfsput2", 0);
				mutex_enter(vp->v_interlock);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
		}
		ap->a_offlo = blkeof;
		if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) {
			mutex_exit(vp->v_interlock);
			return 0;
		}
	}
	/*
	 * Extend page range to start and end at block boundaries.
	 * (For the purposes of VOP_PUTPAGES, fragments don't exist.)
	 */
	origoffset = ap->a_offlo;
	origendoffset = ap->a_offhi;
	startoffset = origoffset & ~(lfs_sb_getbmask(fs));
	max_endoffset = (trunc_page(LLONG_MAX) >> lfs_sb_getbshift(fs))
	    << lfs_sb_getbshift(fs);

	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		endoffset = max_endoffset;
		origendoffset = endoffset;
	} else {
		origendoffset = round_page(ap->a_offhi);
		endoffset = round_page(lfs_blkroundup(fs, origendoffset));
	}

	KASSERT(startoffset > 0 || endoffset >= startoffset);
	if (startoffset == endoffset) {
		/* Nothing to do, why were we called? */
		mutex_exit(vp->v_interlock);
		DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %"
		      PRId64 "\n", startoffset));
		return 0;
	}

	ap->a_offlo = startoffset;
	ap->a_offhi = endoffset;
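
	/*
	 * (For instance, with an 8KB block size, lfs_sb_getbmask(fs) is
	 * 0x1fff, so a request for [0x2345, 0x5000) is widened here to
	 * the block-aligned [0x2000, 0x6000) before any page is touched.)
	 */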
	/*
	 * If not cleaning, just send the pages through genfs_putpages
	 * to be returned to the pool.
	 */
	if (!(ap->a_flags & PGO_CLEANIT)) {
		DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n",
		      vp, (int)ip->i_number, ap->a_flags));
		int r = genfs_putpages(v);
		KASSERT(!mutex_owned(vp->v_interlock));
		return r;
	}

	/* Set PGO_BUSYFAIL to avoid deadlocks */
	ap->a_flags |= PGO_BUSYFAIL;
	/*
	 * Likewise, if we are asked to clean but the pages are not
	 * dirty, we can just free them using genfs_putpages.
	 */
#ifdef DEBUG
	debug_n_dirtyclean = 0;
#endif
	do {
		int r;

		KASSERT(mutex_owned(vp->v_interlock));

		/* Count the number of dirty pages */
		r = check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 1, NULL);
		if (r < 0) {
			/* Pages are busy with another process */
			mutex_exit(vp->v_interlock);
			return EDEADLK;
		}
		if (r > 0) /* Some pages are dirty */
			break;

		/*
		 * Sometimes pages are dirtied between the time that
		 * we check and the time we try to clean them.
		 * Instruct lfs_gop_write to return EDEADLK in this case
		 * so we can write them properly.
		 */
		ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE;
		r = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags & ~PGO_SYNCIO, &busypg);
		ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE;
		if (r != EDEADLK) {
			KASSERT(!mutex_owned(vp->v_interlock));
			return r;
		}

		/* One of the pages was busy.  Start over. */
		mutex_enter(vp->v_interlock);
		wait_for_page(vp, busypg, "dirtyclean");
#ifdef DEBUG
		++debug_n_dirtyclean;
#endif
	} while (1);

#ifdef DEBUG
	if (debug_n_dirtyclean > TOOMANY)
		DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n",
		      debug_n_dirtyclean));
#endif
	/*
	 * Dirty and asked to clean.
	 *
	 * Pagedaemon can't actually write LFS pages; wake up
	 * the writer to take care of that.  The writer will
	 * notice the pager inode queue and act on that.
	 *
	 * XXX We must drop the vp->interlock before taking the lfs_lock or we
	 * get a nasty deadlock with lfs_flush_pchain().
	 */
	if (pagedaemon) {
		mutex_exit(vp->v_interlock);
		mutex_enter(&lfs_lock);
		if (!(ip->i_flags & IN_PAGING)) {
			ip->i_flags |= IN_PAGING;
			TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		wakeup(&lfs_writer_daemon);
		mutex_exit(&lfs_lock);
		preempt();
		KASSERT(!mutex_owned(vp->v_interlock));
		return EWOULDBLOCK;
	}
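
	/*
	 * In the pagedaemon case the pages are not written here at all:
	 * the writer daemon walks fs->lfs_pchainhd (see lfs_flush_pchain(),
	 * named in the XXX above) and does the write on our behalf, while
	 * the pagedaemon is expected to see EWOULDBLOCK and retry later.
	 */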
	/*
	 * If this is a file created in a recent dirop, we can't flush its
	 * inode until the dirop is complete.  Drain dirops, then flush the
	 * filesystem (taking care of any other pending dirops while we're
	 * at it).
	 */
	if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT &&
	    (vp->v_uflag & VU_DIROP)) {
		DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n"));

		lfs_writer_enter(fs, "ppdirop");

		/* Note if we hold the vnode locked */
		if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
			DLOG((DLOG_PAGE, "lfs_putpages: dirop inode already locked\n"));
		} else {
			DLOG((DLOG_PAGE, "lfs_putpages: dirop inode not locked\n"));
		}
		mutex_exit(vp->v_interlock);

		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, sync ? SEGM_SYNC : 0);
		mutex_exit(&lfs_lock);

		mutex_enter(vp->v_interlock);
		lfs_writer_leave(fs);

		/*
		 * The flush will have cleaned out this vnode as well,
		 * no need to do more to it.
		 */
	}
	/*
	 * This is it.  We are going to write some pages.  From here on
	 * down it's all just mechanics.
	 *
	 * Don't let genfs_putpages wait; lfs_segunlock will wait for us.
	 */
	ap->a_flags &= ~PGO_SYNCIO;

	/*
	 * If we've already got the seglock, flush the node and return.
	 * The FIP has already been set up for us by lfs_writefile,
	 * and FIP cleanup and lfs_updatemeta will also be done there,
	 * unless genfs_putpages returns EDEADLK; then we must flush
	 * what we have, and correct FIP and segment header accounting.
	 */
 get_seglock:
	/*
	 * If we are not called with the segment locked, lock it.
	 * Account for a new FIP in the segment header, and set sp->vp.
	 * (This should duplicate the setup at the top of lfs_writefile().)
	 */
	seglocked = (ap->a_flags & PGO_LOCKED) != 0;
	if (!seglocked) {
		mutex_exit(vp->v_interlock);
		error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0));
		if (error != 0) {
			KASSERT(!mutex_owned(vp->v_interlock));
			return error;
		}
		mutex_enter(vp->v_interlock);
		lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
	}
	sp = fs->lfs_sp;
	KASSERT(sp->vp == NULL);
	sp->vp = vp;

	/* Note segments written by reclaim; only for debugging */
	if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
		sp->seg_flags |= SEGM_RECLAIM;
		fs->lfs_reclino = ip->i_number;
	}

	/*
	 * Ensure that the partial segment is marked SS_DIROP if this
	 * vnode is a DIROP.
	 */
	if (!seglocked && vp->v_uflag & VU_DIROP) {
		SEGSUM *ssp = sp->segsum;

		lfs_ss_setflags(fs, ssp,
		    lfs_ss_getflags(fs, ssp) | (SS_DIROP|SS_CONT));
	}
	/*
	 * Loop over genfs_putpages until all pages are gathered.
	 * genfs_putpages() drops the interlock, so reacquire it if necessary.
	 * Whenever we lose the interlock we have to rerun check_dirty, as
	 * well, since more pages might have been dirtied in our absence.
	 */
#ifdef DEBUG
	debug_n_again = 0;
#endif
	do {
		busypg = NULL;
		KASSERT(mutex_owned(vp->v_interlock));
		if (check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 0, &busypg) < 0) {
			mutex_exit(vp->v_interlock);

			mutex_enter(vp->v_interlock);
			write_and_wait(fs, vp, busypg, seglocked, NULL);
			if (!seglocked) {
				mutex_exit(vp->v_interlock);
				lfs_release_finfo(fs);
				lfs_segunlock(fs);
				mutex_enter(vp->v_interlock);
			}
			sp->vp = NULL;
			goto get_seglock;
		}

		busypg = NULL;
		KASSERT(!mutex_owned(&uvm_pageqlock));
		oreclaim = (ap->a_flags & PGO_RECLAIM);
		ap->a_flags &= ~PGO_RECLAIM;
		error = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags, &busypg);
		ap->a_flags |= oreclaim;

		if (error == EDEADLK || error == EAGAIN) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			      " %d ino %d off %jx (seg %d)\n", error,
			      ip->i_number, (uintmax_t)lfs_sb_getoffset(fs),
			      lfs_dtosn(fs, lfs_sb_getoffset(fs))));

			if (error == EDEADLK) {
				mutex_enter(vp->v_interlock);
				write_and_wait(fs, vp, busypg, seglocked, "again");
				mutex_exit(vp->v_interlock);
			} else {
				if ((sp->seg_flags & SEGM_SINGLE) &&
				    lfs_sb_getcurseg(fs) != fs->lfs_startseg)
					donewriting = 1;
			}
		} else if (error) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			      " %d ino %d off %jx (seg %d)\n", error,
			      (int)ip->i_number, (uintmax_t)lfs_sb_getoffset(fs),
			      lfs_dtosn(fs, lfs_sb_getoffset(fs))));
		}
		/* genfs_do_putpages loses the interlock */
#ifdef DEBUG
		++debug_n_again;
#endif
		if (oreclaim && error == EAGAIN) {
			DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n",
			      vp, (int)ip->i_number, vp->v_iflag, ap->a_flags));
			mutex_enter(vp->v_interlock);
		}
		if (error == EDEADLK)
			mutex_enter(vp->v_interlock);
	} while (error == EDEADLK || (oreclaim && error == EAGAIN));
#ifdef DEBUG
	if (debug_n_again > TOOMANY)
		DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again));
#endif
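
	/*
	 * On leaving the loop, error is 0, a hard error from
	 * genfs_do_putpages(), or an EAGAIN that no longer satisfies the
	 * retry condition (in the SEGM_SINGLE case donewriting was set
	 * above); genfs_do_putpages() has dropped the interlock for us.
	 */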
	KASSERT(sp != NULL && sp->vp == vp);
	if (!seglocked && !donewriting) {
		sp->vp = NULL;

		/* Write indirect blocks as well */
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir);

		KASSERT(sp->vp == NULL);
		sp->vp = vp;
	}

	/*
	 * Blocks are now gathered into a segment waiting to be written.
	 * All that's left to do is update metadata, and write them.
	 */
	lfs_updatemeta(sp);
	KASSERT(sp->vp == vp);
	sp->vp = NULL;

	/*
	 * If we were called from lfs_writefile, we don't need to clean up
	 * the FIP or unlock the segment lock.  We're done.
	 */
	if (seglocked) {
		KASSERT(!mutex_owned(vp->v_interlock));
		return error;
	}

	/* Clean up FIP and send it to disk. */
	lfs_release_finfo(fs);
	lfs_writeseg(fs, fs->lfs_sp);

	/*
	 * Remove us from paging queue if we wrote all our pages.
	 */
	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		mutex_enter(&lfs_lock);
		if (ip->i_flags & IN_PAGING) {
			ip->i_flags &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);
	}

	/*
	 * XXX - with the malloc/copy writeseg, the pages are freed by now
	 * even if we don't wait (e.g. if we hold a nested lock).  This
	 * will not be true if we stop using malloc/copy.
	 */
	KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT);
	lfs_segunlock(fs);

	/*
	 * Wait for v_numoutput to drop to zero.  The seglock should
	 * take care of this, but there is a slight possibility that
	 * aiodoned might not have got around to our buffers yet.
	 */
	if (sync) {
		mutex_enter(vp->v_interlock);
		while (vp->v_numoutput > 0) {
			DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on"
			      " num %d\n", ip->i_number, vp->v_numoutput));
			cv_wait(&vp->v_cv, vp->v_interlock);
		}
		mutex_exit(vp->v_interlock);
	}

	KASSERT(!mutex_owned(vp->v_interlock));
	return error;
}