4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2015 Joyent, Inc.
27 * Copyright (c) 1987, 2010, Oracle and/or its affiliates. All rights reserved.
30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
34 * University Copyright- Copyright (c) 1982, 1986, 1988
35 * The Regents of the University of California
38 * University Acknowledgment- Portions of this document are derived from
39 * software developed by the University of California, Berkeley, and its
44 * Each physical swap area has an associated bitmap representing
45 * its physical storage. The bitmap records which swap slots are
46 * currently allocated or freed. Allocation is done by searching
47 * through the bitmap for the first free slot. Thus, there's
48 * no linear relation between offset within the swap device and the
49 * address (within its segment(s)) of the page that the slot backs;
50 * instead, it's an arbitrary one-to-one mapping.
52 * Associated with each swap area is a swapinfo structure. These
53 * structures are linked into a linear list that determines the
54 * ordering of swap areas in the logical swap device. Each contains a
55 * pointer to the corresponding bitmap, the area's size, and its
59 #include <sys/types.h>
60 #include <sys/inttypes.h>
61 #include <sys/param.h>
62 #include <sys/t_lock.h>
63 #include <sys/sysmacros.h>
64 #include <sys/systm.h>
65 #include <sys/errno.h>
68 #include <sys/vnode.h>
69 #include <sys/pathname.h>
70 #include <sys/cmn_err.h>
71 #include <sys/vtrace.h>
73 #include <sys/dumphdr.h>
74 #include <sys/debug.h>
75 #include <sys/fs/snode.h>
76 #include <sys/fs/swapnode.h>
77 #include <sys/policy.h>
83 #include <vm/seg_vn.h>
86 #include <vm/seg_map.h>
89 * To balance the load among multiple swap areas, we don't allow
90 * more than swap_maxcontig allocations to be satisfied from a
91 * single swap area before moving on to the next swap area. This
92 * effectively "interleaves" allocations among the many swap areas.
94 int swap_maxcontig
; /* set by anon_init() to 1 Mb */
96 #define MINIROOTSIZE 12000 /* ~6 Meg XXX */
99 * XXX - this lock is a kludge. It serializes some aspects of swapadd() and
100 * swapdel() (namely fop_open, fop_close, VN_RELE). It protects against
101 * somebody swapadd'ing and getting swap slots from a vnode, while someone
102 * else is in the process of closing or rele'ing it.
104 static kmutex_t swap_lock
;
106 kmutex_t swapinfo_lock
;
109 * protected by the swapinfo_lock
111 struct swapinfo
*swapinfo
;
113 static struct swapinfo
*silast
;
114 static int nswapfiles
;
116 static uoff_t
swap_getoff(struct swapinfo
*);
117 static int swapadd(struct vnode
*, ulong_t
, ulong_t
, char *);
118 static int swapdel(struct vnode
*, ulong_t
);
119 static int swapslot_free(struct vnode
*, uoff_t
, struct swapinfo
*);
122 * swap device bitmap allocation macros
/*
 * Bit-manipulation helpers for a swap area's slot bitmap.  `map' is an
 * array of int-sized words and `i' is a slot (bit) number.  A set bit
 * marks a slot that is not free: swap_getoff() uses TESTBIT to find a
 * clear bit and SETBIT to claim it, and swap_phys_free() uses CLEARBIT
 * to release slots.
 *
 * In every macro `%' binds tighter than `<<', so the mask is
 * 1 << ((i) % NBBW).  The word index is (i) >> MAPSHIFT; MAPSHIFT is
 * defined outside this excerpt and is presumably log2(NBBW) -- TODO confirm.
 * `i' is expanded twice, so callers must not pass an expression with
 * side effects.
 */
125 #define NBBW (sizeof(int) * NBBY) /* number of bits per word */
/* Nonzero iff bit i of map is set. */
126 #define TESTBIT(map, i) (((map)[(i) >> MAPSHIFT] & (1 << (i) % NBBW)))
/* Set bit i of map; the expression evaluates to the updated word. */
127 #define SETBIT(map, i) (((map)[(i) >> MAPSHIFT] |= (1 << (i) % NBBW)))
/* Clear bit i of map; the expression evaluates to the updated word. */
128 #define CLEARBIT(map, i) (((map)[(i) >> MAPSHIFT] &= ~(1 << (i) % NBBW)))
130 int swap_debug
= 0; /* set for debug printf's */
131 int swap_verify
= 0; /* set to verify slots when freeing and allocating */
133 uint_t swapalloc_maxcontig
;
136 * Allocate a range of up to *lenp contiguous slots (page) from a physical
137 * swap device. Flags are one of:
138 * SA_NOT Must have a slot from a physical swap device other than the
139 * one containing input (*vpp, *offp).
140 * Fewer slots than requested may be returned. *lenp allocated slots are
141 * returned starting at *offp on *vpp.
142 * Returns 1 for a successful allocation, 0 for couldn't allocate any slots.
151 struct swapinfo
*sip
;
155 mutex_enter(&swapinfo_lock
);
158 /* Find a desirable physical device and allocate from it. */
162 if (!(sip
->si_flags
& ST_INDEL
) &&
163 (spgcnt_t
)sip
->si_nfpgs
> 0) {
164 /* Caller wants other than specified swap device */
165 if (flags
& SA_NOT
) {
166 if (*vpp
!= sip
->si_vp
||
167 *offp
< sip
->si_soff
||
168 *offp
>= sip
->si_eoff
)
170 /* Caller is loose, will take anything */
173 } else if (sip
->si_nfpgs
== 0)
175 if ((sip
= sip
->si_next
) == NULL
)
177 } while (sip
!= silast
);
178 mutex_exit(&swapinfo_lock
);
181 soff
= swap_getoff(sip
);
184 panic("swap_alloc: swap_getoff failed!");
186 for (len
= PAGESIZE
; len
< *lenp
; len
+= PAGESIZE
) {
187 if (sip
->si_nfpgs
== 0)
189 if (swapalloc_maxcontig
&& len
>= swapalloc_maxcontig
)
191 noff
= swap_getoff(sip
);
194 } else if (noff
!= soff
+ len
) {
195 CLEARBIT(sip
->si_swapslots
, btop(noff
- sip
->si_soff
));
203 ASSERT((spgcnt_t
)sip
->si_nfpgs
>= 0);
204 sip
->si_allocs
+= btop(len
);
205 if (sip
->si_allocs
>= swap_maxcontig
) {
207 if ((silast
= sip
->si_next
) == NULL
)
210 mutex_exit(&swapinfo_lock
);
214 int swap_backsearch
= 0;
217 * Get a free offset on swap device sip.
218 * Return >=0 offset if succeeded, -1 for failure.
221 swap_getoff(struct swapinfo
*sip
)
224 size_t aoff
, boff
, poff
, slotnumber
;
226 ASSERT(MUTEX_HELD(&swapinfo_lock
));
229 for (sp
= &sip
->si_swapslots
[sip
->si_hint
>> MAPSHIFT
],
230 ep
= &sip
->si_swapslots
[sip
->si_mapsize
/ sizeof(int)]; sp
< ep
; sp
++) {
231 if (*sp
!= (uint_t
)0xffffffff)
237 "swap_getoff: couldn't find slot from hint %ld to end\n",
238 sip
->si_hint
, 0, 0, 0, 0);
240 * Go backwards? Check for faster method XXX
242 if (swap_backsearch
) {
243 for (sp
= &sip
->si_swapslots
[sip
->si_hint
>> MAPSHIFT
],
244 ep
= sip
->si_swapslots
; sp
> ep
; sp
--) {
245 if (*sp
!= (uint_t
)0xffffffff)
251 for (sp
= sip
->si_swapslots
,
252 ep
= &sip
->si_swapslots
[sip
->si_hint
>> MAPSHIFT
];
254 if (*sp
!= (uint_t
)0xffffffff)
260 if (*sp
== 0xffffffff) {
261 cmn_err(CE_WARN
, "No free swap slots!");
267 * aoff is the slot (page) number of the first bit in the si_swapslots
268 * array element (word) containing a free page
270 * boff is the page number offset of the free page
271 * (i.e. cleared bit) in si_swapslots[aoff].
273 aoff
= ((char *)sp
- (char *)sip
->si_swapslots
) * NBBY
;
275 for (boff
= (sip
->si_hint
% NBBW
); boff
< NBBW
; boff
++) {
276 if (!TESTBIT(sip
->si_swapslots
, aoff
+ boff
))
281 for (boff
= 0; boff
< (sip
->si_hint
% NBBW
); boff
++) {
282 if (!TESTBIT(sip
->si_swapslots
, aoff
+ boff
))
287 panic("swap_getoff: didn't find slot in word hint %ld", sip
->si_hint
);
291 * Return the offset of the free page in swap device.
292 * Convert page number to byte offset and add starting
293 * offset of swap device.
295 slotnumber
= aoff
+ boff
;
296 SWAP_PRINT(SW_ALLOC
, "swap_getoff: allocating slot %ld\n",
297 slotnumber
, 0, 0, 0, 0);
298 poff
= ptob(slotnumber
);
299 if (poff
+ sip
->si_soff
>= sip
->si_eoff
)
300 printf("ptob(aoff(%ld) + boff(%ld))(%ld) >= eoff(%ld)\n",
301 aoff
, boff
, ptob(slotnumber
), (long)sip
->si_eoff
);
302 ASSERT(poff
< sip
->si_eoff
);
304 * We could verify here that the slot isn't already allocated
305 * by looking through all the anon slots.
307 SETBIT(sip
->si_swapslots
, slotnumber
);
308 sip
->si_hint
= slotnumber
+ 1; /* hint = next slot */
309 return (poff
+ sip
->si_soff
);
316 swap_phys_free(struct vnode
*vp
, uoff_t off
, size_t len
)
318 struct swapinfo
*sip
;
319 ssize_t pagenumber
, npage
;
321 mutex_enter(&swapinfo_lock
);
325 if (sip
->si_vp
== vp
&&
326 sip
->si_soff
<= off
&& off
< sip
->si_eoff
) {
327 for (pagenumber
= btop(off
- sip
->si_soff
),
328 npage
= btop(len
) + pagenumber
;
329 pagenumber
< npage
; pagenumber
++) {
331 "swap_phys_free: freeing slot %ld on "
333 pagenumber
, sip
, 0, 0, 0);
334 if (!TESTBIT(sip
->si_swapslots
, pagenumber
)) {
336 "swap_phys_free: freeing free slot "
337 "%p,%lx\n", (void *)vp
,
338 ptob(pagenumber
) + sip
->si_soff
);
340 CLEARBIT(sip
->si_swapslots
, pagenumber
);
343 ASSERT(sip
->si_nfpgs
<= sip
->si_npgs
);
344 mutex_exit(&swapinfo_lock
);
347 } while ((sip
= sip
->si_next
) != NULL
);
348 panic("swap_phys_free");
353 * Return the anon struct corresponding to the given
354 * <vnode, off> if it is part of the virtual swap device.
355 * Return the anon struct if found, otherwise NULL.
358 swap_anon(struct vnode
*vp
, uoff_t off
)
362 ASSERT(MUTEX_HELD(AH_MUTEX(vp
, off
)));
364 for (ap
= anon_hash
[ANON_HASH(vp
, off
)]; ap
!= NULL
; ap
= ap
->an_hash
) {
365 if (ap
->an_vp
== vp
&& ap
->an_off
== off
)
373 * Determine if the vp offset range overlaps a swap device.
376 swap_in_range(struct vnode
*vp
, uoff_t offset
, size_t len
)
378 struct swapinfo
*sip
;
382 ASSERT(eoff
> offset
);
384 mutex_enter(&swapinfo_lock
);
388 if (vp
!= sip
->si_vp
|| eoff
<= sip
->si_soff
||
389 offset
>= sip
->si_eoff
)
391 mutex_exit(&swapinfo_lock
);
393 } while ((sip
= sip
->si_next
) != NULL
);
395 mutex_exit(&swapinfo_lock
);
400 * See if name is one of our swap files
401 * even though lookupname failed.
402 * This can be used by swapdel to delete
403 * swap resources on remote machines
404 * where the link has gone down.
406 static struct vnode
*
408 char *name
, /* pathname to delete */
409 ulong_t lowblk
) /* Low block number of area to delete */
411 struct swapinfo
**sipp
, *osip
;
415 * Find the swap file entry for the file to
416 * be deleted. Skip any entries that are in
420 soff
= ptob(btopr(lowblk
<< SCTRSHFT
)); /* must be page aligned */
422 mutex_enter(&swapinfo_lock
);
423 for (sipp
= &swapinfo
; (osip
= *sipp
) != NULL
; sipp
= &osip
->si_next
) {
424 if ((strcmp(osip
->si_pname
, name
) == 0) &&
425 (osip
->si_soff
== soff
) && (osip
->si_flags
== 0)) {
426 struct vnode
*vp
= osip
->si_vp
;
429 mutex_exit(&swapinfo_lock
);
433 mutex_exit(&swapinfo_lock
);
439 * New system call to manipulate swap files.
442 swapctl(int sc_cmd
, void *sc_arg
, int *rv
)
444 struct swapinfo
*sip
, *csip
, *tsip
;
446 struct swapent st
, *ust
;
459 int global
= INGLOBALZONE(curproc
);
460 struct zone
*zp
= curproc
->p_zone
;
463 * When running in a zone we want to hide the details of the swap
464 * devices: we report there only being one swap device named "swap"
465 * having a size equal to the sum of the sizes of all real swap devices
478 * Return anoninfo information with these changes:
479 * ani_max = maximum amount of swap space
480 * (including potentially available physical memory)
481 * ani_free = amount of unallocated anonymous memory
482 * (some of which might be reserved and including
483 * potentially available physical memory)
484 * ani_resv = amount of claimed (reserved) anonymous memory
486 avail
= MAX((spgcnt_t
)(availrmem
- swapfs_minfree
), 0);
487 ai
.ani_max
= (k_anoninfo
.ani_max
+
488 k_anoninfo
.ani_mem_resv
) + avail
;
490 /* Update ani_free */
492 ai
.ani_free
= k_anoninfo
.ani_free
+ avail
;
494 ai
.ani_resv
= k_anoninfo
.ani_phys_resv
+
495 k_anoninfo
.ani_mem_resv
;
497 if (!global
&& zp
->zone_max_swap_ctl
!= UINT64_MAX
) {
499 * We're in a non-global zone with a swap cap. We
500 * always report the system-wide values for the global
501 * zone, even though it too can have a swap cap.
505 * For a swap-capped zone, the numbers are contrived
506 * since we don't have a correct value of 'reserved'
509 * The ani_max value is always the zone's swap cap.
511 * The ani_free value is always the difference between
512 * the cap and the amount of swap in use by the zone.
514 * The ani_resv value is typically set to be the amount
515 * of swap in use by the zone, but can be adjusted
516 * upwards to indicate how much swap is currently
517 * unavailable to that zone due to usage by entities
520 * This works as follows.
522 * In the 'swap -s' output, the data is displayed
524 * allocated = ani_max - ani_free
525 * reserved = ani_resv - allocated
526 * available = ani_max - ani_resv
528 * Taking a contrived example, if the swap cap is 100
529 * and the amount of swap used by the zone is 75, this
531 * allocated = ani_max - ani_free = 100 - 25 = 75
532 * reserved = ani_resv - allocated = 75 - 75 = 0
533 * available = ani_max - ani_resv = 100 - 75 = 25
535 * In this typical case, you can see that the 'swap -s'
536 * 'reserved' will always be 0 inside a swap capped
539 * However, if the system as a whole has less free
540 * swap than the zone limits allow, then we adjust
541 * the ani_resv value up so that it is the difference
542 * between the zone cap and the amount of free system
543 * swap. Taking the above example, but when the
544 * system as a whole only has 20 of swap available, we
545 * get an ani_resv of 100 - 20 = 80. This gives:
546 * allocated = ani_max - ani_free = 100 - 25 = 75
547 * reserved = ani_resv - allocated = 80 - 75 = 5
548 * available = ani_max - ani_resv = 100 - 80 = 20
550 * In this case, you can see how the ani_resv value is
551 * tweaked up to make the 'swap -s' numbers work inside
554 rctl_qty_t cap
, used
;
555 pgcnt_t pgcap
, sys_avail
;
557 mutex_enter(&zp
->zone_mem_lock
);
558 cap
= zp
->zone_max_swap_ctl
;
559 used
= zp
->zone_max_swap
;
560 mutex_exit(&zp
->zone_mem_lock
);
562 pgcap
= MIN(btop(cap
), ai
.ani_max
);
563 ai
.ani_free
= pgcap
- btop(used
);
565 /* Get the system-wide swap currently available. */
566 sys_avail
= ai
.ani_max
- ai
.ani_resv
;
567 if (sys_avail
< ai
.ani_free
)
568 ai
.ani_resv
= pgcap
- sys_avail
;
570 ai
.ani_resv
= btop(used
);
575 if (copyout(&ai
, sc_arg
, sizeof (struct anoninfo
)) != 0)
580 if (copyin(sc_arg
, &length
, sizeof (int)) != 0)
584 char *swappath
= "swap";
588 ust
= (swapent_t
*)((swaptbl_t
*)sc_arg
)->swt_ent
;
589 if (copyin(ust
, &st
, sizeof (swapent_t
)) != 0)
591 st
.ste_start
= PAGESIZE
>> SCTRSHFT
;
592 st
.ste_length
= (off_t
)0;
597 mutex_enter(&swapinfo_lock
);
598 for (sip
= swapinfo
, nswap
= 0;
599 sip
!= NULL
&& nswap
< nswapfiles
;
600 sip
= sip
->si_next
, nswap
++) {
602 (sip
->si_eoff
- sip
->si_soff
) >> SCTRSHFT
;
603 st
.ste_pages
+= sip
->si_npgs
;
604 st
.ste_free
+= sip
->si_nfpgs
;
606 mutex_exit(&swapinfo_lock
);
608 if (zp
->zone_max_swap_ctl
!= UINT64_MAX
) {
609 rctl_qty_t cap
, used
;
611 mutex_enter(&zp
->zone_mem_lock
);
612 cap
= zp
->zone_max_swap_ctl
;
613 used
= zp
->zone_max_swap
;
614 mutex_exit(&zp
->zone_mem_lock
);
616 st
.ste_length
= MIN(cap
, st
.ste_length
);
617 st
.ste_pages
= MIN(btop(cap
), st
.ste_pages
);
618 st
.ste_free
= MIN(st
.ste_pages
- btop(used
),
622 if (copyout(&st
, ust
, sizeof (swapent_t
)) != 0 ||
623 copyout(swappath
, st
.ste_path
,
624 strlen(swappath
) + 1) != 0) {
631 mutex_enter(&swapinfo_lock
);
632 tmp_nswapfiles
= nswapfiles
;
633 mutex_exit(&swapinfo_lock
);
636 * Return early if there are no swap entries to report:
638 if (tmp_nswapfiles
< 1) {
643 /* Return an error if not enough space for the whole table. */
644 if (length
< tmp_nswapfiles
)
647 * Get memory to hold the swap entries and their names. We'll
648 * copy the real entries into these and then copy these out.
649 * Allocating the pathname memory is only a guess so we may
650 * find that we need more and have to do it again.
651 * All this is because we have to hold the swapinfo_lock while
652 * traversing the swapinfo list, and we can't be doing copyouts
653 * and/or kmem_alloc()s during this.
655 csip
= kmem_zalloc(tmp_nswapfiles
* sizeof (struct swapinfo
),
658 nlen
= tmp_nswapfiles
* (gplen
+= 100);
659 pname
= kmem_zalloc(nlen
, KM_SLEEP
);
661 mutex_enter(&swapinfo_lock
);
663 if (tmp_nswapfiles
!= nswapfiles
) {
664 mutex_exit(&swapinfo_lock
);
665 kmem_free(pname
, nlen
);
667 tmp_nswapfiles
* sizeof (struct swapinfo
));
671 for (sip
= swapinfo
, tsip
= csip
, tpname
= pname
, nswap
= 0;
672 sip
&& nswap
< tmp_nswapfiles
;
673 sip
= sip
->si_next
, tsip
++, tpname
+= plen
, nswap
++) {
674 plen
= sip
->si_pnamelen
;
675 if (tpname
+ plen
- pname
> nlen
) {
676 mutex_exit(&swapinfo_lock
);
677 kmem_free(pname
, nlen
);
681 tsip
->si_pname
= tpname
;
682 (void) strcpy(tsip
->si_pname
, sip
->si_pname
);
684 mutex_exit(&swapinfo_lock
);
690 ust
= (swapent_t
*)((swaptbl_t
*)sc_arg
)->swt_ent
;
691 for (tsip
= csip
, cnt
= 0; cnt
< nswap
; tsip
++, ust
++, cnt
++) {
692 if (copyin(ust
, &st
, sizeof (swapent_t
)) != 0) {
696 st
.ste_flags
= tsip
->si_flags
;
698 (tsip
->si_eoff
- tsip
->si_soff
) >> SCTRSHFT
;
699 st
.ste_start
= tsip
->si_soff
>> SCTRSHFT
;
700 st
.ste_pages
= tsip
->si_npgs
;
701 st
.ste_free
= tsip
->si_nfpgs
;
702 if (copyout(&st
, ust
, sizeof (swapent_t
)) != 0) {
706 if (!tsip
->si_pnamelen
)
708 if (copyout(tsip
->si_pname
, st
.ste_path
,
709 tsip
->si_pnamelen
) != 0) {
716 kmem_free(csip
, tmp_nswapfiles
* sizeof (struct swapinfo
));
717 kmem_free(pname
, nlen
);
726 if ((error
= secpolicy_swapctl(CRED())) != 0)
729 if (copyin(sc_arg
, &sr
, sizeof (swapres_t
)))
732 /* Allocate the space to read in pathname */
733 if ((swapname
= kmem_alloc(MAXPATHLEN
, KM_NOSLEEP
)) == NULL
)
736 error
= copyinstr(sr
.sr_name
, swapname
, MAXPATHLEN
, 0);
740 error
= lookupname(swapname
, UIO_SYSSPACE
, FOLLOW
, NULLVPP
, &vp
);
742 if (sc_cmd
== SC_ADD
)
744 /* see if we match by name */
745 vp
= swapdel_byname(swapname
, (size_t)sr
.sr_start
);
750 if (vp
->v_flag
& (VNOMAP
| VNOSWAP
)) {
755 switch (vp
->v_type
) {
760 if (vp
->v_vfsp
&& vn_is_readonly(vp
))
763 error
= fop_access(vp
, VREAD
|VWRITE
, 0, CRED(), NULL
);
774 if (sc_cmd
== SC_REMOVE
)
775 error
= swapdel(vp
, sr
.sr_start
);
777 error
= swapadd(vp
, sr
.sr_start
,
778 sr
.sr_length
, swapname
);
782 kmem_free(swapname
, MAXPATHLEN
);
786 #if defined(_LP64) && defined(_SYSCALL32)
789 swapctl32(int sc_cmd
, void *sc_arg
, int *rv
)
791 struct swapinfo
*sip
, *csip
, *tsip
;
793 struct swapent32 st
, *ust
;
804 struct anoninfo32 ai
;
807 int global
= INGLOBALZONE(curproc
);
808 struct zone
*zp
= curproc
->p_zone
;
811 * When running in a zone we want to hide the details of the swap
812 * devices: we report there only being one swap device named "swap"
813 * having a size equal to the sum of the sizes of all real swap devices
826 * Return anoninfo information with these changes:
827 * ani_max = maximum amount of swap space
828 * (including potentially available physical memory)
829 * ani_free = amount of unallocated anonymous memory
830 * (some of which might be reserved and including
831 * potentially available physical memory)
832 * ani_resv = amount of claimed (reserved) anonymous memory
834 avail
= MAX((spgcnt_t
)(availrmem
- swapfs_minfree
), 0);
835 s
= (k_anoninfo
.ani_max
+ k_anoninfo
.ani_mem_resv
) + avail
;
840 /* Update ani_free */
842 s
= k_anoninfo
.ani_free
+ avail
;
847 s
= k_anoninfo
.ani_phys_resv
+ k_anoninfo
.ani_mem_resv
;
852 if (!global
&& zp
->zone_max_swap_ctl
!= UINT64_MAX
) {
854 * We're in a non-global zone with a swap cap. We
855 * always report the system-wide values for the global
856 * zone, even though it too can have a swap cap.
857 * See the comment for the SC_AINFO case in swapctl()
858 * which explains the following logic.
860 rctl_qty_t cap
, used
;
861 pgcnt_t pgcap
, sys_avail
;
863 mutex_enter(&zp
->zone_mem_lock
);
864 cap
= zp
->zone_max_swap_ctl
;
865 used
= zp
->zone_max_swap
;
866 mutex_exit(&zp
->zone_mem_lock
);
868 pgcap
= MIN(btop(cap
), ai
.ani_max
);
869 ai
.ani_free
= pgcap
- btop(used
);
871 /* Get the system-wide swap currently available. */
872 sys_avail
= ai
.ani_max
- ai
.ani_resv
;
873 if (sys_avail
< ai
.ani_free
)
874 ai
.ani_resv
= pgcap
- sys_avail
;
876 ai
.ani_resv
= btop(used
);
881 if (copyout(&ai
, sc_arg
, sizeof (ai
)) != 0)
886 if (copyin(sc_arg
, &length
, sizeof (int32_t)) != 0)
890 char *swappath
= "swap";
894 ust
= (swapent32_t
*)((swaptbl32_t
*)sc_arg
)->swt_ent
;
895 if (copyin(ust
, &st
, sizeof (swapent32_t
)) != 0)
897 st
.ste_start
= PAGESIZE
>> SCTRSHFT
;
898 st
.ste_length
= (off_t
)0;
903 mutex_enter(&swapinfo_lock
);
904 for (sip
= swapinfo
, nswap
= 0;
905 sip
!= NULL
&& nswap
< nswapfiles
;
906 sip
= sip
->si_next
, nswap
++) {
908 (sip
->si_eoff
- sip
->si_soff
) >> SCTRSHFT
;
909 st
.ste_pages
+= sip
->si_npgs
;
910 st
.ste_free
+= sip
->si_nfpgs
;
912 mutex_exit(&swapinfo_lock
);
914 if (zp
->zone_max_swap_ctl
!= UINT64_MAX
) {
915 rctl_qty_t cap
, used
;
917 mutex_enter(&zp
->zone_mem_lock
);
918 cap
= zp
->zone_max_swap_ctl
;
919 used
= zp
->zone_max_swap
;
920 mutex_exit(&zp
->zone_mem_lock
);
922 st
.ste_length
= MIN(cap
, st
.ste_length
);
923 st
.ste_pages
= MIN(btop(cap
), st
.ste_pages
);
924 st
.ste_free
= MIN(st
.ste_pages
- btop(used
),
928 if (copyout(&st
, ust
, sizeof (swapent32_t
)) != 0 ||
929 copyout(swappath
, (caddr_t
)(uintptr_t)st
.ste_path
,
930 strlen(swappath
) + 1) != 0) {
937 mutex_enter(&swapinfo_lock
);
938 tmp_nswapfiles
= nswapfiles
;
939 mutex_exit(&swapinfo_lock
);
942 * Return early if there are no swap entries to report:
944 if (tmp_nswapfiles
< 1) {
949 /* Return an error if not enough space for the whole table. */
950 if (length
< tmp_nswapfiles
)
953 * Get memory to hold the swap entries and their names. We'll
954 * copy the real entries into these and then copy these out.
955 * Allocating the pathname memory is only a guess so we may
956 * find that we need more and have to do it again.
957 * All this is because we have to hold the swapinfo_lock while
958 * traversing the swapinfo list, and we can't be doing copyouts
959 * and/or kmem_alloc()s during this.
961 csip
= kmem_zalloc(tmp_nswapfiles
* sizeof (*csip
), KM_SLEEP
);
963 nlen
= tmp_nswapfiles
* (gplen
+= 100);
964 pname
= kmem_zalloc(nlen
, KM_SLEEP
);
966 mutex_enter(&swapinfo_lock
);
968 if (tmp_nswapfiles
!= nswapfiles
) {
969 mutex_exit(&swapinfo_lock
);
970 kmem_free(pname
, nlen
);
971 kmem_free(csip
, tmp_nswapfiles
* sizeof (*csip
));
975 for (sip
= swapinfo
, tsip
= csip
, tpname
= pname
, nswap
= 0;
976 (sip
!= NULL
) && (nswap
< tmp_nswapfiles
);
977 sip
= sip
->si_next
, tsip
++, tpname
+= plen
, nswap
++) {
978 plen
= sip
->si_pnamelen
;
979 if (tpname
+ plen
- pname
> nlen
) {
980 mutex_exit(&swapinfo_lock
);
981 kmem_free(pname
, nlen
);
985 tsip
->si_pname
= tpname
;
986 (void) strcpy(tsip
->si_pname
, sip
->si_pname
);
988 mutex_exit(&swapinfo_lock
);
994 ust
= (swapent32_t
*)((swaptbl32_t
*)sc_arg
)->swt_ent
;
995 for (tsip
= csip
, cnt
= 0; cnt
< nswap
; tsip
++, ust
++, cnt
++) {
996 if (copyin(ust
, &st
, sizeof (*ust
)) != 0) {
1000 st
.ste_flags
= tsip
->si_flags
;
1002 (tsip
->si_eoff
- tsip
->si_soff
) >> SCTRSHFT
;
1003 st
.ste_start
= tsip
->si_soff
>> SCTRSHFT
;
1004 st
.ste_pages
= tsip
->si_npgs
;
1005 st
.ste_free
= tsip
->si_nfpgs
;
1006 if (copyout(&st
, ust
, sizeof (st
)) != 0) {
1010 if (!tsip
->si_pnamelen
)
1012 if (copyout(tsip
->si_pname
,
1013 (caddr_t
)(uintptr_t)st
.ste_path
,
1014 tsip
->si_pnamelen
) != 0) {
1021 kmem_free(csip
, tmp_nswapfiles
* sizeof (*csip
));
1022 kmem_free(pname
, nlen
);
1031 if ((error
= secpolicy_swapctl(CRED())) != 0)
1034 if (copyin(sc_arg
, &sr
, sizeof (sr
)))
1037 /* Allocate the space to read in pathname */
1038 if ((swapname
= kmem_alloc(MAXPATHLEN
, KM_NOSLEEP
)) == NULL
)
1041 error
= copyinstr((caddr_t
)(uintptr_t)sr
.sr_name
,
1042 swapname
, MAXPATHLEN
, NULL
);
1046 error
= lookupname(swapname
, UIO_SYSSPACE
, FOLLOW
, NULLVPP
, &vp
);
1048 if (sc_cmd
== SC_ADD
)
1050 /* see if we match by name */
1051 vp
= swapdel_byname(swapname
, (uint_t
)sr
.sr_start
);
1056 if (vp
->v_flag
& (VNOMAP
| VNOSWAP
)) {
1061 switch (vp
->v_type
) {
1066 if (vp
->v_vfsp
&& vn_is_readonly(vp
))
1069 error
= fop_access(vp
, VREAD
|VWRITE
, 0, CRED(), NULL
);
1080 if (sc_cmd
== SC_REMOVE
)
1081 error
= swapdel(vp
, sr
.sr_start
);
1083 error
= swapadd(vp
, sr
.sr_start
, sr
.sr_length
,
1088 kmem_free(swapname
, MAXPATHLEN
);
1092 #endif /* _LP64 && _SYSCALL32 */
1095 * Add a new swap file.
1098 swapadd(struct vnode
*vp
, ulong_t lowblk
, ulong_t nblks
, char *swapname
)
1100 struct swapinfo
**sipp
, *nsip
= NULL
, *esip
= NULL
;
1106 ssize_t i
, start
, end
;
1109 size_t returned_mem
;
1111 SWAP_PRINT(SW_CTL
, "swapadd: vp %p lowblk %ld nblks %ld swapname %s\n",
1112 vp
, lowblk
, nblks
, swapname
, 0);
1114 * Get the real vnode. (If vp is not a specnode it just returns vp, so
1115 * it does the right thing, but having this code know about specnodes
1116 * violates the spirit of having it be independent of vnode type.)
1118 cvp
= common_specvp(vp
);
1121 * Or in VISSWAP so file system has chance to deny swap-ons during open.
1123 mutex_enter(&cvp
->v_lock
);
1124 wasswap
= cvp
->v_flag
& VISSWAP
;
1125 cvp
->v_flag
|= VISSWAP
;
1126 mutex_exit(&cvp
->v_lock
);
1128 mutex_enter(&swap_lock
);
1129 if (error
= fop_open(&cvp
, FREAD
|FWRITE
, CRED(), NULL
)) {
1130 mutex_exit(&swap_lock
);
1131 /* restore state of v_flag */
1133 mutex_enter(&cvp
->v_lock
);
1134 cvp
->v_flag
&= ~VISSWAP
;
1135 mutex_exit(&cvp
->v_lock
);
1139 mutex_exit(&swap_lock
);
1142 * Get partition size. Return error if empty partition,
1143 * or if request does not fit within the partition.
1144 * If this is the first swap device, we can reduce
1145 * the size of the swap area to match what is
1146 * available. This can happen if the system was built
1147 * on a machine with a different size swap partition.
1149 vattr
.va_mask
= VATTR_SIZE
;
1150 if (error
= fop_getattr(cvp
, &vattr
, ATTR_COMM
, CRED(), NULL
))
1154 * Specfs returns a va_size of MAXOFFSET_T (UNKNOWN_SIZE) when the
1155 * size of the device can't be determined.
1157 if ((vattr
.va_size
== 0) || (vattr
.va_size
== MAXOFFSET_T
)) {
1164 * No support for large swap in 32-bit OS, if the size of the swap is
1165 * bigger than INT32_MAX then the size used by swapfs must be limited.
1166 * This limitation is imposed by the swap subsystem itself, a D_64BIT
1167 * driver as the target of swap operation should be able to field
1170 if (vattr
.va_size
> INT32_MAX
) {
1172 "!swap device %s truncated from 0x%llx to 0x%x bytes",
1173 swapname
, vattr
.va_size
, INT32_MAX
);
1174 vattr
.va_size
= INT32_MAX
;
1178 /* Fail if file not writeable (try to set size to current size) */
1179 vattr
.va_mask
= VATTR_SIZE
;
1180 if (error
= fop_setattr(cvp
, &vattr
, 0, CRED(), NULL
))
1183 /* Fail if fs does not support fop_pageio */
1184 error
= fop_pageio(cvp
, NULL
, 0, 0, 0, CRED(),
1187 if (error
== ENOSYS
)
1192 * If swapping on the root filesystem don't put swap blocks that
1193 * correspond to the miniroot filesystem on the swap free list.
1196 startblk
= roundup(MINIROOTSIZE
<<SCTRSHFT
, klustsize
)>>SCTRSHFT
;
1197 else /* Skip 1st page (disk label) */
1198 startblk
= (ulong_t
)(lowblk
? lowblk
: 1);
1200 soff
= startblk
<< SCTRSHFT
;
1201 if (soff
>= vattr
.va_size
) {
1207 * If user specified 0 blks, use the size of the device
1209 eoff
= nblks
? soff
+ (nblks
- (startblk
- lowblk
) << SCTRSHFT
) :
1212 SWAP_PRINT(SW_CTL
, "swapadd: va_size %ld soff %ld eoff %ld\n",
1213 vattr
.va_size
, soff
, eoff
, 0, 0);
1215 if (eoff
> vattr
.va_size
) {
1221 * The starting and ending offsets must be page aligned.
1222 * Round soff up to next page boundary, round eoff
1223 * down to previous page boundary.
1225 soff
= ptob(btopr(soff
));
1226 eoff
= ptob(btop(eoff
));
1228 SWAP_PRINT(SW_CTL
, "swapadd: soff %ld >= eoff %ld\n",
1229 soff
, eoff
, 0, 0, 0);
1234 pages
= btop(eoff
- soff
);
1236 /* Allocate and partially set up the new swapinfo */
1237 nsip
= kmem_zalloc(sizeof (struct swapinfo
), KM_SLEEP
);
1240 nsip
->si_soff
= soff
;
1241 nsip
->si_eoff
= eoff
;
1243 nsip
->si_checkcnt
= nsip
->si_alloccnt
= 0;
1245 nsip
->si_pnamelen
= (int)strlen(swapname
) + 1;
1246 nsip
->si_pname
= kmem_zalloc(nsip
->si_pnamelen
, KM_SLEEP
);
1247 bcopy(swapname
, nsip
->si_pname
, nsip
->si_pnamelen
- 1);
1248 SWAP_PRINT(SW_CTL
, "swapadd: allocating swapinfo for %s, %ld pages\n",
1249 swapname
, pages
, 0, 0, 0);
1251 * Size of swapslots map in bytes
1253 nsip
->si_mapsize
= P2ROUNDUP(pages
, NBBW
) / NBBY
;
1254 nsip
->si_swapslots
= kmem_zalloc(nsip
->si_mapsize
, KM_SLEEP
);
1257 * Permanently set the bits that can't ever be allocated,
1258 * i.e. those from the ending offset to the round up slot for the
1259 * swapslots bit map.
1262 end
= P2ROUNDUP(pages
, NBBW
);
1263 for (i
= start
; i
< end
; i
++) {
1264 SWAP_PRINT(SW_CTL
, "swapadd: set bit for page %ld\n", i
,
1266 SETBIT(nsip
->si_swapslots
, i
);
1268 nsip
->si_npgs
= nsip
->si_nfpgs
= pages
;
1270 * Now check to see if we can add it. We wait until now to check because
1271 * we need the swapinfo_lock and we don't want to sleep with it (e.g.,
1272 * during kmem_alloc()) while we're setting up the swapinfo.
1274 mutex_enter(&swapinfo_lock
);
1275 for (sipp
= &swapinfo
; (esip
= *sipp
) != NULL
; sipp
= &esip
->si_next
) {
1276 if (esip
->si_vp
== cvp
) {
1277 if (esip
->si_soff
== soff
&& esip
->si_npgs
== pages
&&
1278 (esip
->si_flags
& ST_DOINGDEL
)) {
1280 * We are adding a device that we are in the
1281 * middle of deleting. Just clear the
1282 * ST_DOINGDEL flag to signal this and
1283 * the deletion routine will eventually notice
1284 * it and add it back.
1286 esip
->si_flags
&= ~ST_DOINGDEL
;
1287 mutex_exit(&swapinfo_lock
);
1290 /* disallow overlapping swap files */
1291 if ((soff
< esip
->si_eoff
) && (eoff
> esip
->si_soff
)) {
1293 mutex_exit(&swapinfo_lock
);
1302 * add new swap device to list and shift allocations to it
1303 * before updating the anoninfo counters
1309 * Update the total amount of reservable swap space
1310 * accounting properly for swap space from physical memory
1312 /* New swap device soaks up currently reserved memory swap */
1313 mutex_enter(&anoninfo_lock
);
1315 ASSERT(k_anoninfo
.ani_mem_resv
>= k_anoninfo
.ani_locked_swap
);
1316 ASSERT(k_anoninfo
.ani_max
>= k_anoninfo
.ani_phys_resv
);
1318 k_anoninfo
.ani_max
+= pages
;
1320 if (k_anoninfo
.ani_mem_resv
> k_anoninfo
.ani_locked_swap
) {
1321 returned_mem
= MIN(k_anoninfo
.ani_mem_resv
-
1322 k_anoninfo
.ani_locked_swap
,
1323 k_anoninfo
.ani_max
- k_anoninfo
.ani_phys_resv
);
1325 ANI_ADD(-returned_mem
);
1326 k_anoninfo
.ani_free
-= returned_mem
;
1327 k_anoninfo
.ani_mem_resv
-= returned_mem
;
1328 k_anoninfo
.ani_phys_resv
+= returned_mem
;
1330 mutex_enter(&freemem_lock
);
1331 availrmem
+= returned_mem
;
1332 mutex_exit(&freemem_lock
);
1335 * At boot time, to permit booting small memory machines using
1336 * only physical memory as swap space, we allowed a dangerously
1337 * large amount of memory to be used as swap space; now that
1338 * more physical backing store is available bump down the amount
1339 * we can get from memory to a safer size.
1341 if (swapfs_minfree
< swapfs_desfree
) {
1342 mutex_enter(&freemem_lock
);
1343 if (availrmem
> swapfs_desfree
|| !k_anoninfo
.ani_mem_resv
)
1344 swapfs_minfree
= swapfs_desfree
;
1345 mutex_exit(&freemem_lock
);
1348 SWAP_PRINT(SW_CTL
, "swapadd: ani_max %ld ani_free %ld\n",
1349 k_anoninfo
.ani_free
, k_anoninfo
.ani_free
, 0, 0, 0);
1351 mutex_exit(&anoninfo_lock
);
1353 mutex_exit(&swapinfo_lock
);
1355 /* Initialize the dump device */
1356 mutex_enter(&dump_lock
);
1358 (void) dumpinit(vp
, swapname
, 0);
1359 mutex_exit(&dump_lock
);
1363 if (error
|| esip
) {
1364 SWAP_PRINT(SW_CTL
, "swapadd: error (%d)\n", error
, 0, 0, 0, 0);
1367 mutex_enter(&cvp
->v_lock
);
1368 cvp
->v_flag
&= ~VISSWAP
;
1369 mutex_exit(&cvp
->v_lock
);
1372 kmem_free(nsip
->si_swapslots
, (size_t)nsip
->si_mapsize
);
1373 kmem_free(nsip
->si_pname
, nsip
->si_pnamelen
);
1374 kmem_free(nsip
, sizeof (*nsip
));
1376 mutex_enter(&swap_lock
);
1377 (void) fop_close(cvp
, FREAD
|FWRITE
, 1, 0, CRED(),
1379 mutex_exit(&swap_lock
);
1385 * Delete a swap file.
1390 ulong_t lowblk
) /* Low block number of area to delete. */
1392 struct swapinfo
**sipp
, *osip
= NULL
;
1397 struct vnode
*tvp
= NULL
;
1399 struct anon
**app
, *ap
;
1401 pgcnt_t adjust_swap
= 0;
1403 /* Find the swap file entry for the file to be deleted */
1404 cvp
= common_specvp(vp
);
1407 lowblk
= lowblk
? lowblk
: 1; /* Skip first page (disk label) */
1408 soff
= ptob(btopr(lowblk
<< SCTRSHFT
)); /* must be page aligned */
1410 mutex_enter(&swapinfo_lock
);
1411 for (sipp
= &swapinfo
; (osip
= *sipp
) != NULL
; sipp
= &osip
->si_next
) {
1412 if ((osip
->si_vp
== cvp
) &&
1413 (osip
->si_soff
== soff
) && (osip
->si_flags
== 0))
1417 /* If the file was not found, error. */
1420 mutex_exit(&swapinfo_lock
);
1424 pages
= osip
->si_npgs
;
1427 * Do not delete if we will be low on swap pages.
1429 mutex_enter(&anoninfo_lock
);
1431 ASSERT(k_anoninfo
.ani_mem_resv
>= k_anoninfo
.ani_locked_swap
);
1432 ASSERT(k_anoninfo
.ani_max
>= k_anoninfo
.ani_phys_resv
);
1434 mutex_enter(&freemem_lock
);
1435 if (((k_anoninfo
.ani_max
- k_anoninfo
.ani_phys_resv
) +
1436 MAX((spgcnt_t
)(availrmem
- swapfs_minfree
), 0)) < pages
) {
1437 mutex_exit(&freemem_lock
);
1438 mutex_exit(&anoninfo_lock
);
1440 cmn_err(CE_WARN
, "swapdel - too few free pages");
1441 mutex_exit(&swapinfo_lock
);
1444 mutex_exit(&freemem_lock
);
1446 k_anoninfo
.ani_max
-= pages
;
1448 /* If needed, reserve memory swap to replace old device */
1449 if (k_anoninfo
.ani_phys_resv
> k_anoninfo
.ani_max
) {
1450 adjust_swap
= k_anoninfo
.ani_phys_resv
- k_anoninfo
.ani_max
;
1451 k_anoninfo
.ani_phys_resv
-= adjust_swap
;
1452 k_anoninfo
.ani_mem_resv
+= adjust_swap
;
1453 mutex_enter(&freemem_lock
);
1454 availrmem
-= adjust_swap
;
1455 mutex_exit(&freemem_lock
);
1456 ANI_ADD(adjust_swap
);
1458 ASSERT(k_anoninfo
.ani_mem_resv
>= k_anoninfo
.ani_locked_swap
);
1459 ASSERT(k_anoninfo
.ani_max
>= k_anoninfo
.ani_phys_resv
);
1460 mutex_exit(&anoninfo_lock
);
1465 * Set the delete flag. This prevents anyone from allocating more
1466 * pages from this file. Also set ST_DOINGDEL. Someone who wants to
1467 * add the file back while we're deleting it will signify by clearing
1470 osip
->si_flags
|= ST_INDEL
|ST_DOINGDEL
;
1471 mutex_exit(&swapinfo_lock
);
1474 * Free all the allocated physical slots for this file. We do this
1475 * by walking through the entire anon hash array, because we need
1476 * to update all the anon slots that have physical swap slots on
1477 * this file, and this is the only way to find them all. We go back
1478 * to the beginning of a bucket after each slot is freed because the
1479 * anonhash_lock is not held during the free and thus the hash table
1480 * may change under us.
1482 for (app
= anon_hash
; app
< &anon_hash
[ANON_HASH_SIZE
]; app
++) {
1483 ahm
= &anonhash_lock
[(app
- anon_hash
) &
1484 (AH_LOCK_SIZE
- 1)].pad_mutex
;
1487 for (ap
= *app
; ap
!= NULL
; ap
= ap
->an_hash
) {
1488 if (ap
->an_pvp
== cvp
&&
1489 ap
->an_poff
>= osip
->si_soff
&&
1490 ap
->an_poff
< osip
->si_eoff
) {
1491 ASSERT(TESTBIT(osip
->si_swapslots
,
1492 btop((size_t)(ap
->an_poff
-
1499 error
= swapslot_free(tvp
, toff
, osip
);
1503 if (!error
&& (osip
->si_flags
& ST_DOINGDEL
)) {
1508 "swapslot_free failed %d",
1513 * Add device back before making it
1516 mutex_enter(&swapinfo_lock
);
1518 ~(ST_INDEL
| ST_DOINGDEL
);
1519 mutex_exit(&swapinfo_lock
);
1522 * Update the anon space available
1524 mutex_enter(&anoninfo_lock
);
1526 k_anoninfo
.ani_phys_resv
+= adjust_swap
;
1527 k_anoninfo
.ani_mem_resv
-= adjust_swap
;
1528 k_anoninfo
.ani_max
+= pages
;
1530 mutex_enter(&freemem_lock
);
1531 availrmem
+= adjust_swap
;
1532 mutex_exit(&freemem_lock
);
1534 mutex_exit(&anoninfo_lock
);
1546 /* All done, they'd better all be free! */
1547 mutex_enter(&swapinfo_lock
);
1548 ASSERT(osip
->si_nfpgs
== osip
->si_npgs
);
1550 /* Now remove it from the swapinfo list */
1551 for (sipp
= &swapinfo
; *sipp
!= NULL
; sipp
= &(*sipp
)->si_next
) {
1556 *sipp
= osip
->si_next
;
1558 if ((silast
= osip
->si_next
) == NULL
)
1561 mutex_exit(&swapinfo_lock
);
1563 kmem_free(osip
->si_swapslots
, osip
->si_mapsize
);
1564 kmem_free(osip
->si_pname
, osip
->si_pnamelen
);
1565 kmem_free(osip
, sizeof (*osip
));
1567 mutex_enter(&dump_lock
);
1570 mutex_exit(&dump_lock
);
1572 /* Release the vnode */
1574 mutex_enter(&swap_lock
);
1575 (void) fop_close(cvp
, FREAD
|FWRITE
, 1, 0, CRED(), NULL
);
1576 mutex_enter(&cvp
->v_lock
);
1577 cvp
->v_flag
&= ~VISSWAP
;
1578 mutex_exit(&cvp
->v_lock
);
1580 mutex_exit(&swap_lock
);
1586 * Free up a physical swap slot on swapinfo sip, currently in use by the
1587 * anonymous page whose name is (vp, off).
1593 struct swapinfo
*sip
)
1595 struct page
*pp
= NULL
;
1596 struct anon
*ap
= NULL
;
1599 struct vnode
*pvp
= NULL
;
1603 ASSERT(sip
->si_vp
!= NULL
);
1605 * Get the page for the old swap slot if exists or create a new one.
1608 if ((pp
= page_lookup(&vp
->v_object
, off
, SE_SHARED
)) == NULL
) {
1609 pp
= page_create_va(&vp
->v_object
, off
, PAGESIZE
,
1610 PG_WAIT
| PG_EXCL
, segkmap
, NULL
);
1615 error
= swap_getphysname(vp
, off
, &pvp
, &poff
);
1616 if (error
|| pvp
!= sip
->si_vp
|| poff
< sip
->si_soff
||
1617 poff
>= sip
->si_eoff
) {
1619 VN_DISPOSE(pp
, B_INVAL
, 0, kcred
);
1623 error
= fop_pageio(pvp
, pp
, poff
, PAGESIZE
, B_READ
,
1627 if (error
== EFAULT
)
1630 VN_DISPOSE(pp
, B_INVAL
, 0, kcred
);
1636 * The anon could have been removed by anon_decref* and/or reallocated
1637 * by anon layer (an_pvp == NULL) with the same vp, off.
1638 * In this case the page which has been allocated needs to
1643 ahm
= AH_MUTEX(vp
, off
);
1645 ap
= swap_anon(vp
, off
);
1646 if ((ap
== NULL
|| ap
->an_pvp
== NULL
) && alloc_pg
) {
1649 VN_DISPOSE(pp
, B_INVAL
, 0, kcred
);
1654 * Free the physical slot. It may have been freed up and replaced with
1655 * another one while we were getting the page so we have to re-verify
1656 * that this is really one we want. If we do free the slot we have
1657 * to mark the page modified, as its backing store is now gone.
1659 if ((ap
!= NULL
) && (ap
->an_pvp
== sip
->si_vp
&& ap
->an_poff
>=
1660 sip
->si_soff
&& ap
->an_poff
< sip
->si_eoff
)) {
1661 swap_phys_free(ap
->an_pvp
, ap
->an_poff
, PAGESIZE
);
1676 * Get contig physical backing store for vp, in the range
1677 * [*offp, *offp + *lenp). May back a subrange of this, but must
1678 * always include the requested offset or fail. Returns the offsets
1679 * backed as [*offp, *offp + *lenp) and the physical offsets used to
1680 * back them from *pvpp in the range [*pstartp, *pstartp + *lenp).
1681 * Returns 0 for success
1682 * SE_NOANON -- no anon slot for requested page
1683 * SE_NOSWAP -- no physical swap space available
1691 struct vnode
**pvpp
,
1694 struct anon
*ap
= NULL
; /* anon slot for vp, off */
1697 uoff_t poff
, pstart
, prem
;
1702 ASSERT(*offp
<= offset
&& offset
< *offp
+ *lenp
);
1704 /* Get new physical swap slots. */
1706 if (!swap_phys_alloc(&pvp
, &pstart
, &plen
, 0)) {
1708 * No swap available so return error unless requested
1709 * offset is already backed in which case return that.
1711 ahm
= AH_MUTEX(vp
, offset
);
1713 if ((ap
= swap_anon(vp
, offset
)) == NULL
) {
1718 error
= (ap
->an_pvp
? 0 : SE_NOSWAP
);
1722 *poffp
= ap
->an_poff
;
1728 * We got plen (<= *lenp) contig slots. Use these to back a
1729 * subrange of [*offp, *offp + *lenp) which includes offset.
1730 * For now we just put offset at the end of the kluster.
1731 * Clearly there are other possible choices - which is best?
1734 (offset
+ PAGESIZE
> plen
) ? (offset
+ PAGESIZE
- plen
) : 0);
1735 ASSERT(start
+ plen
<= *offp
+ *lenp
);
1737 for (off
= start
, poff
= pstart
; poff
< pstart
+ plen
;
1738 off
+= PAGESIZE
, poff
+= PAGESIZE
) {
1739 ahm
= AH_MUTEX(vp
, off
);
1741 if ((ap
= swap_anon(vp
, off
)) != NULL
) {
1742 /* Free old slot if any, and assign new one */
1744 swap_phys_free(ap
->an_pvp
, ap
->an_poff
,
1748 } else { /* No anon slot for a klustered page, quit. */
1749 prem
= (pstart
+ plen
) - poff
;
1750 /* Already did requested page, do partial kluster */
1752 plen
= poff
- pstart
;
1754 /* Fail on requested page, error */
1755 } else if (off
== offset
) {
1757 /* Fail on prior page, fail on requested page, error */
1758 } else if ((ap
= swap_anon(vp
, offset
)) == NULL
) {
1760 /* Fail on prior page, got requested page, do only it */
1762 /* Free old slot if any, and assign new one */
1764 swap_phys_free(ap
->an_pvp
, ap
->an_poff
,
1768 /* One page kluster */
1775 /* Free unassigned slots */
1776 swap_phys_free(pvp
, poff
, prem
);
1782 ASSERT(*offp
<= start
&& start
+ plen
<= *offp
+ *lenp
);
1783 ASSERT(start
<= offset
&& offset
< start
+ plen
);
1793 * Get the physical swap backing store location for a given anonymous page
1794 * named (vp, off). The backing store name is returned in (*pvpp, *poffp).
1796 * EIDRM -- no anon slot (page is not allocated)
1802 struct vnode
**pvpp
,
1809 ahm
= AH_MUTEX(vp
, off
);
1812 /* Get anon slot for vp, off */
1813 ap
= swap_anon(vp
, off
);
1819 *poffp
= ap
->an_poff
;