/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */
/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>

#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t	segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
		    size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
		    uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
		    uint_t *protv);
static uoff_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
		    struct page ***ppp, enum lock_type type,
		    enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);

static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, uoff_t,
		    struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);
#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

static const struct seg_ops segmap_ops = {
	.dup		= SEGMAP_BADOP(int),
	.unmap		= SEGMAP_BADOP(int),
	.fault		= segmap_fault,
	.faulta		= segmap_faulta,
	.setprot	= SEGMAP_BADOP(int),
	.checkprot	= segmap_checkprot,
	.kluster	= segmap_kluster,
	.sync		= SEGMAP_BADOP(int),
	.incore		= SEGMAP_BADOP(size_t),
	.lockop		= SEGMAP_BADOP(int),
	.getprot	= segmap_getprot,
	.getoffset	= segmap_getoffset,
	.gettype	= segmap_gettype,
	.getvp		= segmap_getvp,
	.advise		= SEGMAP_BADOP(int),
	.pagelock	= segmap_pagelock,
	.setpagesize	= SEGMAP_BADOP(int),
	.getmemid	= segmap_getmemid,
};
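
/*
 * Note (illustration only, not original code): the SEGMAP_BADOP() entries
 * above just cast segmap_badop to the right function type, so that any
 * unsupported segment operation invoked through the generic seg_ops vector,
 * e.g.
 *
 *	(void) segmap_ops.dup(seg, newseg);
 *
 * ends up in segmap_badop() and panics.  segkmap is never duplicated,
 * unmapped, sync'ed, locked, etc. through this interface.
 */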
/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
		    size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
		    uoff_t off, int hashid);
static void	segmap_hashout(struct smap *smp);
/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG },
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)	((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)  (((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))
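
/*
 * Worked example (illustration only; assumes MAXBSIZE is 8192, i.e.
 * MAXBSHIFT is 13): for a segment with s_base = 0xb0000000, the address
 * 0xb0004100 lies (0x4100 >> 13) = 2 MAXBSIZE windows into the segment,
 * so MAP_PAGE(seg, addr) is 2, GET_SMAP(seg, addr) returns &smd_sm[2],
 * and the byte offset within that 8K window is addr & MAXBOFFSET = 0x100.
 */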
static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;

static int *colors_used;

static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;
/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done by only holding the hashchain lock, when a wanted
 * slot is found, we drop the hashchain lock then lock the slot so there
 * is no overlapping of hashchain and smap locks. After the slot is
 * locked, we verify again if the slot is still what we are looking for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist. This is
 * in reversed lock order so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in smap structure except for
 * the link fields for hash/free lists which are protected by
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp)	(&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
	    ((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}
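
/*
 * Illustrative sketch of the lock ordering described above (not part of
 * the original code): a slot is always locked via its smap mutex first,
 * and the freelist mutex is only taken afterwards, e.g.
 *
 *	mutex_enter(SMAPMTX(smp));			lock the slot
 *	mutex_enter(&sm->sm_releq->smq_mtx);		then the freelist
 *	...
 *	mutex_exit(&sm->sm_releq->smq_mtx);
 *	mutex_exit(SMAPMTX(smp));
 *
 * get_free_smp() necessarily works in the opposite direction (freelist
 * first, then the slot at its head), which is why it uses mutex_tryenter()
 * on the smap mutex instead of blocking.
 */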
/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks. The update function
 * sums the cpu local counters to update the global counters.
 */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t	getmap, release, get_reclaim;
	ulong_t	fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;

	return (0);
}
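
/*
 * How an update function like the one above is typically wired up (a
 * hedged sketch; the "segmap" kstat is actually created elsewhere in the
 * kernel, not in this file):
 *
 *	kstat_t *ksp = kstat_create("unix", 0, "segmap", "vm",
 *	    KSTAT_TYPE_NAMED, segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
 *	if (ksp != NULL) {
 *		ksp->ks_data = (void *)segmapcnt_ptr;
 *		ksp->ks_update = segmap_kstat_update;
 *		kstat_install(ksp);
 *	}
 */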
int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	int nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can over-ride this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (!ISP2(nfreelist)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue. This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz)-1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array 1 time. Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists. Each cpu will have a private
	 * rotor index to spread the allocations even across the available
	 * smap freelists. Init the scpu_last_smap field to the first
	 * smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;

		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);

	return (0);
}
static void
segmap_free(struct seg *seg)
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}
/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum seg_rw rw, struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	uoff_t off;
	struct vnode *vp = smp->sm_vp;
	kmutex_t *smtx;
	ushort_t bitmask;

	ASSERT(smp->sm_refcnt > 0);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * We're called only from segmap_fault and this was a
		 * NOP in case of a kpm based smap, so dangerous things
		 * must have happened in the meantime. Pages are prefaulted
		 * and locked in segmap_getmapflt and they will not be
		 * unlocked until segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
	}

	off = smp->sm_off + (uoff_t)((uintptr_t)addr & MAXBOFFSET);
	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has a
		 * "shared" lock.
		 */
		pp = page_find(&vp->v_object, off);
		if (pp == NULL)
			panic("segmap_unlock: page not found");

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			hat_setref(pp);
		}

		/*
		 * Clear bitmap, if the bit corresponding to "off" is set,
		 * since the page and translation are being unlocked.
		 */
		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((uoff_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}
#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine. It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	uoff_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	uoff_t addroff;
	caddr_t adr;
	int err;
	uoff_t sm_off;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
		if (type != F_SOFTUNLOCK)
			return (0);

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		if (newpage)
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}

		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL) {
			mutex_exit(smtx);
			return (FC_MAKE_ERR(EIO));
		}
		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(smtx);

		addroff = (uoff_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(&vp->v_object, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm. We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (uoff_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	err = fop_getpage(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED(), NULL);
	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that if
	 * there was no error from the fop_getpage routine,
	 * that the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		uoff_t poff;
		int hat_flag = HAT_LOAD;

		VERIFY(pp->p_object == &vp->v_object);
		ASSERT(pp->p_vnode == vp);
		poff = pp->p_offset;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region. Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely. If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);

		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here. If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection. With S_OTHER it's up to the FS to deal with this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}
/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	uoff_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);

		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	err = fop_getpage(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}
static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	if (pgno != 0) {
		do {
			protv[--pgno] = smd->smd_prot;
		} while (pgno != 0);
	}
	return (0);
}
static uoff_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((uoff_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}
/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr. We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

static void
segmap_badop(void)
{
	panic("segmap_badop");
}
/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes held to set sm_want;
		 * snapshot the value before dropping releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex
		 * then recheck after obtaining sm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}
static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, uoff_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp,off) as its tag before us.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode * even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode * itself from going away. This is because this
		 * reference to the vnode is a "soft one". In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode. Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}
static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	uoff_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL)
			panic("segmap_hashout");
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (uoff_t)0;
}
/*
 * Attempt to free unmodified, unmapped, and non locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, uoff_t off)
{
	uoff_t pgoff;
	page_t *pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(&vp->v_object, pgoff,
		    SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}
/*
 * Locks held on entry: smap lock
 * Locks held on exit : smap lock.
 */
static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != NULL) {
		struct vnode *vp = smp->sm_vp;
		uoff_t off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}
		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}
static struct smap *
get_free_smp(int free_ndx)
{
	struct smfree *sm;
	kmutex_t *smtx;
	struct smap *smp, *first;
	struct sm_freeq *allocq, *releq;
	struct kpme *kpme;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;

	end_ndx = free_ndx;
	sm = &smd_free[free_ndx];

retry_queue:
	allocq = sm->sm_allocq;
	mutex_enter(&allocq->smq_mtx);

	if ((smp = allocq->smq_free) == NULL) {

skipskip:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (sm->sm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->smq_mtx);
			goto retry_queue;
		}
		releq = sm->sm_releq;
		if (!mutex_tryenter(&releq->smq_mtx)) {
			/* cannot get releq; a free smp may be there now */
			mutex_exit(&allocq->smq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->smq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			goto retry_queue;
		}
		if (releq->smq_free == NULL) {
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the segmap
			 * window after accessing the data.
			 * Before resorting to sleeping, try
			 * the next list of the same color.
			 */
			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->smq_mtx);
				mutex_exit(&allocq->smq_mtx);
				sm = &smd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists of the same color once,
			 * wait on this list and hope something gets freed.
			 */
			segmapcnt.smp_get_nofree.value.ul++;
			sm->sm_want++;
			mutex_exit(&sm->sm_freeq[1].smq_mtx);
			cv_wait(&sm->sm_free_cv,
			    &sm->sm_freeq[0].smq_mtx);
			sm->sm_want--;
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
			sm = &smd_free[free_ndx];
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			sm->sm_allocq = releq;
			sm->sm_releq = allocq;
			mutex_exit(&allocq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		/*
		 * Fastpath the case we get the smap mutex
		 * on the first try.
		 */
		first = smp;
next_smap:
		smtx = SMAPMTX(smp);
		if (!mutex_tryenter(smtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or smap.
			 */
			if ((smp = smp->sm_next) == first) {
				goto skipskip;
			} else {
				goto next_smap;
			}
		} else {
			/*
			 * if kpme exists, get shared lock on the page
			 */
			if (segmap_kpm && smp->sm_vp != NULL) {

				kpme = GET_KPME(smp);
				pp = kpme->kpe_page;

				if (pp != NULL) {
					if (!page_trylock(pp, SE_SHARED)) {
						smp = smp->sm_next;
						mutex_exit(smtx);
						page_locked = 1;
						pp = NULL;

						if (smp == first) {
							goto skipskip;
						} else {
							goto next_smap;
						}
					} else {
						if (kpme->kpe_page == NULL) {
							page_unlock(pp);
							pp = NULL;
						}
					}
				}
			}

			/*
			 * At this point, we've selected smp. Remove smp
			 * from its freelist. If smp is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == smp) {
				ASSERT(first == allocq->smq_free);
				allocq->smq_free = smp->sm_next;
			}

			/*
			 * if the head of the freelist still points to smp,
			 * then there are no more free smaps in that list.
			 */
			if (allocq->smq_free == smp)
				/*
				 * Took the last one
				 */
				allocq->smq_free = NULL;
			else {
				smp->sm_prev->sm_next = smp->sm_next;
				smp->sm_next->sm_prev = smp->sm_prev;
			}
			mutex_exit(&allocq->smq_mtx);
			smp->sm_prev = smp->sm_next = NULL;

			/*
			 * if pp != NULL, pp must have been locked;
			 * grab_smp() unlocks pp.
			 */
			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
			grab_smp(smp, pp);
			/* return smp locked. */
			ASSERT(SMAPMTX(smp) == smtx);
			ASSERT(MUTEX_HELD(smtx));
			return (smp);
		}
	}
}
/*
 * Special public segmap operations
 */

/*
 * Create pages (without using fop_getpage) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	page_t *pp;
	uoff_t off;
	struct smap *smp;
	struct vnode *vp;
	caddr_t eaddr;
	int newpage = 0;
	uint_t prot;
	kmutex_t *smtx;
	int hat_flag;

	ASSERT(seg->s_as == &kas);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. The SM_KPM_NEWPAGE flag is set
		 * in segmap_pagecreate_kpm when new pages are created.
		 * and it is returned as "newpage" indication here.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pagecreate: smap not found "
			    "for addr %p", (void *)addr);
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		mutex_exit(smtx);

		return (newpage);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	smp = GET_SMAP(seg, addr);

	/*
	 * We don't grab smp mutex here since we assume the smp
	 * has a refcnt set already which prevents the slot from
	 * being reused.
	 */
	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((uoff_t)((uintptr_t)addr & MAXBOFFSET));
	prot = smd->smd_prot;

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		hat_flag = HAT_LOAD;
		pp = page_lookup(&vp->v_object, off, SE_SHARED);
		if (pp == NULL) {
			ushort_t bitindex;

			if ((pp = page_create_va(&vp->v_object, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("segmap_pagecreate: page_create failed");
			}
			newpage = 1;

			/*
			 * Since pages created here do not contain valid
			 * data until the caller writes into them, the
			 * "exclusive" lock will not be dropped to prevent
			 * other users from accessing the page. We also
			 * have to lock the translation to prevent a fault
			 * from occurring when the virtual address mapped by
			 * this page is written into. This is necessary to
			 * avoid a deadlock since we haven't dropped the
			 * "exclusive" lock.
			 */
			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

			/*
			 * Large Files: The following assertion is to
			 * verify the cast above.
			 */
			ASSERT((uoff_t)(off - smp->sm_off) <= INT_MAX);
			smtx = SMAPMTX(smp);
			mutex_enter(smtx);
			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
			mutex_exit(smtx);

			hat_flag = HAT_LOAD_LOCK;
		} else if (softlock) {
			hat_flag = HAT_LOAD_LOCK;
		}

		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
			hat_setmod(pp);

		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (newpage);
}
void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct smap *smp;
	uoff_t off;
	caddr_t eaddr;
	ushort_t bitmask;
	page_t *pp;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(seg->s_as == &kas);

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release, so no pages or hat mappings have
		 * to be unlocked at this point.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pageunlock: smap not found "
			    "for addr %p", (void *)addr);
		}

		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(SMAPMTX(smp));
		return;
	}

	smp = GET_SMAP(seg, addr);
	smtx = SMAPMTX(smp);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((uoff_t)((uintptr_t)addr & MAXBOFFSET));

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((uoff_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * If the bit corresponding to "off" is set,
		 * clear this bit in the bitmap, unlock translations,
		 * and release the "exclusive" lock on the page.
		 */
		if (smp->sm_bitmap & bitmask) {
			mutex_enter(smtx);
			smp->sm_bitmap &= ~bitmask;
			mutex_exit(smtx);

			hat_unlock(kas.a_hat, addr, PAGESIZE);

			/*
			 * Use page_find() instead of page_lookup() to
			 * find the page since we know that it has an
			 * "exclusive" lock.
			 */
			pp = page_find(&vp->v_object, off);
			if (pp == NULL)
				panic("segmap_pageunlock: page not found");

			if (rw == S_WRITE) {
				hat_setrefmod(pp);
			} else if (rw != S_OTHER) {
				hat_setref(pp);
			}

			page_unlock(pp);
		}
	}
}
caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, uoff_t off)
{
	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}
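
/*
 * Typical consumer pattern (a hedged sketch of how a filesystem read path
 * might use this interface; not code from this file):
 *
 *	caddr_t base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);
 *	error = uiomove(base + (off & MAXBOFFSET), n, UIO_READ, uio);
 *	(void) segmap_release(segkmap, base, error ? 0 : SM_DONTNEED);
 *
 * Each slot covers a single MAXBSIZE window of <vp, off & MAXBMASK>, so a
 * caller must not let off + n cross a MAXBSIZE boundary in one request.
 */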
/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space. This is used to pick
 * the vac color on the freelist.
 */
#define	ELF_OFFZERO_VA	(0x10000)

/*
 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
caddr_t
segmap_getmapflt(struct seg *seg, struct vnode *vp, uoff_t off, size_t len,
    int forcefault, enum seg_rw rw)
{
	struct smap *smp, *nsmp;
	extern struct vnode *common_specvp();
	caddr_t baseaddr;			/* MAXBSIZE aligned */
	uoff_t baseoff;
	int newslot;
	caddr_t vaddr;
	int color, hashid;
	kmutex_t *hashmtx, *smapmtx;
	struct smfree *sm;
	page_t *pp;
	struct kpme *kpme;
	uint_t prot;
	caddr_t base;
	page_t *pl[MAXPPB + 1];
	int error;
	int is_kpm = 1;

	ASSERT(seg->s_as == &kas);
	ASSERT(seg == segkmap);

	baseoff = off & (offset_t)MAXBMASK;
	if (off + len > baseoff + MAXBSIZE) {
		panic("segmap_getmap bad len");
	}

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;

	if (segmap_kpm == 0 ||
	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
		is_kpm = 0;
	}

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	hashmtx = SHASHMTX(hashid);

retry_hash:
	mutex_enter(hashmtx);
	for (smp = smd_hash[hashid].sh_hash_list;
	    smp != NULL; smp = smp->sm_hash)
		if (smp->sm_vp == vp && smp->sm_off == baseoff)
			break;
	mutex_exit(hashmtx);

vrfy_smp:
	if (smp != NULL) {

		ASSERT(vp->v_count != 0);

		/*
		 * Get smap lock and recheck its tag. The hash lock
		 * is dropped since the hash is based on (vp, off)
		 * and (vp, off) won't change when we have smap mtx.
		 */
		smapmtx = SMAPMTX(smp);
		mutex_enter(smapmtx);
		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
			mutex_exit(smapmtx);
			goto retry_hash;
		}

		if (smp->sm_refcnt == 0) {

			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;

			/*
			 * Could still be on the free list. However, this
			 * could also be an smp that is transitioning from
			 * the free list when we have too much contention
			 * for the smapmtx's. In this case, we have an
			 * unlocked smp that is not on the free list any
			 * longer, but still has a 0 refcnt. The only way
			 * to be sure is to check the freelist pointers.
			 * Since we now have the smapmtx, we are guaranteed
			 * that the (vp, off) won't change, so we are safe
			 * to reclaim it. get_free_smp() knows that this
			 * can happen, and it will check the refcnt.
			 */
			if ((smp->sm_next != NULL)) {
				struct sm_freeq *freeq;

				ASSERT(smp->sm_prev != NULL);
				sm = &smd_free[smp->sm_free_ndx];

				if (smp->sm_flags & SM_QNDX_ZERO)
					freeq = &sm->sm_freeq[0];
				else
					freeq = &sm->sm_freeq[1];

				mutex_enter(&freeq->smq_mtx);
				if (freeq->smq_free != smp) {
					/*
					 * fastpath normal case
					 */
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				} else if (smp == smp->sm_next) {
					/*
					 * Taking the last smap on freelist
					 */
					freeq->smq_free = NULL;
				} else {
					/*
					 * Reclaiming 1st smap on list
					 */
					freeq->smq_free = smp->sm_next;
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				}
				mutex_exit(&freeq->smq_mtx);
				smp->sm_prev = smp->sm_next = NULL;
			} else {
				ASSERT(smp->sm_prev == NULL);
				segmapcnt.smp_stolen.value.ul++;
			}

		} else {
			segmapcnt.smp_get_use.value.ul++;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (rw == S_WRITE) {
			smp->sm_flags |= SM_WRITE_DATA;
		} else if (rw == S_READ) {
			smp->sm_flags |= SM_READ_DATA;
		}
		mutex_exit(smapmtx);

		newslot = 0;
	} else {

		uint32_t free_ndx, *free_ndxp;
		union segmap_cpu *scpu;

		/*
		 * On a PAC machine or a machine with anti-alias
		 * hardware, smd_colormsk will be zero.
		 *
		 * On a VAC machine- pick color by offset in the file
		 * so we won't get VAC conflicts on elf files.
		 * On data files, color does not matter but we
		 * don't know what kind of file it is so we always
		 * pick color by offset. This causes color
		 * corresponding to file offset zero to be used more
		 * heavily.
		 */
		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
		scpu = smd_cpu+CPU->cpu_seqid;
		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;

		colors_used[free_ndx]++;

		/*
		 * Get a locked smp slot from the free list.
		 */
		smp = get_free_smp(free_ndx);
		smapmtx = SMAPMTX(smp);

		ASSERT(smp->sm_vp == NULL);

		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
			/*
			 * Failed to hashin, there exists one now.
			 * Return the smp we just allocated.
			 */
			segmap_smapadd(smp);
			mutex_exit(smapmtx);

			smp = nsmp;
			goto vrfy_smp;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (rw == S_WRITE) {
			smp->sm_flags |= SM_WRITE_DATA;
		} else if (rw == S_READ) {
			smp->sm_flags |= SM_READ_DATA;
		}
		mutex_exit(smapmtx);

		newslot = 1;
	}

	if (!is_kpm)
		goto use_segmap_range;

	/*
	 * Use segkpm.
	 */
	/* Lint directive required until 6746211 is fixed */
	/* CONSTCOND */
	ASSERT(PAGESIZE == MAXBSIZE);

	/*
	 * remember the last smp faulted on this cpu.
	 */
	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;

	if (forcefault == SM_PAGECREATE) {
		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
		return (baseaddr);
	}

	if (newslot == 0 &&
	    (pp = GET_KPME(smp)->kpe_page) != NULL) {

		/* fastpath */
		switch (rw) {
		case S_READ:
		case S_WRITE:
			if (page_trylock(pp, SE_SHARED)) {
				if (PP_ISFREE(pp) ||
				    !(pp->p_vnode == vp &&
				    pp->p_offset == baseoff)) {
					page_unlock(pp);
					pp = page_lookup(&vp->v_object,
					    baseoff, SE_SHARED);
				}
			} else {
				pp = page_lookup(&vp->v_object, baseoff,
				    SE_SHARED);
			}

			if (pp == NULL) {
				ASSERT(GET_KPME(smp)->kpe_page == NULL);
				break;
			}

			if (rw == S_WRITE &&
			    hat_page_getattr(pp, P_MOD | P_REF) !=
			    (P_MOD | P_REF)) {
				page_unlock(pp);
				break;
			}

			/*
			 * We have the p_selock as reader, grab_smp
			 * can't hit us, we have bumped the smap
			 * refcnt and hat_pageunload needs the
			 * p_selock exclusive.
			 */
			kpme = GET_KPME(smp);
			if (kpme->kpe_page == pp) {
				baseaddr = hat_kpm_page2va(pp, 0);
			} else if (kpme->kpe_page == NULL) {
				baseaddr = hat_kpm_mapin(pp, kpme);
			} else {
				panic("segmap_getmapflt: stale "
				    "kpme page, kpme %p", (void *)kpme);
			}

			/*
			 * We don't invoke segmap_fault via TLB miss,
			 * so we set ref and mod bits in advance.
			 * For S_OTHER and we set them in segmap_fault
			 * F_SOFTUNLOCK.
			 */
			if (rw == S_READ && !hat_isref(pp))
				hat_setref(pp);

			return (baseaddr);
		default:
			break;
		}
	}

	base = segkpm_create_va(baseoff);
	error = fop_getpage(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
	    seg, base, rw, CRED(), NULL);

	pp = pl[0];
	if (error || pp == NULL) {
		/*
		 * Use segmap address slot and let segmap_fault deal
		 * with the error cases. There is no error return
		 * possible here.
		 */
		goto use_segmap_range;
	}

	ASSERT(pl[1] == NULL);

	/*
	 * When prot is not returned w/ PROT_ALL the returned pages
	 * are not backed by fs blocks. For most of the segmap users
	 * this is no problem, they don't write to the pages in the
	 * same request and therefore don't rely on a following
	 * trap driven segmap_fault. With SM_LOCKPROTO users it
	 * is more secure to use segkmap adresses to allow
	 * protection segmap_fault's.
	 */
	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
		/*
		 * Use segmap address slot and let segmap_fault
		 * do the error return.
		 */
		ASSERT(rw != S_WRITE);
		ASSERT(PAGE_LOCKED(pp));
		page_unlock(pp);
		forcefault = 0;
		goto use_segmap_range;
	}

	/*
	 * We have the p_selock as reader, grab_smp can't hit us, we
	 * have bumped the smap refcnt and hat_pageunload needs the
	 * p_selock exclusive.
	 */
	kpme = GET_KPME(smp);
	if (kpme->kpe_page == pp) {
		baseaddr = hat_kpm_page2va(pp, 0);
	} else if (kpme->kpe_page == NULL) {
		baseaddr = hat_kpm_mapin(pp, kpme);
	} else {
		panic("segmap_getmapflt: stale kpme page after "
		    "fop_getpage, kpme %p", (void *)kpme);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;

	return (baseaddr);


use_segmap_range:
	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);

	/*
	 * Prefault the translations
	 */
	vaddr = baseaddr + (off - baseoff);
	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {

		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);

		(void) segmap_fault(kas.a_hat, seg, pgaddr,
		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
		    F_INVAL, rw);
	}

	return (baseaddr);
}
int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
	struct smap *smp;
	int error;
	int bflags = 0;
	struct vnode *vp;
	uoff_t offset;
	kmutex_t *smtx;
	int is_kpm = 0;
	page_t *pp = NULL;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: addr %p not "
			    "MAXBSIZE aligned", (void *)addr);
		}

		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
			panic("segmap_release: smap not found "
			    "for addr %p", (void *)addr);
		}

		smtx = SMAPMTX(smp);

		/*
		 * For compatibility reasons segmap_pagecreate_kpm sets this
		 * flag to allow a following segmap_pagecreate to return
		 * this as "newpage" flag. When segmap_pagecreate is not
		 * called at all we clear it now.
		 */
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		is_kpm = 1;
		if (smp->sm_flags & SM_WRITE_DATA) {
			hat_setrefmod(pp);
		} else if (smp->sm_flags & SM_READ_DATA) {
			hat_setref(pp);
		}
	} else {
		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: bad addr %p", (void *)addr);
		}
		smp = GET_SMAP(seg, addr);

		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_NOTKPM_RELEASED;
	}

	ASSERT(smp->sm_refcnt > 0);

	/*
	 * Need to call fop_putpage() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_WRITE)
			segmapcnt.smp_rel_write.value.ul++;
		if (flags & SM_ASYNC) {
			bflags |= B_ASYNC;
			segmapcnt.smp_rel_async.value.ul++;
		}
		if (flags & SM_INVAL) {
			bflags |= B_INVAL;
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (flags & SM_DESTROY) {
			bflags |= (B_INVAL|B_TRUNC);
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (smp->sm_refcnt == 1) {
			/*
			 * We only bother doing the FREE and DONTNEED flags
			 * if no one else is still referencing this mapping.
			 */
			if (flags & SM_FREE) {
				bflags |= B_FREE;
				segmapcnt.smp_rel_free.value.ul++;
			}
			if (flags & SM_DONTNEED) {
				bflags |= B_DONTNEED;
				segmapcnt.smp_rel_dontneed.value.ul++;
			}
		}
	} else {
		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
	}

	vp = smp->sm_vp;
	offset = smp->sm_off;

	if (--smp->sm_refcnt == 0) {

		smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);

		if (flags & (SM_INVAL|SM_DESTROY)) {
			segmap_hashout(smp);	/* remove map info */
			if (is_kpm) {
				hat_kpm_mapout(pp, GET_KPME(smp), addr);
				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
					hat_unload(kas.a_hat, segkmap->s_base +
					    ((smp - smd_smap) * MAXBSIZE),
					    MAXBSIZE, HAT_UNLOAD);
				}
			} else {
				if (segmap_kpm)
					segkpm_mapout_validkpme(GET_KPME(smp));

				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
				hat_unload(kas.a_hat, addr, MAXBSIZE,
				    HAT_UNLOAD);
			}
		}
		segmap_smapadd(smp);	/* add to free list */
	}

	mutex_exit(smtx);

	if (is_kpm)
		page_unlock(pp);

	/*
	 * Now invoke fop_putpage() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		error = fop_putpage(vp, offset, MAXBSIZE,
		    bflags, CRED(), NULL);
	} else {
		error = 0;
	}

	return (error);
}
/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
	struct segmap_data *smd;
	struct smap *smp, *smp_end;
	page_t *pp;
	pfn_t pfn;
	uoff_t off;
	caddr_t addr;

	smd = (struct segmap_data *)seg->s_data;
	addr = seg->s_base;
	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
	    smp < smp_end; smp++) {

		if (smp->sm_refcnt) {
			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
				int we_own_it = 0;

				/*
				 * If pp == NULL, the page either does
				 * not exist or is exclusively locked.
				 * So determine if it exists before
				 * searching for it.
				 */
				if ((pp = page_lookup_nowait(
				    &smp->sm_vp->v_object,
				    smp->sm_off + off, SE_SHARED)))
					we_own_it = 1;
				else
					pp = page_exists(&smp->sm_vp->v_object,
					    smp->sm_off + off);

				if (pp) {
					pfn = page_pptonum(pp);
					dump_addpage(seg->s_as,
					    addr + off, pfn);
					if (we_own_it)
						page_unlock(pp);
				}
				dump_timeleft = dump_timeout;
			}
		}
		addr += MAXBSIZE;
	}
}
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}
static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}
#ifdef	SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, uoff_t off,
    struct smap *smp, enum seg_rw rw)
{
	caddr_t	base;
	page_t	*pp;
	int	newpage = 0;
	struct kpme	*kpme;
	kmutex_t	*smtx;

	ASSERT(smp->sm_refcnt > 0);

	if ((pp = page_lookup(&vp->v_object, off, SE_SHARED)) == NULL) {

		base = segkpm_create_va(off);

		if ((pp = page_create_va(&vp->v_object, off, PAGESIZE, PG_WAIT,
		    seg, base)) == NULL) {
			panic("segmap_pagecreate_kpm: "
			    "page_create failed");
		}

		newpage = 1;
		ASSERT((uoff_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * Mark this here until the following segmap_pagecreate
		 * or segmap_release.
		 */
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_KPM_NEWPAGE;
		mutex_exit(smtx);
	}

	kpme = GET_KPME(smp);
	if (!newpage && kpme->kpe_page == pp)
		base = hat_kpm_page2va(pp, 0);
	else
		base = hat_kpm_mapin(pp, kpme);

	/*
	 * FS code may decide not to call segmap_pagecreate and we
	 * don't invoke segmap_fault via TLB miss, so we have to set
	 * ref and mod bits in advance.
	 */
	if (rw == S_WRITE) {
		hat_setrefmod(pp);
	} else {
		ASSERT(rw == S_READ);
		hat_setref(pp);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	return (base);
}

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap	*smp;
	struct vnode	*vp;
	uoff_t		offset;
	caddr_t		baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int		hashid;
	kmutex_t	*hashmtx;
	page_t		*pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;

	/*
	 * Assume the last smap used on this cpu is the one needed.
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}
#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, uoff_t off,
    struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */