/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/t_lock.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_sfmmu.h>
#include <sys/debug.h>
#include <sys/cpu_module.h>
#include <sys/mem_cage.h>
/*
 * A quick way to generate a cache consistent address to map in a page.
 * users: ppcopy, pagezero, /proc, dev/mem
 *
 * The ppmapin/ppmapout routines provide a quick way of generating a cache
 * consistent address by reserving a given amount of kernel address space.
 * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
 * into x number of sets, where x is the number of colors for the virtual
 * cache.  The number of colors is how many times a page can be mapped
 * simultaneously in the cache.  For direct-mapped caches this translates to
 * the number of pages in the cache.
 * Each set will be assigned a group of virtual pages from the reserved memory
 * depending on its virtual color.
 * When trying to assign a virtual address we will find out the color for the
 * physical page in question (if applicable).  Then we will try to find an
 * available virtual page from the set of the appropriate color.
 */
#define	clsettoarray(color, set) ((color * nsets) + set)

int pp_slots = 4;	/* small default, tuned by cpu module */

/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int	nsets;			/* number of sets */
static int	ppmap_pages;		/* generate align mask */
static int	ppmap_shift;		/* set selector */

#define	MAXCOLORS	16	/* for debug only */
static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
static int	align_hits[MAXCOLORS];
static int	pp_allocs;		/* # of ppmapin requests */
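/*
 * Illustrative sketch of the layout described above; the helper names and
 * the PPMAP_EXAMPLE guard are hypothetical and the block is never compiled.
 * There is one slot per (color, set) pair, linearized by clsettoarray(),
 * and the set index can be recovered from an address the same way
 * ppmapout() recovers it.  With a hypothetical 64K shm_alignment and 8K
 * MMU pages there would be 8 colors and PPMAPSIZE / 64K sets.
 */
#ifdef	PPMAP_EXAMPLE
static caddr_t
ppmap_lookup_example(int color, int set)
{
	/* look up the reserved VA (NULL means the slot is currently in use) */
	return (ppmap_vaddrs[clsettoarray(color, set)]);
}

static int
ppmap_set_example(caddr_t va)
{
	/* the set index is encoded in the VA by the layout above */
	return ((int)(((uintptr_t)va >> ppmap_shift) & (nsets - 1)));
}
#endif	/* PPMAP_EXAMPLE */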
/*
 * There are only 64 TLB entries on spitfire, 16 on cheetah
 * (fully-associative TLB) so we allow the cpu module to tune the
 * number to use here via pp_slots.
 */
static struct ppmap_va {
	caddr_t	ppmap_slots[MAXPP_SLOTS];
	int color, nset, setsize;

	ASSERT(pp_slots <= MAXPP_SLOTS);

	va = (caddr_t)PPMAPBASE;
	if (cache & CACHE_VAC) {
		ppmap_pages = mmu_btop(shm_alignment);
		nsets = PPMAPSIZE / shm_alignment;
		setsize = shm_alignment;
		ppmap_shift = MMU_PAGESHIFT;

		/*
		 * If we do not have a virtual indexed cache we simply
		 * have only one set containing all pages.
		 */
		nsets = mmu_btop(PPMAPSIZE);
		setsize = MMU_PAGESIZE;
		ppmap_shift = MMU_PAGESHIFT;
	for (color = 0; color < ppmap_pages; color++) {
		for (nset = 0; nset < nsets; nset++) {
			ppmap_vaddrs[clsettoarray(color, nset)] =
			    (caddr_t)((uintptr_t)va + (nset * setsize));
/*
 * Allocate a cache consistent virtual address to map a page, pp,
 * with protection, vprot; and map it in the MMU, using the most
 * efficient means possible.  The argument avoid is a virtual address
 * hint which when masked yields an offset into a virtual cache
 * that should be avoided when allocating an address to map in a
 * page.  An avoid arg of -1 means you don't care, for instance pagezero.
 *
 * machine dependent, depends on virtual address space layout,
 * understands that all kernel addresses have bit 31 set.
 *
 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 * that found in other architectures.  In other architectures the hint
 * (called avoid) was used to ask ppmapin NOT to use the specified cache color.
 * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately,
 * in the case of a COW, this later on caused a cache aliasing conflict.  On
 * sun4, the bcopy routine uses the block ld/st instructions, so we don't have
 * to worry about virtual cache thrashing.  Actually, by using the hint to
 * choose the right color we can almost guarantee a cache conflict will not
 * occur.
 */
ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
	int color, nset, index, start;

	if (cache & CACHE_VAC) {
		color = sfmmu_get_ppvcolor(pp);
			if ((intptr_t)hint != -1L) {
				color = addr_to_vcolor(hint);
				color = addr_to_vcolor(mmu_ptob(pp->p_pagenum));
		/*
		 * For physical caches, we can pick any address we want.
		 */

		for (nset = 0; nset < nsets; nset++) {
			index = clsettoarray(color, nset);
			va = ppmap_vaddrs[index];
				if (atomic_cas_ptr(&ppmap_vaddrs[index],
					hat_memload(kas.a_hat, va, pp,
		/*
		 * first pick didn't succeed, try another
		 */
		if (++color == ppmap_pages)
	} while (color != start);
	/*
	 * No free slots; get a random one from the kernel heap area.
	 */
	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
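	/*
	 * The fallback mapping is loaded locked just like the reserved
	 * slots; ppmapout() below recognizes kernel heap addresses and
	 * both unloads the translation and frees the VA back to
	 * heap_arena.
	 */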
	int color, nset, index;

	if (va >= kernelheap && va < ekernelheap) {
		/*
		 * Space came from kernelmap, flush the page and
		 * free the VA.
		 */
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
		vmem_free(heap_arena, va, PAGESIZE);
		/*
		 * Space came from ppmap_vaddrs[], give it back.
		 */
		color = addr_to_vcolor(va);
		ASSERT((cache & CACHE_VAC) ? (color < ppmap_pages) : 1);

		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
		index = clsettoarray(color, nset);
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));

		ASSERT(ppmap_vaddrs[index] == NULL);
		ppmap_vaddrs[index] = va;
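/*
 * Illustrative usage sketch; the helper name and the PPMAP_EXAMPLE guard
 * are hypothetical and the block is never compiled.  It copies one locked
 * page to another through ppmapin()/ppmapout(), passing the source VA as
 * the hint so both mappings get a matching virtual color, per the NOTE
 * above ppmapin().
 */
#ifdef	PPMAP_EXAMPLE
static void
ppmap_copy_example(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va, to_va;

	ASSERT(PAGE_LOCKED(fm_pp) && PAGE_LOCKED(to_pp));

	fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
	to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
	bcopy(fm_va, to_va, PAGESIZE);
	ppmapout(to_va);
	ppmapout(fm_va);
}
#endif	/* PPMAP_EXAMPLE */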
#define	PP_STAT_ADD(stat)	(stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;

#define	PP_STAT_ADD(stat)
/*
 * Find a slot in the per CPU page copy area.  Load up a locked TLB entry
 * on the running cpu.  We don't call the hat layer to load up the tte
 * since the mapping is only temporary.  If the thread migrates it'll get
 * a TLB miss trap and the TLB/TSB miss handler will panic since there is
 * no official hat record of this mapping.
 */
pp_load_tlb(processorid_t cpu, caddr_t **pslot, page_t *pp, uint_t prot)
	struct ppmap_va *ppmap;
	long i, start, stride;
	uint_t flags, strict_flag;

	ppmap = &ppmap_va[cpu];
	va = (caddr_t)(PPMAP_FAST_BASE + (MMU_PAGESIZE * MAXPP_SLOTS) * cpu);
	myslot = ppmap->ppmap_slots;
	ASSERT(addr_to_vcolor(va) == 0);
	if (prot & TTE_HWWR_INT) {
		flags = PPAGE_STORE_VCOLORING | PPAGE_STORES_POLLUTE;
		strict_flag = PPAGE_STORES_POLLUTE;
	} else {
		flags = PPAGE_LOAD_VCOLORING | PPAGE_LOADS_POLLUTE;
		strict_flag = PPAGE_LOADS_POLLUTE;
	}
	/*
	 * If consistent handling is required then keep the current
	 * vcolor of the page.  Furthermore, if loads or stores can
	 * pollute the VAC then using a "new" page (unassigned vcolor)
	 * won't work and we have to return a failure.
	 */
	if (pp_consistent_coloring & flags) {
		vcolor = sfmmu_get_ppvcolor(pp);
		if ((vcolor == -1) &&
		    (pp_consistent_coloring & strict_flag))
			return (NULL);
		/* else keep the current vcolor of the page */

		va += MMU_PAGESIZE * vcolor;
		stride = ppmap_pages;	/* number of colors */
	for (i = start; i < pp_slots; i += stride) {
		if (*myslot == NULL) {
			if (atomic_cas_ptr(myslot, NULL, va) == NULL)
		va += MMU_PAGESIZE * stride;

		PP_STAT_ADD(ploadfail);

	ASSERT(vcolor == -1 || addr_to_vcolor(va) == vcolor);
	/*
	 * Now we have a slot we can use, make the tte.
	 */
	tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(pp->p_pagenum);
	tte.tte_intlo = TTE_PFN_INTLO(pp->p_pagenum) | TTE_CP_INT |
	    TTE_CV_INT | TTE_PRIV_INT | TTE_LCK_INT | prot;

	ASSERT(CPU->cpu_id == cpu);
	sfmmu_dtlb_ld_kva(va, &tte);

	*pslot = myslot;	/* Return ptr to the slot we used. */
pp_unload_tlb(caddr_t *pslot, caddr_t va)
	ASSERT(*pslot == va);

	vtag_flushpage(va, (uint64_t)ksfmmup);
	*pslot = NULL;	/* release the slot */
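/*
 * Illustrative sketch of the pp_load_tlb()/pp_unload_tlb() protocol; the
 * helper name and the PPMAP_EXAMPLE guard are hypothetical and the block
 * is never compiled.  Preemption stays disabled for the life of the
 * temporary mapping so the thread cannot migrate off the cpu whose dTLB
 * holds the locked entry (see the comment above pp_load_tlb()).  A NULL
 * return means no free slot or no usable vcolor, and the caller is
 * expected to fall back to the ppmapin() path.
 */
#ifdef	PPMAP_EXAMPLE
static int
pp_fast_clear_example(page_t *pp)
{
	extern int hwblkclr(void *, size_t);
	processorid_t cpu;
	caddr_t va, *slot;

	kpreempt_disable();
	cpu = CPU->cpu_id;
	va = pp_load_tlb(cpu, &slot, pp, TTE_HWWR_INT);
	if (va == NULL) {
		kpreempt_enable();
		return (0);		/* use the ppmapin() path instead */
	}
	/* pagezero() below additionally flushes the I-$ when needed */
	(void) hwblkclr(va, MMU_PAGESIZE);
	ASSERT(CPU->cpu_id == cpu);
	pp_unload_tlb(slot, va);
	kpreempt_enable();
	return (1);
}
#endif	/* PPMAP_EXAMPLE */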
/*
 * Common copy routine which attempts to use hwblkpagecopy.  If this routine
 * can't be used, failure (0) will be returned.  Otherwise, a PAGESIZE page
 * will be copied and success (1) will be returned.
 */
ppcopy_common(page_t *fm_pp, page_t *to_pp)
	caddr_t fm_va, to_va;
	caddr_t *fm_slot, *to_slot;

	ASSERT(fm_pp != NULL && PAGE_LOCKED(fm_pp));
	ASSERT(to_pp != NULL && PAGE_LOCKED(to_pp));
	/*
	 * If we can't use VIS block loads and stores we can't use
	 * pp_load_tlb/pp_unload_tlb due to the possibility of
	 * polluting the VAC.
	 */
	if (!use_hw_bcopy && (cache & CACHE_VAC))
	fm_va = pp_load_tlb(cpu, &fm_slot, fm_pp, 0);

	to_va = pp_load_tlb(cpu, &to_slot, to_pp, TTE_HWWR_INT);
		pp_unload_tlb(fm_slot, fm_va);

	if (on_fault(&ljb)) {

	hwblkpagecopy(fm_va, to_va);

	ASSERT(CPU->cpu_id == cpu);
	pp_unload_tlb(fm_slot, fm_va);
	pp_unload_tlb(to_slot, to_va);
/*
 * Routine to copy kernel pages during relocation.  It will copy one
 * PAGESIZE page to another PAGESIZE page.  This function may be called
 * above LOCK_LEVEL so it should not grab any locks.
 */
ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
	uint64_t fm_pa, to_pa;

	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;

	nbytes = MMU_PAGESIZE;
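	/*
	 * hw_pa_bcopy32() moves 32 bytes at a time by physical address,
	 * so the copy needs no virtual mapping and takes no locks; a
	 * full page takes MMU_PAGESIZE / 32 iterations.
	 */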
	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
		hw_pa_bcopy32(fm_pa, to_pa);
/*
 * Copy the data from the physical page represented by "fm_pp" to
 * that represented by "to_pp".
 *
 * Try to use the per cpu mapping first; if that fails then call ppmapin
 * to load it.
 *
 * Returns one on success or zero on some sort of fault while doing the copy.
 */
ppcopy(page_t *fm_pp, page_t *to_pp)
	caddr_t fm_va, to_va;
	boolean_t use_kpm = B_FALSE;

	/* Try the fast path first */
	if (ppcopy_common(fm_pp, to_pp))
	/*
	 * Try to map using KPM if enabled and we are the cageout thread.
	 * If it fails, fall back to ppmapin/ppmapout.
	 */
		if (curthread == kcage_cageout_thread)
= hat_kpm_mapin(fm_pp
, NULL
)) == NULL
||
463 (to_va
= hat_kpm_mapin(to_pp
, NULL
)) == NULL
) {
465 hat_kpm_mapout(fm_pp
, NULL
, fm_va
);
470 if (use_kpm
== B_FALSE
) {
		/* do the slow path */
		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
		if (on_fault(&ljb)) {

		bcopy(fm_va, to_va, PAGESIZE);
	if (use_kpm == B_TRUE) {
		hat_kpm_mapout(fm_pp, NULL, fm_va);
		hat_kpm_mapout(to_pp, NULL, to_va);
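/*
 * Illustrative caller sketch; the helper name and the PPMAP_EXAMPLE guard
 * are hypothetical and the block is never compiled.  ppcopy() returns zero
 * when a fault is taken during the copy, so callers must check the result
 * rather than assume the destination page now holds valid data.
 */
#ifdef	PPMAP_EXAMPLE
static int
ppcopy_checked_example(page_t *fm_pp, page_t *to_pp)
{
	ASSERT(PAGE_LOCKED(fm_pp) && PAGE_LOCKED(to_pp));

	if (ppcopy(fm_pp, to_pp) == 0)
		return (-1);	/* faulted while copying */
	return (0);
}
#endif	/* PPMAP_EXAMPLE */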
/*
 * Zero the physical page from off to off + len given by `pp'
 * without changing the reference and modified bits of the page.
 *
 * Again, we'll try the per cpu mapping first.
 */
pagezero(page_t *pp, uint_t off, uint_t len)
	extern int hwblkclr(void *, size_t);
	extern int use_hw_bzero;

	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
	ASSERT(PAGE_LOCKED(pp));
	if (len != MMU_PAGESIZE || !use_hw_bzero) {
		/*
		 * Since the fast path doesn't do anything about
		 * VAC coloring, we make sure bcopy h/w will be used.
		 */
		PP_STAT_ADD(ppzero_short);
		va = pp_load_tlb(cpu, &slot, pp, TTE_HWWR_INT);

		/*
		 * We are here because either length != MMU_PAGESIZE,
		 * pp_load_tlb() returned NULL, or use_hw_bzero is disabled.
		 */
		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
	if (hwblkclr(va + off, len)) {
		/*
		 * We may not have used the block commit ASI,
		 * so flush the I-$ manually.
		 */
		sync_icache(va + off, len);
		/*
		 * We have used blk commit, and flushed the I-$.  However we
		 * still may have an instruction in the pipeline.  Only a flush
		 * instruction will invalidate that.
		 */

		ASSERT(CPU->cpu_id == cpu);
		pp_unload_tlb(slot, va);
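/*
 * Illustrative usage sketch; the helper name and the PPMAP_EXAMPLE guard
 * are hypothetical and the block is never compiled.  It zeroes the tail
 * of a locked page, e.g. the portion of a file's last page beyond EOF.
 */
#ifdef	PPMAP_EXAMPLE
static void
pagezero_tail_example(page_t *pp, uint_t valid_len)
{
	ASSERT(PAGE_LOCKED(pp));

	if (valid_len < PAGESIZE)
		pagezero(pp, valid_len, PAGESIZE - valid_len);
}
#endif	/* PPMAP_EXAMPLE */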