/*
 * Copyright (C) 2001 Todd Inglett, IBM Corporation
 *
 * pSeries LPAR support.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
/* Enables debugging of low-level hash table routines - careful! */
#undef DEBUG
#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include <linux/console.h>
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/machdep.h>
#include <asm/abs_addr.h>
#include <asm/mmu_context.h>
#include <asm/iommu.h>
#include <asm/tlbflush.h>
#include <asm/cputable.h>
#include <asm/trace.h>
#include <asm/firmware.h>

#include "plpar_wrappers.h"
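/*
 * The low-level hcall entry points below are implemented in assembly
 * (hvCall.S); they are exported so that modules can issue hcalls.
 */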
EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
extern void pSeries_find_serial_port(void);
void vpa_init(int cpu)
{
        int hwcpu = get_hard_smp_processor_id(cpu);
        unsigned long addr;
        long ret;
        struct paca_struct *pp;
        struct dtl_entry *dtl;

        if (cpu_has_feature(CPU_FTR_ALTIVEC))
                lppaca_of(cpu).vmxregs_in_use = 1;

        addr = __pa(&lppaca_of(cpu));
        ret = register_vpa(hwcpu, addr);

        if (ret) {
                pr_err("WARNING: VPA registration for cpu %d (hw %d) of area "
                       "%lx failed with %ld\n", cpu, hwcpu, addr, ret);
                return;
        }
        /*
         * PAPR says this feature is SLB-Buffer but firmware never
         * reports that.  All SPLPAR support SLB shadow buffer.
         */
        addr = __pa(&slb_shadow[cpu]);
        if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
                ret = register_slb_shadow(hwcpu, addr);
                if (ret)
                        pr_err("WARNING: SLB shadow buffer registration for "
                               "cpu %d (hw %d) of area %lx failed with %ld\n",
                               cpu, hwcpu, addr, ret);
        }

        /*
         * Register dispatch trace log, if one has been allocated.
         */
        pp = &paca[cpu];
        dtl = pp->dispatch_log;
        if (dtl) {
                pp->dtl_ridx = 0;
                pp->dtl_curr = dtl;
                lppaca_of(cpu).dtl_idx = 0;

                /* hypervisor reads buffer length from this field */
                dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
                ret = register_dtl(hwcpu, __pa(dtl));
                if (ret)
                        pr_err("WARNING: DTL registration of cpu %d (hw %d) "
                               "failed with %ld\n", smp_processor_id(),
                               hwcpu, ret);
                lppaca_of(cpu).dtl_enable_mask = 2;
        }
}
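/*
 * Low-level HPT (hashed page table) management. Under PAPR the hashed
 * page table is owned by the hypervisor, so every insert, update and
 * invalidate below goes through an hcall (H_ENTER, H_PROTECT, H_REMOVE)
 * via the plpar_pte_* wrappers instead of touching the table directly.
 */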
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
                                     unsigned long va, unsigned long pa,
                                     unsigned long rflags, unsigned long vflags,
                                     int psize, int ssize)
{
        unsigned long lpar_rc;
        unsigned long flags;
        unsigned long slot;
        unsigned long hpte_v, hpte_r;

        if (!(vflags & HPTE_V_BOLTED))
                pr_devel("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
                         "rflags=%lx, vflags=%lx, psize=%d)\n",
                         hpte_group, va, pa, rflags, vflags, psize);

        hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
        hpte_r = hpte_encode_r(pa, psize) | rflags;

        if (!(vflags & HPTE_V_BOLTED))
                pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);

        /* Now fill in the actual HPTE */
        /* Set CEC cookie to 0         */
        /* Zero page = 0               */
        /* I-cache Invalidate = 0      */
        /* I-cache synchronize = 0     */
        /* Exact = 0                   */
        flags = 0;

        /* Make pHyp happy */
        if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
                hpte_r &= ~_PAGE_COHERENT;
        if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
                flags |= H_COALESCE_CAND;

        lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
        if (unlikely(lpar_rc == H_PTEG_FULL)) {
                if (!(vflags & HPTE_V_BOLTED))
                        pr_devel(" full\n");
                return -1;
        }

        /*
         * Since we try and ioremap PHBs we don't own, the pte insert
         * will fail. However we must catch the failure in hash_page
         * or we will loop forever, so return -2 in this case.
         */
        if (unlikely(lpar_rc != H_SUCCESS)) {
                if (!(vflags & HPTE_V_BOLTED))
                        pr_devel(" lpar err %lu\n", lpar_rc);
                return -2;
        }
        if (!(vflags & HPTE_V_BOLTED))
                pr_devel(" -> slot: %lu\n", slot & 7);

        /*
         * Because of iSeries, we have to pass down the secondary
         * bucket bit here as well
         */
        return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);
static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
{
        unsigned long slot_offset;
        unsigned long lpar_rc;
        int i;
        unsigned long dummy1, dummy2;

        /* pick a random slot to start at */
        slot_offset = mftb() & 0x7;

        for (i = 0; i < HPTES_PER_GROUP; i++) {

                /* don't remove a bolted entry */
                lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
                                           (0x1UL << 4), &dummy1, &dummy2);
                if (lpar_rc == H_SUCCESS)
                        return i;
                BUG_ON(lpar_rc != H_NOT_FOUND);

                slot_offset++;
                slot_offset &= 0x7;
        }

        return -1;
}
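/*
 * Clear the whole hashed page table: read HPTEs in batches of four and
 * invalidate every valid entry that is not part of the VRMA.
 */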
static void pSeries_lpar_hptab_clear(void)
{
        unsigned long size_bytes = 1UL << ppc64_pft_size;
        unsigned long hpte_count = size_bytes >> 4;
        struct {
                unsigned long pteh;
                unsigned long ptel;
        } ptes[4];
        long lpar_rc;
        int i, j;

        /* Read in batches of 4,
         * invalidate only valid entries not in the VRMA
         * hpte_count will be a multiple of 4
         */
        for (i = 0; i < hpte_count; i += 4) {
                lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
                if (lpar_rc != H_SUCCESS)
                        continue;
                for (j = 0; j < 4; j++) {
                        if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
                                HPTE_V_VRMA_MASK)
                                continue;
                        if (ptes[j].pteh & HPTE_V_VALID)
                                plpar_pte_remove_raw(0, i + j, 0,
                                        &(ptes[j].pteh), &(ptes[j].ptel));
                }
        }
}
/*
 * This computes the AVPN and B fields of the first dword of a HPTE,
 * for use when we want to match an existing PTE.  The bottom 7 bits
 * of the returned value are zero.
 */
static inline unsigned long hpte_encode_avpn(unsigned long va, int psize,
                                             int ssize)
{
        unsigned long v;

        v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
        v <<= HPTE_V_AVPN_SHIFT;
        v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
        return v;
}
/*
 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
 * the low 3 bits of flags happen to line up.  So no transform is needed.
 * We can probably optimize here and assume the high bits of newpp are
 * already zero.  For now I am paranoid.
 */
static long pSeries_lpar_hpte_updatepp(unsigned long slot,
                                       unsigned long newpp,
                                       unsigned long va,
                                       int psize, int ssize, int local)
{
        unsigned long lpar_rc;
        unsigned long flags = (newpp & 7) | H_AVPN;
        unsigned long want_v;

        want_v = hpte_encode_avpn(va, psize, ssize);

        pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
                 want_v, slot, flags, psize);

        lpar_rc = plpar_pte_protect(flags, slot, want_v);

        if (lpar_rc == H_NOT_FOUND) {
                pr_devel("not found !\n");
                return -1;
        }

        pr_devel("ok\n");

        BUG_ON(lpar_rc != H_SUCCESS);

        return 0;
}
static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
{
        unsigned long dword0;
        unsigned long lpar_rc;
        unsigned long dummy_word1;
        unsigned long flags;

        /* Read 1 pte at a time                        */
        /* Do not need RPN to logical page translation */
        /* No cross CEC PFT access                     */
        flags = 0;

        lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);

        BUG_ON(lpar_rc != H_SUCCESS);

        return dword0;
}
static long pSeries_lpar_hpte_find(unsigned long va, int psize, int ssize)
{
        unsigned long hash;
        unsigned long i;
        long slot;
        unsigned long want_v, hpte_v;

        hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
        want_v = hpte_encode_avpn(va, psize, ssize);

        /* Bolted entries are always in the primary group */
        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        for (i = 0; i < HPTES_PER_GROUP; i++) {
                hpte_v = pSeries_lpar_hpte_getword0(slot);

                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
                        /* HPTE matches */
                        return slot;
                ++slot;
        }

        return -1;
}
static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
                                             unsigned long ea,
                                             int psize, int ssize)
{
        unsigned long lpar_rc, slot, vsid, va, flags;

        vsid = get_kernel_vsid(ea, ssize);
        va = hpt_va(ea, vsid, ssize);

        slot = pSeries_lpar_hpte_find(va, psize, ssize);
        BUG_ON(slot == -1);

        flags = newpp & 7;
        lpar_rc = plpar_pte_protect(flags, slot, 0);

        BUG_ON(lpar_rc != H_SUCCESS);
}
static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
                                         int psize, int ssize, int local)
{
        unsigned long want_v;
        unsigned long lpar_rc;
        unsigned long dummy1, dummy2;

        pr_devel("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
                 slot, va, psize, local);

        want_v = hpte_encode_avpn(va, psize, ssize);
        lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
        if (lpar_rc == H_NOT_FOUND)
                return;

        BUG_ON(lpar_rc != H_SUCCESS);
}
static void pSeries_lpar_hpte_removebolted(unsigned long ea,
                                           int psize, int ssize)
{
        unsigned long slot, vsid, va;

        vsid = get_kernel_vsid(ea, ssize);
        va = hpt_va(ea, vsid, ssize);

        slot = pSeries_lpar_hpte_find(va, psize, ssize);
        BUG_ON(slot == -1);

        pSeries_lpar_hpte_invalidate(slot, va, psize, ssize, 0);
}
/* Flag bits for H_BULK_REMOVE */
#define HBR_REQUEST     0x4000000000000000UL
#define HBR_RESPONSE    0x8000000000000000UL
#define HBR_END         0xc000000000000000UL
#define HBR_AVPN        0x0200000000000000UL
#define HBR_ANDCOND     0x0100000000000000UL
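/*
 * Each H_BULK_REMOVE request takes two of the hcall's parameter
 * doublewords: a control word (HBR_REQUEST plus flags and the slot
 * number) followed by the AVPN to match. Up to four such pairs fit in
 * one call; a partial batch is terminated with HBR_END.
 */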
/*
 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
 * lock.
 */
static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
{
        unsigned long i, pix, rc;
        unsigned long flags = 0;
        struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
        int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
        unsigned long param[9];
        unsigned long va;
        unsigned long hash, index, shift, hidx, slot;
        real_pte_t pte;
        int psize, ssize;

        if (lock_tlbie)
                spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);

        psize = batch->psize;
        ssize = batch->ssize;
        pix = 0;
        for (i = 0; i < number; i++) {
                va = batch->vaddr[i];
                pte = batch->pte[i];
                pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
                        hash = hpt_hash(va, shift, ssize);
                        hidx = __rpte_to_hidx(pte, index);
                        if (hidx & _PTEIDX_SECONDARY)
                                hash = ~hash;
                        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                        slot += hidx & _PTEIDX_GROUP_IX;
                        if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
                                pSeries_lpar_hpte_invalidate(slot, va, psize,
                                                             ssize, local);
                        } else {
                                param[pix] = HBR_REQUEST | HBR_AVPN | slot;
                                param[pix+1] = hpte_encode_avpn(va, psize,
                                                                ssize);
                                pix += 2;
                                if (pix == 8) {
                                        rc = plpar_hcall9(H_BULK_REMOVE, param,
                                                param[0], param[1], param[2],
                                                param[3], param[4], param[5],
                                                param[6], param[7]);
                                        BUG_ON(rc != H_SUCCESS);
                                        pix = 0;
                                }
                        }
                } pte_iterate_hashed_end();
        }
        if (pix) {
                param[pix] = HBR_END;
                rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
                                  param[2], param[3], param[4], param[5],
                                  param[6], param[7]);
                BUG_ON(rc != H_SUCCESS);
        }

        if (lock_tlbie)
                spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
static int __init disable_bulk_remove(char *str)
{
        if (strcmp(str, "off") == 0 &&
            firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
                printk(KERN_INFO "Disabling BULK_REMOVE firmware feature");
                powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
        }
        return 1;
}

__setup("bulk_remove=", disable_bulk_remove);
void __init hpte_init_lpar(void)
{
        ppc_md.hpte_invalidate     = pSeries_lpar_hpte_invalidate;
        ppc_md.hpte_updatepp       = pSeries_lpar_hpte_updatepp;
        ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
        ppc_md.hpte_insert         = pSeries_lpar_hpte_insert;
        ppc_md.hpte_remove         = pSeries_lpar_hpte_remove;
        ppc_md.hpte_removebolted   = pSeries_lpar_hpte_removebolted;
        ppc_md.flush_hash_range    = pSeries_lpar_flush_hash_range;
        ppc_md.hpte_clear_all      = pSeries_lpar_hptab_clear;
}
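/*
 * Cooperative Memory Overcommit (CMO) support: on shared-memory LPARs,
 * freed pages are hinted to the hypervisor with H_PAGE_INIT so their
 * backing memory can be reclaimed. The hinting can be disabled with the
 * cmo_free_hint= command line option below.
 */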
#ifdef CONFIG_PPC_SMLPAR
#define CMO_FREE_HINT_DEFAULT 1
static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT;

static int __init cmo_free_hint(char *str)
{
        char *parm;
        parm = strstrip(str);

        if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) {
                printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n");
                cmo_free_hint_flag = 0;
                return 1;
        }

        cmo_free_hint_flag = 1;
        printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n");

        if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0)
                return 1;

        return 0;
}

__setup("cmo_free_hint=", cmo_free_hint);
static void pSeries_set_page_state(struct page *page, int order,
                                   unsigned long state)
{
        int i, j;
        unsigned long cmo_page_sz, addr;

        cmo_page_sz = cmo_get_page_size();
        addr = __pa((unsigned long)page_address(page));

        for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) {
                for (j = 0; j < PAGE_SIZE; j += cmo_page_sz)
                        plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0);
        }
}
void arch_free_page(struct page *page, int order)
{
        if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO))
                return;

        pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED);
}
EXPORT_SYMBOL(arch_free_page);

#endif /* CONFIG_PPC_SMLPAR */
#ifdef CONFIG_TRACEPOINTS

/*
 * We optimise our hcall path by placing hcall_tracepoint_refcount
 * directly in the TOC so we can check if the hcall tracepoints are
 * enabled via a single load.
 */

/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long hcall_tracepoint_refcount;

/*
 * Since the tracing code might execute hcalls we need to guard against
 * recursion. One example of this are spinlocks calling H_YIELD on
 * shared processor partitions.
 */
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
void hcall_tracepoint_regfunc(void)
{
        hcall_tracepoint_refcount++;
}

void hcall_tracepoint_unregfunc(void)
{
        hcall_tracepoint_refcount--;
}
void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
        unsigned long flags;
        unsigned int *depth;

        /*
         * We cannot call tracepoints inside RCU idle regions which
         * means we must not trace H_CEDE.
         */
        if (opcode == H_CEDE)
                return;

        local_irq_save(flags);

        depth = &__get_cpu_var(hcall_trace_depth);

        if (*depth)
                goto out;

        (*depth)++;
        preempt_disable();
        trace_hcall_entry(opcode, args);
        (*depth)--;

out:
        local_irq_restore(flags);
}
void __trace_hcall_exit(long opcode, unsigned long retval,
                        unsigned long *retbuf)
{
        unsigned long flags;
        unsigned int *depth;

        if (opcode == H_CEDE)
                return;

        local_irq_save(flags);

        depth = &__get_cpu_var(hcall_trace_depth);

        if (*depth)
                goto out;

        (*depth)++;
        trace_hcall_exit(opcode, retval, retbuf);
        preempt_enable();
        (*depth)--;

out:
        local_irq_restore(flags);
}
#endif
/*
 * h_get_mpp
 * H_GET_MPP hcall returns info in 7 parms
 */
int h_get_mpp(struct hvcall_mpp_data *mpp_data)
{
        int rc;
        unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];

        rc = plpar_hcall9(H_GET_MPP, retbuf);

        mpp_data->entitled_mem = retbuf[0];
        mpp_data->mapped_mem = retbuf[1];

        mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
        mpp_data->pool_num = retbuf[2] & 0xffff;

        mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
        mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
        mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;

        mpp_data->pool_size = retbuf[4];
        mpp_data->loan_request = retbuf[5];
        mpp_data->backing_mem = retbuf[6];

        return rc;
}
EXPORT_SYMBOL(h_get_mpp);
int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
{
        int rc;
        unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };

        rc = plpar_hcall9(H_GET_MPP_X, retbuf);

        mpp_x_data->coalesced_bytes = retbuf[0];
        mpp_x_data->pool_coalesced_bytes = retbuf[1];
        mpp_x_data->pool_purr_cycles = retbuf[2];
        mpp_x_data->pool_spurr_cycles = retbuf[3];