2 * This file contains idle entry/exit functions for POWER7,
3 * POWER8 and POWER9 CPUs.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
11 #include <linux/threads.h>
12 #include <asm/processor.h>
14 #include <asm/cputable.h>
15 #include <asm/thread_info.h>
16 #include <asm/ppc_asm.h>
17 #include <asm/asm-offsets.h>
18 #include <asm/ppc-opcode.h>
19 #include <asm/hw_irq.h>
20 #include <asm/kvm_book3s_asm.h>
22 #include <asm/cpuidle.h>
23 #include <asm/exception-64s.h>
24 #include <asm/book3s/64/mmu-hash.h>
30 * Use unused space in the interrupt stack to save and restore
31 * registers for winkle support.
46 #define PSSCR_EC_ESL_MASK_SHIFTED ((PSSCR_EC | PSSCR_ESL) >> 16) /* EC|ESL mask shifted right by 16, used as the andis. immediate in the ESL/EC check; fully parenthesized so the expansion is precedence-safe */
51 * Used by threads before entering deep idle states. Saves SPRs
52 * in interrupt stack frame
56 * Note that all registers, i.e. per-core, per-subcore and per-thread, are saved
57 * here since any thread in the core might wake up first
61 * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
71 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
92 * Used by threads when the lock bit of core_idle_state is set.
93 * Threads will spin in HMT_LOW until the lock bit is cleared.
94 * r14 - pointer to core_idle_state
95 * r15 - used to load contents of core_idle_state
96 * r9 - used as a temporary variable
102 andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
106 andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
107 bne- core_idle_lock_held
111 * Pass requested state in r3:
112 * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
113 * - Requested PSSCR value in POWER9
115 * Address of idle handler to branch to in realmode in r4
117 pnv_powersave_common:
118 /* Use r3 to pass state nap/sleep/winkle */
119 /* NAP is a state loss, we create a regs frame on the
120 * stack, fill it up with the state we care about and
121 * stick a pointer to it in PACAR1. We really only
122 * need to save PC, some CR bits and the NV GPRs,
123 * but for now an interrupt frame will do.
129 stdu r1,-INT_FRAME_SIZE(r1)
133 /* We haven't lost state ... yet */
135 stb r0,PACA_NAPSTATELOST(r13)
137 /* Continue saving state */
145 * Go to real mode to do the nap, as required by the architecture.
146 * Also, we need to be in real mode before setting hwthread_state,
147 * because as soon as we do that, another thread can switch
148 * the MMU context to the guest.
150 LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
154 .globl pnv_enter_arch207_idle_mode
155 pnv_enter_arch207_idle_mode:
156 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
157 /* Tell KVM we're entering idle */
158 li r4,KVM_HWTHREAD_IN_IDLE
159 /******************************************************/
160 /* N O T E W E L L ! ! ! N O T E W E L L */
161 /* The following store to HSTATE_HWTHREAD_STATE(r13) */
162 /* MUST occur in real mode, i.e. with the MMU off, */
163 /* and the MMU must stay off until we clear this flag */
164 /* and test HSTATE_HWTHREAD_REQ(r13) in */
165 /* pnv_powersave_wakeup in this file. */
166 /* The reason is that another thread can switch the */
167 /* MMU to a guest context whenever this flag is set */
168 /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
169 /* that would potentially cause this thread to start */
170 /* executing instructions from guest memory in */
171 /* hypervisor mode, leading to a host crash or data */
172 /* corruption, or worse. */
173 /******************************************************/
174 stb r4,HSTATE_HWTHREAD_STATE(r13)
176 stb r3,PACA_THREAD_IDLE_STATE(r13)
177 cmpwi cr3,r3,PNV_THREAD_SLEEP
179 IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
182 /* Sleep or winkle */
183 lbz r7,PACA_THREAD_MASK(r13)
184 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
187 lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h
192 andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
193 bnel- core_idle_lock_held
195 add r15,r15,r5 /* Add if winkle */
196 andc r15,r15,r7 /* Clear thread bit */
198 andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
201 * If cr0 = 0, then current thread is the last thread of the core entering
202 * sleep. Last thread needs to execute the hardware bug workaround code if
203 * required by the platform.
204 * Make the workaround call unconditionally here. The below branch call is
205 * patched out when the idle states are discovered if the platform does not
208 .global pnv_fastsleep_workaround_at_entry
209 pnv_fastsleep_workaround_at_entry:
210 beq fastsleep_workaround_at_entry
216 common_enter: /* common code for all the threads entering sleep or winkle */
218 IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
220 fastsleep_workaround_at_entry:
221 oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
226 /* Fast sleep workaround */
229 bl opal_config_cpu_idle_state
232 xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
238 bl save_sprs_to_stack
240 IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
243 * r3 - PSSCR value corresponding to the requested stop state.
246 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
247 /* Tell KVM we're entering idle */
248 li r4,KVM_HWTHREAD_IN_IDLE
249 /* DO THIS IN REAL MODE! See comment above. */
250 stb r4,HSTATE_HWTHREAD_STATE(r13)
253 * Check if we are executing the lite variant with ESL=EC=0
255 andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
256 clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
257 bne .Lhandle_esl_ec_set
258 IDLE_STATE_ENTER_SEQ(PPC_STOP)
259 li r3,0 /* Since we didn't lose state, return 0 */
262 * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
263 * it can determine if the wakeup reason is an HMI in
264 * CHECK_HMI_INTERRUPT.
266 * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
267 * reason, so there is no point setting r12 to SRR1.
269 * Further, we clear r12 here, so that we don't accidentally enter the
270 * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
277 * POWER9 DD2 can incorrectly set PMAO when waking up after a
278 * state-loss idle. Saving and restoring MMCR0 over idle is a
285 * Check if the requested state is a deep idle state.
287 LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
288 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
290 bge .Lhandle_deep_stop
291 IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
294 * Entering deep idle state.
295 * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
296 * stack and enter stop
298 lbz r7,PACA_THREAD_MASK(r13)
299 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
303 andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
304 bnel- core_idle_lock_held
305 andc r15,r15,r7 /* Clear thread bit */
311 bl save_sprs_to_stack
313 IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
316 * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
317 * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
319 _GLOBAL(power7_idle_insn)
320 /* r3 (requested idle state) is left untouched; put the address of the realmode idle handler in r4, as pnv_powersave_common expects, then branch there. */
321 LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
322 b pnv_powersave_common
324 #define CHECK_HMI_INTERRUPT \
325 BEGIN_FTR_SECTION_NESTED(66); \
326 rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \
327 FTR_SECTION_ELSE_NESTED(66); \
328 rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \
329 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
330 cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
332 /* Invoke opal call to handle hmi */ \
333 ld r2,PACATOC(r13); \
335 std r3,ORIG_GPR3(r1); /* Save original r3 */ \
336 li r3,0; /* NULL argument */ \
337 bl hmi_exception_realmode; \
339 ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
343 * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
344 * r3 contains desired PSSCR register value.
346 _GLOBAL(power9_idle_stop)
347 std r3, PACA_REQ_PSSCR(r13)
349 LOAD_REG_ADDR(r4,power_enter_stop)
350 b pnv_powersave_common
354 * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
355 * HSPRG0 will be set to the HSPRG0 value of one of the
356 * threads in this core. Thus the value we have in r13
357 * may not be this thread's paca pointer.
359 * Fortunately, the TIR remains invariant. Since this thread's
360 * paca pointer is recorded in all its sibling's paca, we can
361 * correctly recover this thread's paca pointer if we
362 * know the index of this thread in the core.
364 * This index can be obtained from the TIR.
366 * i.e, thread's position in the core = TIR.
367 * If this value is i, then this thread's paca is
368 * paca->thread_sibling_pacas[i].
370 power9_dd1_recover_paca:
373 * Since each entry in thread_sibling_pacas is 8 bytes
374 * we need to left-shift by 3 bits. Thus r4 = i * 8
377 /* Get &paca->thread_sibling_pacas[0] in r5 */
378 ld r5, PACA_SIBLING_PACA_PTRS(r13)
379 /* Load paca->thread_sibling_pacas[i] into r13 */
383 * Indicate that we have lost NVGPR state
384 * which needs to be restored from the stack.
387 stb r3,PACA_NAPSTATELOST(r13)
391 * Called from machine check handler for powersave wakeups.
392 * Low level machine check processing has already been done. Now just
393 * go through the wake up path to get everything in order.
395 * r3 - The original SRR1 value.
396 * Original SRR[01] have been clobbered.
399 .global pnv_powersave_wakeup_mce
400 pnv_powersave_wakeup_mce:
401 /* Set cr3 for pnv_powersave_wakeup */
402 rlwinm r11,r3,47-31,30,31
406 * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
407 * reason into r12, which allows reuse of the system reset wakeup
408 * code without being mistaken for another type of wakeup.
410 oris r12,r3,SRR1_WAKEMCE_RESVD@h
412 b pnv_powersave_wakeup
415 * Called from reset vector for powersave wakeups.
416 * cr3 - set to gt if waking up with partial/complete hypervisor state loss
419 .global pnv_powersave_wakeup
420 pnv_powersave_wakeup:
424 BEGIN_FTR_SECTION_NESTED(70)
425 bl power9_dd1_recover_paca
426 END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
427 bl pnv_restore_hyp_resource_arch300
429 bl pnv_restore_hyp_resource_arch207
430 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
432 li r0,PNV_THREAD_RUNNING
433 stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
437 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
438 li r0,KVM_HWTHREAD_IN_KERNEL
439 stb r0,HSTATE_HWTHREAD_STATE(r13)
440 /* Order setting hwthread_state vs. testing hwthread_req */
442 lbz r0,HSTATE_HWTHREAD_REQ(r13)
449 /* Return SRR1 from power7_nap() */
450 blt cr3,pnv_wakeup_noloss
454 * Check whether we have woken up with hypervisor state loss.
455 * If yes, restore hypervisor state and return back to link.
457 * cr3 - set to gt if waking up with partial/complete hypervisor state loss
459 pnv_restore_hyp_resource_arch300:
461 * Workaround for POWER9, if we lost resources, the ERAT
462 * might have been mixed up and needs flushing. We also need
463 * to reload MMCR0 (see comment above). We also need to set
464 * then clear bit 60 in MMCRA to ensure the PMU starts running.
470 ori r4,r4,(1 << (63-60))
472 xori r4,r4,(1 << (63-60))
478 * POWER ISA 3. Use PSSCR to determine if we
479 * are waking up from deep idle state
481 LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
482 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
484 BEGIN_FTR_SECTION_NESTED(71)
486 * Assume that we are waking up from the state
487 * same as the Requested Level (RL) in the PSSCR
488 * which are Bits 60-63
490 ld r5,PACA_REQ_PSSCR(r13)
492 FTR_SECTION_ELSE_NESTED(71)
494 * 0-3 bits correspond to Power-Saving Level Status
495 * which indicates the idle state we are waking up from
499 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
501 bge cr4,pnv_wakeup_tb_loss /* returns to caller */
503 blr /* Waking up without hypervisor state loss. */
505 /* Same calling convention as arch300 */
506 pnv_restore_hyp_resource_arch207:
508 * POWER ISA 2.07 or less.
509 * Check if we slept with sleep or winkle.
511 lbz r4,PACA_THREAD_IDLE_STATE(r13)
512 cmpwi cr2,r4,PNV_THREAD_NAP
513 bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
516 * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
517 * up from nap. At this stage CR3 shouldn't contain 'gt' since that
518 * indicates we are waking with hypervisor state loss from nap.
522 blr /* Waking up without hypervisor state loss */
525 * Called if waking up from idle state which can cause either partial or
526 * complete hyp state loss.
527 * In POWER8, called if waking up from fastsleep or winkle
528 * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
531 * cr3 - gt if waking up with partial/complete hypervisor state loss
534 * cr4 - gt or eq if waking up from complete hypervisor state loss.
537 * r4 - PACA_THREAD_IDLE_STATE
542 * Before entering any idle state, the NVGPRs are saved in the stack.
543 * If there was a state loss, or PACA_NAPSTATELOST was set, then the
544 * NVGPRs are restored. If we are here, it is likely that state is lost,
545 * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
546 * here are the same as the test to restore NVGPRS:
547 * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
548 * and SRR1 test for restoring NVGPRs.
550 * We are about to clobber NVGPRs now, so set NAPSTATELOST to
551 * guarantee they will always be restored. This might be tightened
552 * with careful reading of specs (particularly for ISA300) but this
553 * is already a slow wakeup path and it's simpler to be safe.
556 stb r0,PACA_NAPSTATELOST(r13)
560 * Save SRR1 and LR in NVGPRs as they might be clobbered in
561 * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
562 * to determine the wakeup reason if we branch to kvm_start_guest. LR
563 * is required to return back to reset vector after hypervisor state
564 * restore is complete.
571 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
573 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
574 lbz r7,PACA_THREAD_MASK(r13)
577 * Take the core lock to synchronize against other threads.
579 * Lock bit is set in one of the 2 cases-
580 * a. In the sleep/winkle enter path, the last thread is executing
581 * fastsleep workaround code.
582 * b. In the wake up path, another thread is executing fastsleep
583 * workaround undo code or resyncing timebase or restoring context
584 * In either case loop until the lock bit is cleared.
588 andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
589 bnel- core_idle_lock_held
590 oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
595 andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
600 * cr2 - eq if first thread to wakeup in core
601 * cr3- gt if waking up with partial/complete hypervisor state loss
603 * cr4 - gt or eq if waking up from complete hypervisor state loss.
609 * If yes, check if all threads were in winkle, decrement our
610 * winkle count, set all thread winkle bits if all were in winkle.
611 * Check if our thread has a winkle bit set, and set cr4 accordingly
612 * (to match ISA300, above). Pseudo-code for core idle state
613 * transitions for ISA207 is as follows (everything happens atomically
614 * due to store conditional and/or lock bit):
621 * core_idle_state &= ~thread_in_core
626 * bool first_in_core, first_in_subcore;
628 * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
629 * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
631 * core_idle_state |= thread_in_core;
636 * core_idle_state &= ~thread_in_core;
637 * core_idle_state += 1 << WINKLE_COUNT_SHIFT;
642 * bool first_in_core, first_in_subcore, winkle_state_lost;
644 * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
645 * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
647 * core_idle_state |= thread_in_core;
649 * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SHIFT))
650 * core_idle_state |= THREAD_WINKLE_BITS;
651 * core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
653 * winkle_state_lost = core_idle_state &
654 * (thread_in_core << WINKLE_THREAD_SHIFT);
655 * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
659 cmpwi r18,PNV_THREAD_WINKLE
661 andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
662 subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
664 ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
666 /* Shift thread bit to winkle mask, then test if this thread is set,
667 * and remove it from the winkle bits */
671 cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
673 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
675 cmpwi r4,0 /* Check if first in subcore */
677 or r15,r15,r7 /* Set thread bit */
678 beq first_thread_in_subcore
679 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
681 or r15,r15,r7 /* Set thread bit */
682 beq cr2,first_thread_in_core
684 /* Not first thread in core or subcore to wake up */
687 first_thread_in_subcore:
689 * If waking up from sleep, subcore state is not lost. Hence
690 * skip subcore state restore
692 blt cr4,subcore_state_restored
694 /* Restore per-subcore state */
703 subcore_state_restored:
705 * Check if the thread is also the first thread in the core. If not,
706 * skip to clear_lock.
710 first_thread_in_core:
713 * First thread in the core waking up from any state which can cause
714 * partial or complete hypervisor state loss. It needs to
715 * call the fastsleep workaround code if the platform requires it.
716 * Call it unconditionally here. The below branch instruction will
717 * be patched out if the platform does not have fastsleep or does not
718 * require the workaround. Patching will be performed during the
719 * discovery of idle-states.
721 .global pnv_fastsleep_workaround_at_exit
722 pnv_fastsleep_workaround_at_exit:
723 b fastsleep_workaround_at_exit
727 * Use cr3 which indicates that we are waking up with at least partial
728 * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
730 ble cr3,.Ltb_resynced
731 /* Time base re-sync */
732 bl opal_resync_timebase;
734 * If waking up from sleep (POWER8), per core state
735 * is not lost, skip to clear_lock.
741 * First thread in the core to wake up and it is waking up with
742 * complete hypervisor state loss. Restore per core hypervisor
750 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
758 xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
764 * Common to all threads.
766 * If waking up from sleep, hypervisor state is not lost. Hence
767 * skip hypervisor state restore.
769 blt cr4,hypervisor_state_restored
771 /* Waking up from winkle */
773 BEGIN_MMU_FTR_SECTION
775 END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
776 /* Restore SLB from PACA */
777 ld r8,PACA_SLBSHADOWPTR(r13)
780 li r3, SLBSHADOW_SAVEAREA
784 andis. r7,r5,SLB_ESID_V@h
791 /* Restore per thread state */
802 /* Call cur_cpu_spec->cpu_restore() */
803 LOAD_REG_ADDR(r4, cur_cpu_spec)
805 ld r12,CPU_SPEC_RESTORE(r4)
806 #ifdef PPC64_ELF_ABI_v1
815 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
816 hypervisor_state_restored:
820 blr /* return to pnv_powersave_wakeup */
822 fastsleep_workaround_at_exit:
825 bl opal_config_cpu_idle_state
829 * R3 here contains the value that will be returned to the caller
831 * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
833 .global pnv_wakeup_loss
838 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
844 addi r1,r1,INT_FRAME_SIZE
851 * R3 here contains the value that will be returned to the caller
853 * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
856 lbz r0,PACA_NAPSTATELOST(r13)
862 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
866 addi r1,r1,INT_FRAME_SIZE