4 * Copyright 2011 IBM Corp.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
14 #include <linux/cpu.h>
15 #include <linux/errno.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/tty.h>
19 #include <linux/reboot.h>
20 #include <linux/init.h>
21 #include <linux/console.h>
22 #include <linux/delay.h>
23 #include <linux/irq.h>
24 #include <linux/seq_file.h>
26 #include <linux/of_fdt.h>
27 #include <linux/interrupt.h>
28 #include <linux/bug.h>
29 #include <linux/pci.h>
30 #include <linux/cpufreq.h>
32 #include <asm/machdep.h>
33 #include <asm/firmware.h>
37 #include <asm/kexec.h>
39 #include <asm/cputhreads.h>
40 #include <asm/cpuidle.h>
41 #include <asm/code-patching.h>
46 static void __init
pnv_setup_arch(void)
48 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT
);
56 /* Setup RTC and NVRAM callbacks */
57 if (firmware_has_feature(FW_FEATURE_OPAL
))
66 static void __init
pnv_init_early(void)
69 * Initialize the LPC bus now so that legacy serial
70 * ports can be found on it
74 #ifdef CONFIG_HVC_OPAL
75 if (firmware_has_feature(FW_FEATURE_OPAL
))
76 hvc_opal_init_early();
79 add_preferred_console("hvc", 0, NULL
);
82 static void __init
pnv_init_IRQ(void)
86 WARN_ON(!ppc_md
.get_irq
);
89 static void pnv_show_cpuinfo(struct seq_file
*m
)
91 struct device_node
*root
;
92 const char *model
= "";
94 root
= of_find_node_by_path("/");
96 model
= of_get_property(root
, "model", NULL
);
97 seq_printf(m
, "machine\t\t: PowerNV %s\n", model
);
98 if (firmware_has_feature(FW_FEATURE_OPALv3
))
99 seq_printf(m
, "firmware\t: OPAL v3\n");
100 else if (firmware_has_feature(FW_FEATURE_OPALv2
))
101 seq_printf(m
, "firmware\t: OPAL v2\n");
102 else if (firmware_has_feature(FW_FEATURE_OPAL
))
103 seq_printf(m
, "firmware\t: OPAL v1\n");
105 seq_printf(m
, "firmware\t: BML\n");
/*
 * Common shutdown/reboot prologue: quiesce OPAL notifiers, disable
 * interrupts, and hand secondaries back to firmware if a flash update
 * is pending.
 */
static void pnv_prepare_going_down(void)
{
	/*
	 * Disable all notifiers from OPAL, we can't
	 * service interrupts anymore anyway
	 */
	opal_notifier_disable();

	/* Soft disable interrupts */
	local_irq_disable();

	/*
	 * Return secondary CPUs to firwmare if a flash update
	 * is pending otherwise we will get all sort of error
	 * messages about CPU being stuck etc.. This will also
	 * have the side effect of hard disabling interrupts so
	 * past this point, the kernel is effectively dead.
	 */
	opal_flash_term_callback();
}
130 static void __noreturn
pnv_restart(char *cmd
)
134 pnv_prepare_going_down();
136 while (rc
== OPAL_BUSY
|| rc
== OPAL_BUSY_EVENT
) {
137 rc
= opal_cec_reboot();
138 if (rc
== OPAL_BUSY_EVENT
)
139 opal_poll_events(NULL
);
144 opal_poll_events(NULL
);
147 static void __noreturn
pnv_power_off(void)
151 pnv_prepare_going_down();
153 while (rc
== OPAL_BUSY
|| rc
== OPAL_BUSY_EVENT
) {
154 rc
= opal_cec_power_down(0);
155 if (rc
== OPAL_BUSY_EVENT
)
156 opal_poll_events(NULL
);
161 opal_poll_events(NULL
);
164 static void __noreturn
pnv_halt(void)
/*
 * Boot progress hook (ppc_md.progress). Intentionally a no-op on
 * PowerNV — there is no progress display to drive.
 */
static void pnv_progress(char *s, unsigned short hex)
{
}
173 static int pnv_dma_set_mask(struct device
*dev
, u64 dma_mask
)
176 return pnv_pci_dma_set_mask(to_pci_dev(dev
), dma_mask
);
177 return __dma_set_mask(dev
, dma_mask
);
180 static u64
pnv_dma_get_required_mask(struct device
*dev
)
183 return pnv_pci_dma_get_required_mask(to_pci_dev(dev
));
185 return __dma_get_required_mask(dev
);
/*
 * machine_shutdown hook (kexec path): tear down PCI then quiesce OPAL.
 * NOTE(review): the two calls were lost in extraction and restored from
 * upstream — confirm against the tree.
 */
static void pnv_shutdown(void)
{
	/* Let the PCI code clear up IODA tables */
	pnv_pci_shutdown();

	/*
	 * Stop OPAL activity: Unregister all OPAL interrupts so they
	 * don't fire up while we kexec and make sure all potentially
	 * DMA'ing ops are complete (such as dump retrieval).
	 */
	opal_shutdown();
}
202 static void pnv_kexec_wait_secondaries_down(void)
204 int my_cpu
, i
, notified
= -1;
208 for_each_online_cpu(i
) {
216 rc
= opal_query_cpu_status(get_hard_smp_processor_id(i
),
218 if (rc
!= OPAL_SUCCESS
|| status
!= OPAL_THREAD_STARTED
)
222 printk(KERN_INFO
"kexec: waiting for cpu %d "
223 "(physical %d) to enter OPAL\n",
224 i
, paca
[i
].hw_cpu_id
);
231 static void pnv_kexec_cpu_down(int crash_shutdown
, int secondary
)
233 xics_kexec_teardown_cpu(secondary
);
235 /* On OPAL v3, we return all CPUs to firmware */
237 if (!firmware_has_feature(FW_FEATURE_OPALv3
))
241 /* Return secondary CPUs to firmware on OPAL v3 */
243 get_paca()->kexec_state
= KEXEC_STATE_REAL_MODE
;
246 /* Return the CPU to OPAL */
248 } else if (crash_shutdown
) {
250 * On crash, we don't wait for secondaries to go
251 * down as they might be unreachable or hung, so
252 * instead we just wait a bit and move on.
256 /* Primary waits for the secondaries to have reached OPAL */
257 pnv_kexec_wait_secondaries_down();
260 #endif /* CONFIG_KEXEC */
262 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
/*
 * Memory hotplug block size for PowerNV: fixed at 256MB, matching the
 * LMB size used by the platform.
 */
static unsigned long pnv_memory_block_size(void)
{
	return 256UL * 1024 * 1024;
}
269 static void __init
pnv_setup_machdep_opal(void)
271 ppc_md
.get_boot_time
= opal_get_boot_time
;
272 ppc_md
.restart
= pnv_restart
;
273 pm_power_off
= pnv_power_off
;
274 ppc_md
.halt
= pnv_halt
;
275 ppc_md
.machine_check_exception
= opal_machine_check
;
276 ppc_md
.mce_check_early_recovery
= opal_mce_check_early_recovery
;
277 ppc_md
.hmi_exception_early
= opal_hmi_exception_early
;
278 ppc_md
.handle_hmi_exception
= opal_handle_hmi_exception
;
281 #ifdef CONFIG_PPC_POWERNV_RTAS
282 static void __init
pnv_setup_machdep_rtas(void)
284 if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE
) {
285 ppc_md
.get_boot_time
= rtas_get_boot_time
;
286 ppc_md
.get_rtc_time
= rtas_get_rtc_time
;
287 ppc_md
.set_rtc_time
= rtas_set_rtc_time
;
289 ppc_md
.restart
= rtas_restart
;
290 pm_power_off
= rtas_power_off
;
291 ppc_md
.halt
= rtas_halt
;
293 #endif /* CONFIG_PPC_POWERNV_RTAS */
295 static u32 supported_cpuidle_states
;
297 int pnv_save_sprs_for_winkle(void)
303 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
304 * all cpus at boot. Get these reg values of current cpu and use the
305 * same accross all cpus.
307 uint64_t lpcr_val
= mfspr(SPRN_LPCR
) & ~(u64
)LPCR_PECE1
;
308 uint64_t hid0_val
= mfspr(SPRN_HID0
);
309 uint64_t hid1_val
= mfspr(SPRN_HID1
);
310 uint64_t hid4_val
= mfspr(SPRN_HID4
);
311 uint64_t hid5_val
= mfspr(SPRN_HID5
);
312 uint64_t hmeer_val
= mfspr(SPRN_HMEER
);
314 for_each_possible_cpu(cpu
) {
315 uint64_t pir
= get_hard_smp_processor_id(cpu
);
316 uint64_t hsprg0_val
= (uint64_t)&paca
[cpu
];
319 * HSPRG0 is used to store the cpu's pointer to paca. Hence last
320 * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
321 * with 63rd bit set, so that when a thread wakes up at 0x100 we
322 * can use this bit to distinguish between fastsleep and
327 rc
= opal_slw_set_reg(pir
, SPRN_HSPRG0
, hsprg0_val
);
331 rc
= opal_slw_set_reg(pir
, SPRN_LPCR
, lpcr_val
);
335 /* HIDs are per core registers */
336 if (cpu_thread_in_core(cpu
) == 0) {
338 rc
= opal_slw_set_reg(pir
, SPRN_HMEER
, hmeer_val
);
342 rc
= opal_slw_set_reg(pir
, SPRN_HID0
, hid0_val
);
346 rc
= opal_slw_set_reg(pir
, SPRN_HID1
, hid1_val
);
350 rc
= opal_slw_set_reg(pir
, SPRN_HID4
, hid4_val
);
354 rc
= opal_slw_set_reg(pir
, SPRN_HID5
, hid5_val
);
363 static void pnv_alloc_idle_core_states(void)
366 int nr_cores
= cpu_nr_cores();
367 u32
*core_idle_state
;
370 * core_idle_state - First 8 bits track the idle state of each thread
371 * of the core. The 8th bit is the lock bit. Initially all thread bits
372 * are set. They are cleared when the thread enters deep idle state
373 * like sleep and winkle. Initially the lock bit is cleared.
374 * The lock bit has 2 purposes
375 * a. While the first thread is restoring core state, it prevents
376 * other threads in the core from switching to process context.
377 * b. While the last thread in the core is saving the core state, it
378 * prevents a different thread from waking up.
380 for (i
= 0; i
< nr_cores
; i
++) {
381 int first_cpu
= i
* threads_per_core
;
382 int node
= cpu_to_node(first_cpu
);
384 core_idle_state
= kmalloc_node(sizeof(u32
), GFP_KERNEL
, node
);
385 *core_idle_state
= PNV_CORE_IDLE_THREAD_BITS
;
387 for (j
= 0; j
< threads_per_core
; j
++) {
388 int cpu
= first_cpu
+ j
;
390 paca
[cpu
].core_idle_state_ptr
= core_idle_state
;
391 paca
[cpu
].thread_idle_state
= PNV_THREAD_RUNNING
;
392 paca
[cpu
].thread_mask
= 1 << j
;
396 update_subcore_sibling_mask();
398 if (supported_cpuidle_states
& OPAL_PM_WINKLE_ENABLED
)
399 pnv_save_sprs_for_winkle();
402 u32
pnv_get_supported_cpuidle_states(void)
404 return supported_cpuidle_states
;
406 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states
);
408 static int __init
pnv_init_idle_states(void)
410 struct device_node
*power_mgt
;
412 const __be32
*idle_state_flags
;
413 u32 len_flags
, flags
;
416 supported_cpuidle_states
= 0;
418 if (cpuidle_disable
!= IDLE_NO_OVERRIDE
)
421 if (!firmware_has_feature(FW_FEATURE_OPALv3
))
424 power_mgt
= of_find_node_by_path("/ibm,opal/power-mgt");
426 pr_warn("opal: PowerMgmt Node not found\n");
430 idle_state_flags
= of_get_property(power_mgt
,
431 "ibm,cpu-idle-state-flags", &len_flags
);
432 if (!idle_state_flags
) {
433 pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
437 dt_idle_states
= len_flags
/ sizeof(u32
);
439 for (i
= 0; i
< dt_idle_states
; i
++) {
440 flags
= be32_to_cpu(idle_state_flags
[i
]);
441 supported_cpuidle_states
|= flags
;
443 if (!(supported_cpuidle_states
& OPAL_PM_SLEEP_ENABLED_ER1
)) {
445 (unsigned int *)pnv_fastsleep_workaround_at_entry
,
448 (unsigned int *)pnv_fastsleep_workaround_at_exit
,
451 pnv_alloc_idle_core_states();
455 subsys_initcall(pnv_init_idle_states
);
457 static int __init
pnv_probe(void)
459 unsigned long root
= of_get_flat_dt_root();
461 if (!of_flat_dt_is_compatible(root
, "ibm,powernv"))
466 if (firmware_has_feature(FW_FEATURE_OPAL
))
467 pnv_setup_machdep_opal();
468 #ifdef CONFIG_PPC_POWERNV_RTAS
470 pnv_setup_machdep_rtas();
471 #endif /* CONFIG_PPC_POWERNV_RTAS */
473 pr_debug("PowerNV detected !\n");
479 * Returns the cpu frequency for 'cpu' in Hz. This is used by
482 static unsigned long pnv_get_proc_freq(unsigned int cpu
)
484 unsigned long ret_freq
;
486 ret_freq
= cpufreq_quick_get(cpu
) * 1000ul;
489 * If the backend cpufreq driver does not exist,
490 * then fallback to old way of reporting the clockrate.
493 ret_freq
= ppc_proc_freq
;
497 define_machine(powernv
) {
500 .init_early
= pnv_init_early
,
501 .setup_arch
= pnv_setup_arch
,
502 .init_IRQ
= pnv_init_IRQ
,
503 .show_cpuinfo
= pnv_show_cpuinfo
,
504 .get_proc_freq
= pnv_get_proc_freq
,
505 .progress
= pnv_progress
,
506 .machine_shutdown
= pnv_shutdown
,
507 .power_save
= power7_idle
,
508 .calibrate_decr
= generic_calibrate_decr
,
509 .dma_set_mask
= pnv_dma_set_mask
,
510 .dma_get_required_mask
= pnv_dma_get_required_mask
,
512 .kexec_cpu_down
= pnv_kexec_cpu_down
,
514 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
515 .memory_block_size
= pnv_memory_block_size
,