/*
 * cpuidle-powernv - idle state cpuidle driver.
 * Adapted from drivers/cpuidle/cpuidle-pseries
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/clockchips.h>
#include <linux/of.h>
#include <linux/slab.h>

#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/opal.h>
#include <asm/runlatch.h>
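
/*
 * Idle states whose exit latency (as reported by firmware) exceeds this
 * threshold are not registered with cpuidle; see powernv_add_idle_states().
 */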
#define POWERNV_THRESHOLD_LATENCY_NS 200000

static struct cpuidle_driver powernv_idle_driver = {
	.name  = "powernv_idle",
	.owner = THIS_MODULE,
};

static int max_idle_state;
static struct cpuidle_state *cpuidle_state_table;

static u64 stop_psscr_table[CPUIDLE_STATE_MAX];

static u64 snooze_timeout;
static bool snooze_timeout_en;
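
/*
 * Snooze is a software polling state: the thread drops its SMT priority
 * and spins until it needs to reschedule, or (when deeper states exist)
 * until snooze_timeout timebase ticks have elapsed, letting the governor
 * pick a deeper state on the next idle entry.
 */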
static int snooze_loop(struct cpuidle_device *dev,
			struct cpuidle_driver *drv,
			int index)
{
	u64 snooze_exit_time;

	local_irq_enable();
	set_thread_flag(TIF_POLLING_NRFLAG);

	snooze_exit_time = get_tb() + snooze_timeout;
	ppc64_runlatch_off();
	while (!need_resched()) {
		HMT_low();
		HMT_very_low();
		if (snooze_timeout_en && get_tb() > snooze_exit_time)
			break;
	}

	HMT_medium();
	ppc64_runlatch_on();
	clear_thread_flag(TIF_POLLING_NRFLAG);
	smp_mb();
	return index;
}
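
/*
 * Nap is a hardware idle state on POWER7/POWER8. The timebase keeps
 * running in nap, so no broadcast timer is needed (unlike FastSleep).
 */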
static int nap_loop(struct cpuidle_device *dev,
			struct cpuidle_driver *drv,
			int index)
{
	ppc64_runlatch_off();
	power7_idle();
	ppc64_runlatch_on();
	return index;
}

/* Register for fastsleep only in oneshot mode of broadcast */
#ifdef CONFIG_TICK_ONESHOT
static int fastsleep_loop(struct cpuidle_device *dev,
				struct cpuidle_driver *drv,
				int index)
{
	unsigned long old_lpcr = mfspr(SPRN_LPCR);
	unsigned long new_lpcr;

	if (unlikely(system_state < SYSTEM_RUNNING))
		return index;

	new_lpcr = old_lpcr;

	/* Do not exit powersave upon decrementer as we've setup the timer
	 * offload.
	 */
	new_lpcr &= ~LPCR_PECE1;

	mtspr(SPRN_LPCR, new_lpcr);
	power7_sleep();

	mtspr(SPRN_LPCR, old_lpcr);

	return index;
}
#endif
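
/*
 * stop_loop() handles ISA v3.0 (POWER9) "stop" states. The PSSCR value
 * selecting the requested stop level was read from the device tree at
 * probe time and stashed in stop_psscr_table[], indexed by cpuidle
 * state number.
 */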
static int stop_loop(struct cpuidle_device *dev,
		     struct cpuidle_driver *drv,
		     int index)
{
	ppc64_runlatch_off();
	power9_idle_stop(stop_psscr_table[index]);
	ppc64_runlatch_on();
	return index;
}

/*
 * States for dedicated partition case.
 */
static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = {
	{ /* Snooze */
		.name = "snooze",
		.desc = "snooze",
		.exit_latency = 0,
		.target_residency = 0,
		.enter = snooze_loop },
};
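
/*
 * CPU hotplug callbacks: enable the per-cpu cpuidle device when a CPU
 * comes online and disable it when the CPU is torn down.
 */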
static int powernv_cpuidle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

	if (dev && cpuidle_get_driver()) {
		cpuidle_pause_and_lock();
		cpuidle_enable_device(dev);
		cpuidle_resume_and_unlock();
	}
	return 0;
}

static int powernv_cpuidle_cpu_dead(unsigned int cpu)
{
	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

	if (dev && cpuidle_get_driver()) {
		cpuidle_pause_and_lock();
		cpuidle_disable_device(dev);
		cpuidle_resume_and_unlock();
	}
	return 0;
}

/*
 * powernv_cpuidle_driver_init()
 */
static int powernv_cpuidle_driver_init(void)
{
	int idle_state;
	struct cpuidle_driver *drv = &powernv_idle_driver;

	drv->state_count = 0;

	for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
		/* Is the state not enabled? */
		if (cpuidle_state_table[idle_state].enter == NULL)
			continue;

		drv->states[drv->state_count] =	/* structure copy */
			cpuidle_state_table[idle_state];

		drv->state_count += 1;
	}

	/*
	 * On the PowerNV platform cpu_present may be less than cpu_possible in
	 * cases when firmware detects the CPU, but it is not available to the
	 * OS. If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at
	 * run time and hence cpu_devices are not created for those CPUs by the
	 * generic topology_init().
	 *
	 * drv->cpumask defaults to cpu_possible_mask in
	 * __cpuidle_driver_init(). This breaks cpuidle on PowerNV where
	 * cpu_devices are not created for CPUs in cpu_possible_mask that
	 * cannot be hot-added later at run time.
	 *
	 * Trying cpuidle_register_device() on a CPU without a cpu_device is
	 * incorrect, so pass a correct CPU mask to the generic cpuidle driver.
	 */
	drv->cpumask = (struct cpumask *)cpu_present_mask;

	return 0;
}
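
/*
 * powernv_add_idle_states() parses the "/ibm,opal/power-mgt" device tree
 * node exported by OPAL firmware, filling powernv_states[] (and, for stop
 * states, stop_psscr_table[]) from the ibm,cpu-idle-state-* property
 * arrays. It returns the number of usable states, including the statically
 * defined snooze state at index 0.
 *
 * Illustrative shape of the node (property values elided here; the real
 * ones come from firmware):
 *
 *	power-mgt {
 *		ibm,cpu-idle-state-names = "Nap", "FastSleep";
 *		ibm,cpu-idle-state-flags = < ... >;	(OPAL_PM_* bits)
 *		ibm,cpu-idle-state-latencies-ns = < ... >;
 *		ibm,cpu-idle-state-residency-ns = < ... >;
 *	};
 */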
static int powernv_add_idle_states(void)
{
	struct device_node *power_mgt;
	int nr_idle_states = 1; /* Snooze */
	int dt_idle_states;
	u32 latency_ns[CPUIDLE_STATE_MAX];
	u32 residency_ns[CPUIDLE_STATE_MAX];
	u32 flags[CPUIDLE_STATE_MAX];
	u64 psscr_val[CPUIDLE_STATE_MAX];
	const char *names[CPUIDLE_STATE_MAX];
	int i, rc;

	/* Currently we have snooze statically defined */

	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
	if (!power_mgt) {
		pr_warn("opal: PowerMgmt Node not found\n");
		goto out;
	}

	/* Read values of any property to determine the num of idle states */
	dt_idle_states = of_property_count_u32_elems(power_mgt, "ibm,cpu-idle-state-flags");
	if (dt_idle_states < 0) {
		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
		goto out;
	}

	/*
	 * Since snooze is used as the first idle state, the max number of idle
	 * states allowed is CPUIDLE_STATE_MAX - 1.
	 */
	if (dt_idle_states > CPUIDLE_STATE_MAX - 1) {
		pr_warn("cpuidle-powernv: discovered more idle states than allowed\n");
		dt_idle_states = CPUIDLE_STATE_MAX - 1;
	}

	if (of_property_read_u32_array(power_mgt,
			"ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
		goto out;
	}

	if (of_property_read_u32_array(power_mgt,
			"ibm,cpu-idle-state-latencies-ns", latency_ns,
			dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
		goto out;
	}

	if (of_property_read_string_array(power_mgt,
			"ibm,cpu-idle-state-names", names, dt_idle_states) < 0) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
		goto out;
	}

	/*
	 * If the idle states use the stop instruction, probe for the psscr
	 * values which are necessary to specify the required stop level.
	 */
	if (flags[0] & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP))
		if (of_property_read_u64_array(power_mgt,
		    "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
			goto out;
		}

	rc = of_property_read_u32_array(power_mgt,
		"ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);

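	/*
	 * The residency property is optional: rc is checked below, and when
	 * firmware does not provide it the per-state defaults assigned in
	 * the loop are left in place.
	 */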
	for (i = 0; i < dt_idle_states; i++) {
		/*
		 * If an idle state has exit latency beyond
		 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
		 * in cpu-idle.
		 */
		if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
			continue;

		/*
		 * Cpuidle accepts exit_latency and target_residency in us.
		 * Use default target_residency values if f/w does not expose it.
		 */
		if (flags[i] & OPAL_PM_NAP_ENABLED) {
			/* Add NAP state */
			strcpy(powernv_states[nr_idle_states].name, "Nap");
			strcpy(powernv_states[nr_idle_states].desc, "Nap");
			powernv_states[nr_idle_states].flags = 0;
			powernv_states[nr_idle_states].target_residency = 100;
			powernv_states[nr_idle_states].enter = nap_loop;
		} else if ((flags[i] & OPAL_PM_STOP_INST_FAST) &&
				!(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
			strncpy(powernv_states[nr_idle_states].name,
				names[i], CPUIDLE_NAME_LEN);
			strncpy(powernv_states[nr_idle_states].desc,
				names[i], CPUIDLE_NAME_LEN);
			powernv_states[nr_idle_states].flags = 0;

			powernv_states[nr_idle_states].enter = stop_loop;
			stop_psscr_table[nr_idle_states] = psscr_val[i];
		}

		/*
		 * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come
		 * within this config dependency check.
		 */
#ifdef CONFIG_TICK_ONESHOT
		if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
			flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
			/* Add FASTSLEEP state */
			strcpy(powernv_states[nr_idle_states].name, "FastSleep");
			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
			powernv_states[nr_idle_states].target_residency = 300000;
			powernv_states[nr_idle_states].enter = fastsleep_loop;
		} else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) &&
				(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
			strncpy(powernv_states[nr_idle_states].name,
				names[i], CPUIDLE_NAME_LEN);
			strncpy(powernv_states[nr_idle_states].desc,
				names[i], CPUIDLE_NAME_LEN);

			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
			powernv_states[nr_idle_states].enter = stop_loop;
			stop_psscr_table[nr_idle_states] = psscr_val[i];
		}
#endif
		powernv_states[nr_idle_states].exit_latency =
				((unsigned int)latency_ns[i]) / 1000;

		if (!rc) {
			powernv_states[nr_idle_states].target_residency =
				((unsigned int)residency_ns[i]) / 1000;
		}

		nr_idle_states++;
	}
out:
	return nr_idle_states;
}

/*
 * powernv_idle_probe()
 * Choose state table for shared versus dedicated partition
 */
static int powernv_idle_probe(void)
{
	if (cpuidle_disable != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (firmware_has_feature(FW_FEATURE_OPAL)) {
		cpuidle_state_table = powernv_states;
		/* Device tree can indicate more idle states */
		max_idle_state = powernv_add_idle_states();
		if (max_idle_state > 1) {
			snooze_timeout_en = true;
			snooze_timeout = powernv_states[1].target_residency *
					 tb_ticks_per_usec;
		}
	} else
		return -ENODEV;

	return 0;
}
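
/*
 * Built in via device_initcall(): probe for OPAL idle states, build the
 * driver's state table, register the cpuidle driver, and hook the CPU
 * hotplug callbacks so the per-cpu cpuidle devices follow CPU
 * online/offline.
 */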
static int __init powernv_processor_idle_init(void)
{
	int retval;

	retval = powernv_idle_probe();
	if (retval)
		return retval;

	powernv_cpuidle_driver_init();
	retval = cpuidle_register(&powernv_idle_driver, NULL);
	if (retval) {
		printk(KERN_DEBUG "Registration of powernv driver failed.\n");
		return retval;
	}

	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					   "cpuidle/powernv:online",
					   powernv_cpuidle_cpu_online, NULL);
	WARN_ON(retval < 0);
	retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
					   "cpuidle/powernv:dead", NULL,
					   powernv_cpuidle_cpu_dead);
	WARN_ON(retval < 0);
	printk(KERN_DEBUG "powernv_idle_driver registered\n");
	return 0;
}

device_initcall(powernv_processor_idle_init);