// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020-2024 Intel Corporation
 */

#include <linux/highmem.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/reboot.h>

#include "ivpu_coredump.h"
#include "ivpu_drv.h"
#include "ivpu_fw.h"
#include "ivpu_fw_log.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_ms.h"
#include "ivpu_pm.h"
#include "ivpu_trace.h"
#include "vpu_boot_api.h"

static bool ivpu_disable_recovery;
#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected");
#endif

static unsigned long ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");

#define PM_RESCHEDULE_LIMIT 5
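
/*
 * Boot preparation helpers: a cold boot discards all NPU state, reloads the
 * firmware image, and starts it from the cold boot entry point.
 */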
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
{
	struct ivpu_fw_info *fw = vdev->fw;

	ivpu_cmdq_reset_all_contexts(vdev);
	ivpu_ipc_reset(vdev);
	ivpu_fw_log_reset(vdev);
	ivpu_fw_load(vdev);
	fw->entry_point = fw->cold_boot_entry_point;
}
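
/*
 * A warm boot resumes the firmware at the save/restore return address it
 * published in the boot parameters; if none was published, fall back to a
 * cold boot.
 */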
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
{
	struct ivpu_fw_info *fw = vdev->fw;
	struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem);

	if (!bp->save_restore_ret_address) {
		ivpu_pm_prepare_cold_boot(vdev);
		return;
	}

	ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx", bp->save_restore_ret_address);
	fw->entry_point = bp->save_restore_ret_address;
}

static int ivpu_suspend(struct ivpu_device *vdev)
{
	int ret;

	ivpu_prepare_for_reset(vdev);

	ret = ivpu_shutdown(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret);

	return ret;
}
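
/*
 * Power up and boot the firmware. If booting from a warm-boot entry point
 * fails, the device is powered down, reconfigured for a cold boot, and the
 * sequence is retried once from the "retry" label.
 */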
static int ivpu_resume(struct ivpu_device *vdev)
{
	int ret;

retry:
	pci_restore_state(to_pci_dev(vdev->drm.dev));
	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);

	ret = ivpu_hw_power_up(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
		goto err_power_down;
	}

	ret = ivpu_mmu_enable(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
		goto err_power_down;
	}

	ret = ivpu_boot(vdev);
	if (ret)
		goto err_mmu_disable;

	return 0;

err_mmu_disable:
	ivpu_mmu_disable(vdev);
err_power_down:
	ivpu_hw_power_down(vdev);
	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);

	if (!ivpu_fw_is_cold_boot(vdev)) {
		ivpu_pm_prepare_cold_boot(vdev);
		goto retry;
	} else {
		ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
	}

	return ret;
}
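
/*
 * Recovery worker: captures a coredump, takes the reset lock, aborts all
 * submitted jobs, cold-boots the NPU, and notifies user space with an
 * IVPU_PM_EVENT=IVPU_RECOVER uevent.
 */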
static void ivpu_pm_recovery_work(struct work_struct *work)
{
	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
	struct ivpu_device *vdev = pm->vdev;
	char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
	int ret;

	ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));

	ret = pm_runtime_resume_and_get(vdev->drm.dev);
	if (ret)
		ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);

	ivpu_jsm_state_dump(vdev);
	ivpu_dev_coredump(vdev);

	atomic_inc(&vdev->pm->reset_counter);
	atomic_set(&vdev->pm->reset_pending, 1);
	down_write(&vdev->pm->reset_lock);

	ivpu_suspend(vdev);
	ivpu_pm_prepare_cold_boot(vdev);
	ivpu_jobs_abort_all(vdev);
	ivpu_ms_cleanup_all(vdev);

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);

	up_write(&vdev->pm->reset_lock);
	atomic_set(&vdev->pm->reset_pending, 0);

	kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
	pm_runtime_mark_last_busy(vdev->drm.dev);
	pm_runtime_put_autosuspend(vdev->drm.dev);
}
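
/*
 * Schedule recovery unless it is disabled by the module parameter, not
 * supported on the platform (FPGA), or already in progress. reset_pending
 * doubles as the "recovery in progress" flag via atomic_cmpxchg().
 */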
void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
{
	ivpu_err(vdev, "Recovery triggered by %s\n", reason);

	if (ivpu_disable_recovery) {
		ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
		return;
	}

	if (ivpu_is_fpga(vdev)) {
		ivpu_err(vdev, "Recovery not available on FPGA\n");
		return;
	}

	/* Trigger recovery if it's not in progress */
	if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
		ivpu_hw_diagnose_failure(vdev);
		ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
		queue_work(system_long_wq, &vdev->pm->recovery_work);
	}
}

static void ivpu_job_timeout_work(struct work_struct *work)
{
	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
	struct ivpu_device *vdev = pm->vdev;

	ivpu_pm_trigger_recovery(vdev, "TDR");
}
void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
{
	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;

	/* No-op if already queued */
	queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
}

void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
{
	cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
}
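
/*
 * System sleep callback: waits up to the TDR timeout for the hardware to go
 * idle, then enters D0i3 and prepares a warm boot for the matching resume.
 */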
int ivpu_pm_suspend_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	unsigned long timeout;

	trace_pm("suspend");
	ivpu_dbg(vdev, PM, "Suspend..\n");

	timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
	while (!ivpu_hw_is_idle(vdev)) {
		cond_resched();
		if (time_after_eq(jiffies, timeout)) {
			ivpu_err(vdev, "Failed to enter idle on system suspend\n");
			return -EBUSY;
		}
	}

	ivpu_jsm_pwr_d0i3_enter(vdev);

	ivpu_suspend(vdev);
	ivpu_pm_prepare_warm_boot(vdev);

	ivpu_dbg(vdev, PM, "Suspend done.\n");
	trace_pm("suspend done");

	return 0;
}

int ivpu_pm_resume_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret;

	trace_pm("resume");
	ivpu_dbg(vdev, PM, "Resume..\n");

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to resume: %d\n", ret);

	ivpu_dbg(vdev, PM, "Resume done.\n");
	trace_pm("resume done");

	return ret;
}
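
/*
 * Runtime PM callback: always powers the device down, but forces a cold boot
 * on the next resume (and captures a coredump) if the NPU was not idle or the
 * D0i3 entry request failed.
 */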
int ivpu_pm_runtime_suspend_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret, ret_d0i3;
	bool is_idle;

	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
	drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work));

	trace_pm("runtime suspend");
	ivpu_dbg(vdev, PM, "Runtime suspend..\n");

	ivpu_mmu_disable(vdev);

	is_idle = ivpu_hw_is_idle(vdev) || vdev->pm->dct_active_percent;
	if (!is_idle)
		ivpu_err(vdev, "NPU is not idle before autosuspend\n");

	ret_d0i3 = ivpu_jsm_pwr_d0i3_enter(vdev);
	if (ret_d0i3)
		ivpu_err(vdev, "Failed to prepare for d0i3: %d\n", ret_d0i3);

	ret = ivpu_suspend(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to suspend NPU: %d\n", ret);

	if (!is_idle || ret_d0i3) {
		ivpu_err(vdev, "Forcing cold boot due to previous errors\n");
		atomic_inc(&vdev->pm->reset_counter);
		ivpu_dev_coredump(vdev);
		ivpu_pm_prepare_cold_boot(vdev);
	} else {
		ivpu_pm_prepare_warm_boot(vdev);
	}

	ivpu_dbg(vdev, PM, "Runtime suspend done.\n");
	trace_pm("runtime suspend done");

	return 0;
}

int ivpu_pm_runtime_resume_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret;

	trace_pm("runtime resume");
	ivpu_dbg(vdev, PM, "Runtime resume..\n");

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);

	ivpu_dbg(vdev, PM, "Runtime resume done.\n");
	trace_pm("runtime resume done");

	return ret;
}
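
/*
 * Runtime PM reference helpers used around hardware access; ivpu_rpm_put()
 * defers the actual suspend via the autosuspend timer.
 */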
int ivpu_rpm_get(struct ivpu_device *vdev)
{
	int ret;

	ret = pm_runtime_resume_and_get(vdev->drm.dev);
	drm_WARN_ON(&vdev->drm, ret < 0);

	return ret;
}

void ivpu_rpm_put(struct ivpu_device *vdev)
{
	pm_runtime_mark_last_busy(vdev->drm.dev);
	pm_runtime_put_autosuspend(vdev->drm.dev);
}
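
/*
 * PCI reset callbacks, invoked by the PCI core before and after a device
 * reset: all jobs are aborted and the NPU is cold-booted afterwards.
 */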
void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
{
	struct ivpu_device *vdev = pci_get_drvdata(pdev);

	ivpu_dbg(vdev, PM, "Pre-reset..\n");
	atomic_inc(&vdev->pm->reset_counter);
	atomic_set(&vdev->pm->reset_pending, 1);

	pm_runtime_get_sync(vdev->drm.dev);
	down_write(&vdev->pm->reset_lock);
	ivpu_prepare_for_reset(vdev);
	ivpu_hw_reset(vdev);
	ivpu_pm_prepare_cold_boot(vdev);
	ivpu_jobs_abort_all(vdev);
	ivpu_ms_cleanup_all(vdev);

	ivpu_dbg(vdev, PM, "Pre-reset done.\n");
}

void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
{
	struct ivpu_device *vdev = pci_get_drvdata(pdev);
	int ret;

	ivpu_dbg(vdev, PM, "Post-reset..\n");
	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
	up_write(&vdev->pm->reset_lock);
	atomic_set(&vdev->pm->reset_pending, 0);
	ivpu_dbg(vdev, PM, "Post-reset done.\n");

	pm_runtime_mark_last_busy(vdev->drm.dev);
	pm_runtime_put_autosuspend(vdev->drm.dev);
}
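
/*
 * With recovery disabled (a debug configuration), the autosuspend delay is
 * set to -1, which keeps the device from being runtime suspended, presumably
 * so its state stays accessible for debugging.
 */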
void ivpu_pm_init(struct ivpu_device *vdev)
{
	struct device *dev = vdev->drm.dev;
	struct ivpu_pm_info *pm = vdev->pm;
	int delay;

	pm->vdev = vdev;

	init_rwsem(&pm->reset_lock);
	atomic_set(&pm->reset_pending, 0);
	atomic_set(&pm->reset_counter, 0);

	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
	INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);

	if (ivpu_disable_recovery)
		delay = -1;
	else
		delay = vdev->timeout.autosuspend;

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, delay);

	ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
}

void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
{
	drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
	cancel_work_sync(&vdev->pm->recovery_work);
}

void ivpu_pm_enable(struct ivpu_device *vdev)
{
	struct device *dev = vdev->drm.dev;

	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
}

void ivpu_pm_disable(struct ivpu_device *vdev)
{
	pm_runtime_get_noresume(vdev->drm.dev);
	pm_runtime_forbid(vdev->drm.dev);
}
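
/*
 * DCT (duty cycle throttling): requested via the buttress interrupt, the
 * active percentage is converted to active/inactive microseconds within each
 * DCT_PERIOD_US window (active_us = DCT_PERIOD_US * active_percent / 100).
 */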
int ivpu_pm_dct_init(struct ivpu_device *vdev)
{
	if (vdev->pm->dct_active_percent)
		return ivpu_pm_dct_enable(vdev, vdev->pm->dct_active_percent);

	return 0;
}

int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent)
{
	u32 active_us, inactive_us;
	int ret;

	if (active_percent == 0 || active_percent > 100)
		return -EINVAL;

	active_us = (DCT_PERIOD_US * active_percent) / 100;
	inactive_us = DCT_PERIOD_US - active_us;

	ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us);
	if (ret) {
		ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret);
		return ret;
	}

	vdev->pm->dct_active_percent = active_percent;

	ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n",
		 active_percent, active_us, inactive_us);
	return 0;
}

int ivpu_pm_dct_disable(struct ivpu_device *vdev)
{
	int ret;

	ret = ivpu_jsm_dct_disable(vdev);
	if (ret) {
		ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret);
		return ret;
	}

	vdev->pm->dct_active_percent = 0;

	ivpu_dbg(vdev, PM, "DCT disabled\n");
	return 0;
}

void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev)
{
	bool enable;
	int ret;

	if (ivpu_hw_btrs_dct_get_request(vdev, &enable))
		return;

	if (vdev->pm->dct_active_percent)
		ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT);
	else
		ret = ivpu_pm_dct_disable(vdev);

	if (!ret)
		ivpu_hw_btrs_dct_set_status(vdev, enable, vdev->pm->dct_active_percent);
}