// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020-2024 Intel Corporation
 */

#include <linux/highmem.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/reboot.h>

#include "vpu_boot_api.h"
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_fw.h"
#include "ivpu_fw_log.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_ms.h"
#include "ivpu_pm.h"

static bool ivpu_disable_recovery;
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected");

static unsigned long ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");

#define PM_RESCHEDULE_LIMIT     5

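/*
 * Force a full FW reload on the next boot: reset all command queue contexts
 * and point the firmware at its cold boot entry point.
 */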
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
{
	struct ivpu_fw_info *fw = vdev->fw;

	ivpu_cmdq_reset_all_contexts(vdev);
	ivpu_ipc_reset(vdev);
	ivpu_fw_load(vdev);
	fw->entry_point = fw->cold_boot_entry_point;
}

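/*
 * Resume from the save/restore return address published by the FW in the
 * boot parameters; fall back to a cold boot if the FW did not save its state.
 */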
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
{
	struct ivpu_fw_info *fw = vdev->fw;
	struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem);

	if (!bp->save_restore_ret_address) {
		ivpu_pm_prepare_cold_boot(vdev);
		return;
	}

	ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx", bp->save_restore_ret_address);
	fw->entry_point = bp->save_restore_ret_address;
}

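/* Quiesce the device and shut it down; any shutdown error is propagated. */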
static int ivpu_suspend(struct ivpu_device *vdev)
{
	int ret;

	ivpu_prepare_for_reset(vdev);

	ret = ivpu_shutdown(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret);

	return ret;
}

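/*
 * Power up the HW, re-enable the MMU and boot the FW. If a warm boot fails,
 * fall back to a cold boot and retry once from the top.
 */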
static int ivpu_resume(struct ivpu_device *vdev)
{
	int ret;

retry:
	pci_restore_state(to_pci_dev(vdev->drm.dev));
	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);

	ret = ivpu_hw_power_up(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
		goto err_power_down;
	}

	ret = ivpu_mmu_enable(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
		goto err_power_down;
	}

	ret = ivpu_boot(vdev);
	if (ret)
		goto err_mmu_disable;

	return 0;

err_mmu_disable:
	ivpu_mmu_disable(vdev);
err_power_down:
	ivpu_hw_power_down(vdev);
	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);

	if (!ivpu_fw_is_cold_boot(vdev)) {
		ivpu_pm_prepare_cold_boot(vdev);
		goto retry;
	} else {
		ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
	}

	return ret;
}

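/*
 * Recovery worker: full reset of a hung NPU. Suspends the device, aborts all
 * jobs, forces a cold boot, resumes and notifies user space via a udev event.
 */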
static void ivpu_pm_recovery_work(struct work_struct *work)
{
	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
	struct ivpu_device *vdev = pm->vdev;
	char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
	int ret;

	ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));

	ret = pm_runtime_resume_and_get(vdev->drm.dev);
	if (ret)
		ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);

	ivpu_fw_log_dump(vdev);

	atomic_inc(&vdev->pm->reset_counter);
	atomic_set(&vdev->pm->reset_pending, 1);
	down_write(&vdev->pm->reset_lock);

	ivpu_suspend(vdev);
	ivpu_pm_prepare_cold_boot(vdev);
	ivpu_jobs_abort_all(vdev);
	ivpu_ms_cleanup_all(vdev);

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);

	up_write(&vdev->pm->reset_lock);
	atomic_set(&vdev->pm->reset_pending, 0);

	kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
	pm_runtime_mark_last_busy(vdev->drm.dev);
	pm_runtime_put_autosuspend(vdev->drm.dev);
}

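/*
 * Schedule recovery unless it is disabled by the module parameter, running
 * on FPGA, or already in progress.
 */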
void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
{
	ivpu_err(vdev, "Recovery triggered by %s\n", reason);

	if (ivpu_disable_recovery) {
		ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
		return;
	}

	if (ivpu_is_fpga(vdev)) {
		ivpu_err(vdev, "Recovery not available on FPGA\n");
		return;
	}

	/* Trigger recovery if it's not in progress */
	if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
		ivpu_hw_diagnose_failure(vdev);
		ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
		queue_work(system_long_wq, &vdev->pm->recovery_work);
	}
}

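/* TDR (timeout detection and recovery) worker - fires when a job hangs. */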
static void ivpu_job_timeout_work(struct work_struct *work)
{
	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
	struct ivpu_device *vdev = pm->vdev;

	ivpu_pm_trigger_recovery(vdev, "TDR");
}

void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
{
	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;

	/* No-op if already queued */
	queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
}

void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
{
	cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
}

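/*
 * System suspend callback: wait for the NPU to go idle within the TDR
 * timeout, enter D0i3 and prepare a warm boot for resume.
 */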
int ivpu_pm_suspend_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	unsigned long timeout;

	ivpu_dbg(vdev, PM, "Suspend..\n");

	timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
	while (!ivpu_hw_is_idle(vdev)) {
		cond_resched();
		if (time_after_eq(jiffies, timeout)) {
			ivpu_err(vdev, "Failed to enter idle on system suspend\n");
			return -EBUSY;
		}
	}

	ivpu_jsm_pwr_d0i3_enter(vdev);

	ivpu_suspend(vdev);
	ivpu_pm_prepare_warm_boot(vdev);

	ivpu_dbg(vdev, PM, "Suspend done.\n");

	return 0;
}

int ivpu_pm_resume_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret;

	ivpu_dbg(vdev, PM, "Resume..\n");

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to resume: %d\n", ret);

	ivpu_dbg(vdev, PM, "Resume done.\n");

	return ret;
}

int ivpu_pm_runtime_suspend_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret, ret_d0i3;
	bool is_idle;

	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
	drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work));

	ivpu_dbg(vdev, PM, "Runtime suspend..\n");

	ivpu_mmu_disable(vdev);

	is_idle = ivpu_hw_is_idle(vdev) || vdev->pm->dct_active_percent;
	if (!is_idle)
		ivpu_err(vdev, "NPU is not idle before autosuspend\n");

	ret_d0i3 = ivpu_jsm_pwr_d0i3_enter(vdev);
	if (ret_d0i3)
		ivpu_err(vdev, "Failed to prepare for d0i3: %d\n", ret_d0i3);

	ret = ivpu_suspend(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to suspend NPU: %d\n", ret);

	if (!is_idle || ret_d0i3) {
		ivpu_err(vdev, "Forcing cold boot due to previous errors\n");
		atomic_inc(&vdev->pm->reset_counter);
		ivpu_fw_log_dump(vdev);
		ivpu_pm_prepare_cold_boot(vdev);
	} else {
		ivpu_pm_prepare_warm_boot(vdev);
	}

	ivpu_dbg(vdev, PM, "Runtime suspend done.\n");

	return 0;
}

int ivpu_pm_runtime_resume_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret;

	ivpu_dbg(vdev, PM, "Runtime resume..\n");

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);

	ivpu_dbg(vdev, PM, "Runtime resume done.\n");

	return ret;
}

int ivpu_rpm_get(struct ivpu_device *vdev)
{
	int ret;

	ret = pm_runtime_resume_and_get(vdev->drm.dev);
	drm_WARN_ON(&vdev->drm, ret < 0);

	return ret;
}

void ivpu_rpm_put(struct ivpu_device *vdev)
{
	pm_runtime_mark_last_busy(vdev->drm.dev);
	pm_runtime_put_autosuspend(vdev->drm.dev);
}

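/*
 * PCI reset callbacks: quiesce the device before a function level reset and
 * bring it back up afterwards, holding reset_lock for the duration.
 */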
void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
{
	struct ivpu_device *vdev = pci_get_drvdata(pdev);

	ivpu_dbg(vdev, PM, "Pre-reset..\n");
	atomic_inc(&vdev->pm->reset_counter);
	atomic_set(&vdev->pm->reset_pending, 1);

	pm_runtime_get_sync(vdev->drm.dev);
	down_write(&vdev->pm->reset_lock);
	ivpu_prepare_for_reset(vdev);
	ivpu_hw_reset(vdev);
	ivpu_pm_prepare_cold_boot(vdev);
	ivpu_jobs_abort_all(vdev);
	ivpu_ms_cleanup_all(vdev);

	ivpu_dbg(vdev, PM, "Pre-reset done.\n");
}

void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
{
	struct ivpu_device *vdev = pci_get_drvdata(pdev);
	int ret;

	ivpu_dbg(vdev, PM, "Post-reset..\n");
	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
	up_write(&vdev->pm->reset_lock);
	atomic_set(&vdev->pm->reset_pending, 0);
	ivpu_dbg(vdev, PM, "Post-reset done.\n");

	pm_runtime_mark_last_busy(vdev->drm.dev);
	pm_runtime_put_autosuspend(vdev->drm.dev);
}

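/*
 * One-time PM state setup. A negative autosuspend delay disables runtime
 * suspend when recovery is disabled, presumably to keep a hung NPU powered
 * for debugging.
 */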
void ivpu_pm_init(struct ivpu_device *vdev)
{
	struct device *dev = vdev->drm.dev;
	struct ivpu_pm_info *pm = vdev->pm;
	int delay;

	pm->vdev = vdev;

	init_rwsem(&pm->reset_lock);
	atomic_set(&pm->reset_pending, 0);
	atomic_set(&pm->reset_counter, 0);

	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
	INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);

	if (ivpu_disable_recovery)
		delay = -1;
	else
		delay = vdev->timeout.autosuspend;

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, delay);

	ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
}

void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
{
	drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
	cancel_work_sync(&vdev->pm->recovery_work);
}

void ivpu_pm_enable(struct ivpu_device *vdev)
{
	struct device *dev = vdev->drm.dev;

	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
}

void ivpu_pm_disable(struct ivpu_device *vdev)
{
	pm_runtime_get_noresume(vdev->drm.dev);
	pm_runtime_forbid(vdev->drm.dev);
}

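/*
 * DCT (duty cycle throttling) limits the NPU's active time within each
 * DCT_PERIOD_US window: e.g. 30% activity means D0 for 30% of the period
 * and D0i2 for the remainder.
 */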
int ivpu_pm_dct_init(struct ivpu_device *vdev)
{
	if (vdev->pm->dct_active_percent)
		return ivpu_pm_dct_enable(vdev, vdev->pm->dct_active_percent);

	return 0;
}

int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent)
{
	u32 active_us, inactive_us;
	int ret;

	if (active_percent == 0 || active_percent > 100)
		return -EINVAL;

	active_us = (DCT_PERIOD_US * active_percent) / 100;
	inactive_us = DCT_PERIOD_US - active_us;

	ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us);
	if (ret) {
		ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret);
		return ret;
	}

	vdev->pm->dct_active_percent = active_percent;

	ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n",
		 active_percent, active_us, inactive_us);
	return 0;
}

int ivpu_pm_dct_disable(struct ivpu_device *vdev)
{
	int ret;

	ret = ivpu_jsm_dct_disable(vdev);
	if (ret) {
		ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret);
		return ret;
	}

	vdev->pm->dct_active_percent = 0;

	ivpu_dbg(vdev, PM, "DCT disabled\n");
	return 0;
}

void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev)
{
	bool enable;
	int ret;

	if (ivpu_hw_btrs_dct_get_request(vdev, &enable))
		return;

	if (vdev->pm->dct_active_percent)
		ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT);
	else
		ret = ivpu_pm_dct_disable(vdev);

	if (!ret)
		ivpu_hw_btrs_dct_set_status(vdev, enable, vdev->pm->dct_active_percent);
}