/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"

#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"
#include "shmem_utils.h"
static void dbg_poison_ce(struct intel_context *ce)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	if (ce->state) {
		struct drm_i915_gem_object *obj = ce->state->obj;
		int type = i915_coherent_map_type(ce->engine->i915);
		void *map;

		map = i915_gem_object_pin_map(obj, type);
		if (!IS_ERR(map)) {
			memset(map, CONTEXT_REDZONE, obj->base.size);
			i915_gem_object_flush_map(obj);
			i915_gem_object_unpin_map(obj);
		}
	}
}
static int __engine_unpark(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);
	struct intel_context *ce;

	ENGINE_TRACE(engine, "\n");

	intel_gt_pm_get(engine->gt);

	/* Discard stale context state from across idling */
	ce = engine->kernel_context;
	if (ce) {
		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));

		/* Flush all pending HW writes before we touch the context */
		while (unlikely(intel_context_inflight(ce)))
			intel_engine_flush_submission(engine);

		/* First poison the image to verify we never fully trust it */
		dbg_poison_ce(ce);

		/* Scrub the context image after our loss of control */
		ce->ops->reset(ce);
	}

	if (engine->unpark)
		engine->unpark(engine);

	intel_engine_unpark_heartbeat(engine);
	return 0;
}
#if IS_ENABLED(CONFIG_LOCKDEP)

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

	return flags;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
	local_irq_restore(flags);
}

#else

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	return 0;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
}

#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
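
/*
 * Fence callback sampling how long the final context switch took, feeding
 * the engine's latency estimate (ewma).
 */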
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_request *rq = to_request(fence);

	ewma__engine_latency_add(&rq->engine->latency,
				 ktime_us_delta(rq->fence.timestamp,
						rq->duration.emitted));
}
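
/*
 * Queue the final parking request and defer the wakeref release under
 * timelines->lock, so that a racing retirement can neither underflow the
 * wakeref nor retire us before we are ready; the ordering constraints are
 * spelled out below.
 */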
static void
__queue_and_release_pm(struct i915_request *rq,
		       struct intel_timeline *tl,
		       struct intel_engine_cs *engine)
{
	struct intel_gt_timelines *timelines = &engine->gt->timelines;

	ENGINE_TRACE(engine, "parking\n");

	/*
	 * We have to serialise all potential retirement paths with our
	 * submission, as we don't want to underflow either the
	 * engine->wakeref.counter or our timeline->active_count.
	 *
	 * Equally, we cannot allow a new submission to start until
	 * after we finish queueing, nor could we allow that submitter
	 * to retire us before we are ready!
	 */
	spin_lock(&timelines->lock);

	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);

	/* Hand the request over to HW and so engine_retire() */
	__i915_request_queue(rq, NULL);

	/* Let new submissions commence (and maybe retire this timeline) */
	__intel_wakeref_defer_park(&engine->wakeref);

	spin_unlock(&timelines->lock);
}
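
/*
 * Try to idle the engine by switching to the kernel context. Returns true
 * if the engine is already idle (or wedged) and may power off immediately,
 * false if a final request was queued and parking is deferred until that
 * request has been retired.
 */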
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long flags;
	bool result = true;

	/* GPU is pointing to the void, as good as in the kernel context. */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	GEM_BUG_ON(!intel_context_is_barrier(ce));
	GEM_BUG_ON(ce->timeline->hwsp_ggtt != engine->status_page.vma);

	/* Already inside the kernel context, safe to power down. */
	if (engine->wakeref_serial == engine->serial)
		return true;

	/*
	 * Note, we do this without taking the timeline->mutex. We cannot
	 * as we may be called while retiring the kernel context and so
	 * already underneath the timeline->mutex. Instead we rely on the
	 * exclusive property of the __engine_park that prevents anyone
	 * else from creating a request on this engine. This also requires
	 * that the ring is empty and we avoid any waits while constructing
	 * the context, as they assume protection by the timeline->mutex.
	 * This should hold true as we can only park the engine after
	 * retiring the last request, thus all rings should be empty and
	 * all timelines idle.
	 *
	 * For unlocking, there are 2 other parties and the GPU who have a
	 * stake here.
	 *
	 * A new gpu user will be waiting on the engine-pm to start their
	 * engine_unpark. New waiters are predicated on engine->wakeref.count
	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
	 * engine->wakeref.
	 *
	 * The other party is intel_gt_retire_requests(), which is walking the
	 * list of active timelines looking for completions. Meanwhile as soon
	 * as we call __i915_request_queue(), the GPU may complete our request.
	 * Ergo, if we put ourselves on the timelines.active_list
	 * (see intel_timeline_enter()) before we increment the
	 * engine->wakeref.count, we may see the request completion and retire
	 * it causing an underflow of the engine->wakeref.
	 */
	flags = __timeline_mark_lock(ce);
	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

	rq = __i915_request_create(ce, GFP_NOWAIT);
	if (IS_ERR(rq))
		/* Context switch failed, hope for the best! Maybe reset? */
		goto out_unlock;

	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
		/*
		 * Use an interrupt for precise measurement of duration,
		 * otherwise we rely on someone else retiring all the requests
		 * which may delay the signaling (i.e. we will likely wait
		 * until the background request retirement running every
		 * second or two).
		 */
		BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
		rq->duration.emitted = ktime_get();
	}

	/* Expose ourselves to the world */
	__queue_and_release_pm(rq, ce->timeline, engine);

	result = false;
out_unlock:
	__timeline_mark_unlock(ce, flags);
	return result;
}
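
/*
 * Flush any remaining idle barrier tasks; each callback is completed with
 * -EAGAIN as the engine parks without a request to carry them (e.g. after
 * wedging).
 */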
static void call_idle_barriers(struct intel_engine_cs *engine)
{
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		struct dma_fence_cb *cb =
			container_of((struct list_head *)node,
				     typeof(*cb), node);

		cb->func(ERR_PTR(-EAGAIN), cb);
	}
}
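
/*
 * Last user of the engine has gone idle: switch to the kernel context, park
 * the heartbeat and breadcrumbs, then release the GT wakeref. Returns -EBUSY
 * if a final request had to be queued first.
 */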
static int __engine_park(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

	engine->saturated = 0;

	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
	 * safe to power down. (We are paranoid in case that runtime
	 * suspend causes corruption to the active context image, and
	 * want to avoid that impacting userspace.)
	 */
	if (!switch_to_kernel_context(engine))
		return -EBUSY;

	ENGINE_TRACE(engine, "parked\n");

	call_idle_barriers(engine); /* cleanup after wedging */

	intel_engine_park_heartbeat(engine);
	intel_breadcrumbs_park(engine->breadcrumbs);

	/* Must be reset upon idling, or we may miss the busy wakeup. */
	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

	if (engine->park)
		engine->park(engine);

	engine->execlists.no_priolist = false;

	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
	intel_gt_pm_put_async(engine->gt);
	return 0;
}
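
/*
 * engine->wakeref is bound to these callbacks: the first user unparks the
 * engine, the last user parks it (possibly deferred behind a final request).
 */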
static const struct intel_wakeref_ops wf_ops = {
	.get = __engine_unpark,
	.put = __engine_park,
};
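
/*
 * Initialise engine->wakeref against the uncore runtime pm and set up the
 * heartbeat; called once per engine during engine setup.
 */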
void intel_engine_init__pm(struct intel_engine_cs *engine)
{
	struct intel_runtime_pm *rpm = engine->uncore->rpm;

	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
	intel_engine_init_heartbeat(engine);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif