/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/kthread.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
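
/*
 * irq_enable()/irq_disable() call into the engine's user-interrupt vfuncs
 * under gt->irq_lock. Callers are expected to have already disabled local
 * interrupts, so only the plain spinlock is taken here.
 */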

static bool irq_enable(struct intel_engine_cs *engine)
{
	if (!engine->irq_enable)
		return false;

	/* Caller disables interrupts */
	spin_lock(&engine->gt->irq_lock);
	engine->irq_enable(engine);
	spin_unlock(&engine->gt->irq_lock);

	return true;
}

static void irq_disable(struct intel_engine_cs *engine)
{
	if (!engine->irq_disable)
		return;

	/* Caller disables interrupts */
	spin_lock(&engine->gt->irq_lock);
	engine->irq_disable(engine);
	spin_unlock(&engine->gt->irq_lock);
}
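
/*
 * Arming the breadcrumb irq takes a GT wakeref (the GPU must already be awake
 * as we are waiting on one of its requests) and enables the user interrupt on
 * first use; disarming drops both again once no listeners remain.
 */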

static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, true);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && irq_enable(b->irq_engine))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		irq_disable(b->irq_engine);

	WRITE_ONCE(b->irq_armed, false);
	intel_gt_pm_put_async(b->irq_engine->gt);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}
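
/*
 * Contexts with breadcrumbs still waiting to be signaled are tracked on
 * b->signalers, an RCU-friendly list guarded by b->signalers_lock. Each
 * context additionally protects its own ce->signals list with
 * ce->signal_lock, which callers of these helpers must already hold.
 */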

static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}
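
/*
 * A request is complete once the breadcrumb seqno written to the hardware
 * status page has caught up with (or passed) the request's fence seqno.
 */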

static inline bool __request_completed(const struct i915_request *rq)
{
	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
}

__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}
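
/*
 * Local, split-out pieces of dma_fence signaling: claim the SIGNALED bit,
 * stamp the completion time, then walk the detached cb_list and invoke the
 * callbacks, all without calling back into dma_fence_signal() itself.
 */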

static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

static bool __signal_request(struct i915_request *rq)
{
	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));

	if (!__dma_fence_signal(&rq->fence)) {
		i915_request_put(rq);
		return false;
	}

	return true;
}

static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}
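
/*
 * The irq worker: run from the user interrupt (or after queuing via
 * irq_work_queue()), it walks b->signalers, pulls every completed breadcrumb
 * off its context under ce->signal_lock, and then signals the detached
 * fences and runs their callbacks outside of the per-context locks.
 */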

static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */

	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__request_completed(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);

			if (__signal_request(rq))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);

			if (release) {
				add_retire(b, ce->timeline);
				intel_context_put(ce);
			}
		}
	}
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);
}
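
/*
 * intel_breadcrumbs_create() allocates the per-engine bookkeeping and wires
 * its irq_work to signal_irq_work(); reset/park/free below manage the
 * interrupt state across GPU reset, engine parking and final teardown.
 */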

struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	b->irq_engine = irq_engine;

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	return b;
}

void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		irq_enable(b->irq_engine);
	else
		irq_disable(b->irq_engine);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	/* Kick the work once more to drain the signalers */
	irq_work_sync(&b->irq_work);
	while (unlikely(READ_ONCE(b->irq_armed))) {
		local_irq_disable();
		signal_irq_work(&b->irq_work);
		local_irq_enable();
		cond_resched();
	}
	GEM_BUG_ON(!list_empty(&b->signalers));
}

void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
{
	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);
	kfree(b);
}
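
/*
 * insert_breadcrumb() is called with ce->signal_lock held. It takes a
 * reference on the request and either queues an already-completed request
 * straight onto b->signaled_requests, or inserts it into ce->signals in
 * seqno order and marks it with I915_FENCE_FLAG_SIGNAL.
 */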

static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	i915_request_get(rq);

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__request_completed(rq)) {
		if (__signal_request(rq) &&
		    llist_add(&rq->signal_node, &b->signaled_requests))
			irq_work_queue(&b->irq_work);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	irq_work_queue(&b->irq_work);
}
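
/*
 * Called once the request has been marked active by the submission path
 * (see i915_request_submit()); the ACTIVE flag is rechecked under
 * ce->signal_lock so the breadcrumb is only attached while the request is
 * still in flight.
 */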

bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;
	bool release;

	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	spin_lock(&ce->signal_lock);
	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(rq->engine->breadcrumbs, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	i915_request_put(rq);
}
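
/*
 * Debug output: dump the contexts on b->signalers and their pending
 * breadcrumbs, used by intel_engine_print_breadcrumbs() below.
 */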

static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   i915_request_completed(rq) ? "!" :
				   i915_request_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}