/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"

#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS

struct intel_timeline_hwsp {
	struct intel_gt *gt;
	struct intel_gt_timelines *gt_timelines;
	struct list_head free_link;
	struct i915_vma *vma;
	u64 free_bitmap;
};

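/*
 * Each HWSP page is shared between the timelines of a single intel_gt: it is
 * carved up into CACHELINE_BYTES slots, one per timeline, and free_bitmap
 * tracks which of those cachelines are still available for allocation.
 */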
static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

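/*
 * Find a cacheline for a new timeline: prefer a HWSP page on the free list
 * that still has spare cachelines, otherwise allocate a fresh page and
 * publish it for later reuse.
 */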
static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
	struct intel_gt_timelines *gt = &timeline->gt->timelines;
	struct intel_timeline_hwsp *hwsp;

	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

	spin_lock_irq(&gt->hwsp_lock);

	/* hwsp_free_list only contains HWSP that have available cachelines */
	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
					typeof(*hwsp), free_link);
	if (!hwsp) {
		struct i915_vma *vma;

		spin_unlock_irq(&gt->hwsp_lock);

		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
		if (!hwsp)
			return ERR_PTR(-ENOMEM);

		vma = __hwsp_alloc(timeline->gt);
		if (IS_ERR(vma)) {
			kfree(hwsp);
			return vma;
		}

		GT_TRACE(timeline->gt, "new HWSP allocated\n");

		vma->private = hwsp;
		hwsp->gt = timeline->gt;
		hwsp->vma = vma;
		hwsp->free_bitmap = ~0ull;
		hwsp->gt_timelines = gt;

		spin_lock_irq(&gt->hwsp_lock);
		list_add(&hwsp->free_link, &gt->hwsp_free_list);
	}

	GEM_BUG_ON(!hwsp->free_bitmap);
	*cacheline = __ffs64(hwsp->free_bitmap);
	hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
	if (!hwsp->free_bitmap)
		list_del(&hwsp->free_link);

	spin_unlock_irq(&gt->hwsp_lock);

	GEM_BUG_ON(hwsp->vma->private != hwsp);
	return hwsp->vma;
}

static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
	struct intel_gt_timelines *gt = hwsp->gt_timelines;
	unsigned long flags;

	spin_lock_irqsave(&gt->hwsp_lock, flags);

	/* As a cacheline becomes available, publish the HWSP on the freelist */
	if (!hwsp->free_bitmap)
		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
	hwsp->free_bitmap |= BIT_ULL(cacheline);

	/* And if no one is left using it, give the page back to the system */
	if (hwsp->free_bitmap == ~0ull) {
		i915_vma_put(hwsp->vma);
		list_del(&hwsp->free_link);
		kfree(hwsp);
	}

	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}

static void __rcu_cacheline_free(struct rcu_head *rcu)
{
	struct intel_timeline_cacheline *cl =
		container_of(rcu, typeof(*cl), rcu);

	i915_active_fini(&cl->active);
	kfree(cl);
}

static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(!i915_active_is_idle(&cl->active));

	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
	i915_vma_put(cl->hwsp->vma);
	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

	call_rcu(&cl->rcu, __rcu_cacheline_free);
}

__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	i915_vma_unpin(cl->hwsp->vma);
	if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
		__idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	__i915_vma_pin(cl->hwsp->vma);
	return 0;
}

static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
	struct intel_timeline_cacheline *cl;
	void *vaddr;

	GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (!cl)
		return ERR_PTR(-ENOMEM);

	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		kfree(cl);
		return ERR_CAST(vaddr);
	}

	i915_vma_get(hwsp->vma);
	cl->hwsp = hwsp;
	cl->vaddr = page_pack_bits(vaddr, cacheline);

	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

	return cl;
}

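/*
 * While a cacheline is acquired (the timeline is pinned), its i915_active
 * keeps the backing HWSP vma pinned; once the last request referencing the
 * cacheline retires, it is returned to the HWSP page and may be freed.
 */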
static void cacheline_acquire(struct intel_timeline_cacheline *cl,
			      u32 ggtt_offset)
{
	if (!cl)
		return;

	cl->ggtt_offset = ggtt_offset;
	i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_release(&cl->active);
}

static void cacheline_free(struct intel_timeline_cacheline *cl)
{
	if (!i915_active_acquire_if_busy(&cl->active)) {
		__idle_cacheline_free(cl);
		return;
	}

	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

	i915_active_release(&cl->active);
}

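/*
 * A timeline either carves a cacheline out of the shared per-gt HWSP pool
 * (and may later swap it on seqno wraparound), or uses a caller-supplied
 * global HWSP vma at a fixed offset.
 */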
static int intel_timeline_init(struct intel_timeline *timeline,
			       struct intel_gt *gt,
			       struct i915_vma *hwsp,
			       unsigned int offset)
{
	void *vaddr;

	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	timeline->has_initial_breadcrumb = !hwsp;
	timeline->hwsp_cacheline = NULL;

	if (!hwsp) {
		struct intel_timeline_cacheline *cl;
		unsigned int cacheline;

		hwsp = hwsp_alloc(timeline, &cacheline);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);

		cl = cacheline_alloc(hwsp->private, cacheline);
		if (IS_ERR(cl)) {
			__idle_hwsp_free(hwsp->private, cacheline);
			return PTR_ERR(cl);
		}

		timeline->hwsp_cacheline = cl;
		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

		vaddr = page_mask_bits(cl->vaddr);
	} else {
		timeline->hwsp_offset = offset;
		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);
	}

	timeline->hwsp_seqno =
		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

	timeline->hwsp_ggtt = i915_vma_get(hwsp);
	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);

	spin_lock_init(&timelines->hwsp_lock);
	INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

static void intel_timeline_fini(struct intel_timeline *timeline)
{
	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	if (timeline->hwsp_cacheline)
		cacheline_free(timeline->hwsp_cacheline);
	else
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
}

struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
			struct i915_vma *global_hwsp,
			unsigned int offset)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp, offset);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

void __intel_timeline_pin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	atomic_inc(&tl->pin_count);
}

int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
	if (atomic_fetch_inc(&tl->pin_count)) {
		cacheline_release(tl->hwsp_cacheline);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
	/* Must be pinned to be writable, and no requests in flight. */
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_requests() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count)) {
		/*
		 * The HWSP is volatile, and may have been lost while inactive,
		 * e.g. across suspend/resume. Be paranoid, and ensure that
		 * the HWSP value matches our seqno so we don't proclaim
		 * the next request as already complete.
		 */
		intel_timeline_reset_seqno(tl);
		list_add_tail(&tl->link, &timelines->active_list);
	}
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
	tl->seqno -= 1 + tl->has_initial_breadcrumb;
}

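/*
 * On seqno wraparound the current cacheline cannot be reused while the GPU
 * may still be sampling it (e.g. for a semaphore), so switch the timeline
 * over to a freshly allocated cacheline and leave the old one attached to
 * its final request.
 */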
static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   struct i915_request *rq,
			   u32 *seqno)
{
	struct intel_timeline_cacheline *cl;
	unsigned int cacheline;
	struct i915_vma *vma;
	void *vaddr;
	int err;

	might_lock(&tl->gt->ggtt->vm.mutex);
	GT_TRACE(tl->gt, "timeline:%llx wrapped\n", tl->fence_context);

	/*
	 * If there is an outstanding GPU reference to this cacheline,
	 * such as it being sampled by a HW semaphore on another timeline,
	 * we cannot wraparound our seqno value (the HW semaphore does
	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
	 * So if the cacheline is still busy, we must detach ourselves
	 * from it and leave it inflight alongside its users.
	 *
	 * However, if nobody is watching and we can guarantee that nobody
	 * will, we could simply reuse the same cacheline.
	 *
	 * if (i915_active_request_is_signaled(&tl->last_request) &&
	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
	 *	return 0;
	 *
	 * That seems unlikely for a busy timeline that needed to wrap in
	 * the first place, so just replace the cacheline.
	 */

	vma = hwsp_alloc(tl, &cacheline);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_rollback;
	}

	err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
	if (err) {
		__idle_hwsp_free(vma->private, cacheline);
		goto err_rollback;
	}

	cl = cacheline_alloc(vma->private, cacheline);
	if (IS_ERR(cl)) {
		err = PTR_ERR(cl);
		__idle_hwsp_free(vma->private, cacheline);
		goto err_unpin;
	}
	GEM_BUG_ON(cl->hwsp->vma != vma);

	/*
	 * Attach the old cacheline to the current request, so that we only
	 * free it after the current request is retired, which ensures that
	 * all writes into the cacheline from previous requests are complete.
	 */
	err = i915_active_ref(&tl->hwsp_cacheline->active,
			      tl->fence_context,
			      &rq->fence);
	if (err)
		goto err_cacheline;

	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
	cacheline_free(tl->hwsp_cacheline);

	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
	i915_vma_put(tl->hwsp_ggtt);

	tl->hwsp_ggtt = i915_vma_get(vma);

	vaddr = page_mask_bits(cl->vaddr);
	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
	tl->hwsp_seqno =
		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

	tl->hwsp_offset += i915_ggtt_offset(vma);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	cacheline_acquire(cl, tl->hwsp_offset);
	tl->hwsp_cacheline = cl;

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;

err_cacheline:
	cacheline_free(cl);
err_unpin:
	i915_vma_unpin(vma);
err_rollback:
	timeline_rollback(tl);
	return err;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->hwsp_cacheline))
		return __intel_timeline_get_seqno(tl, rq, seqno);

	return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
			 struct i915_request *rq)
{
	return i915_active_add_request(&cl->active, rq);
}

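/*
 * Report the GGTT address at which the seqno of 'from' will be written, so
 * that 'to' may set up a semaphore wait upon it, and keep the cacheline
 * alive (via cacheline_ref) until 'to' no longer needs it. Returns a
 * positive value if 'from' has already completed and no wait is required.
 */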
int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline_cacheline *cl;
	int err;

	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

	rcu_read_lock();
	cl = rcu_dereference(from->hwsp_cacheline);
	if (i915_request_completed(from)) /* confirm cacheline is valid */
		goto unlock;
	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
		goto unlock; /* seqno wrapped and completed! */
	if (unlikely(i915_request_completed(from)))
		goto release;
	rcu_read_unlock();

	err = cacheline_ref(cl, to);
	if (err)
		goto out;

	*hwsp = cl->ggtt_offset;
out:
	i915_active_release(&cl->active);
	return err;

release:
	i915_active_release(&cl->active);
unlock:
	rcu_read_unlock();
	return 1;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	cacheline_release(tl->hwsp_cacheline);

	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	intel_timeline_fini(timeline);
	kfree_rcu(timeline, rcu);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif