/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"

#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS
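
/*
 * A single page of hardware status page (HWSP) backing store is shared
 * between timelines: each timeline claims one CACHELINE_BYTES slot, with
 * free_bitmap tracking which cachelines of the page are still available.
 */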
struct intel_timeline_hwsp {
        struct intel_gt *gt;
        struct intel_gt_timelines *gt_timelines;
        struct list_head free_link;
        struct i915_vma *vma;
        u64 free_bitmap;
};
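
/*
 * Allocate a page-sized internal object for use as a HWSP and wrap it in a
 * GGTT vma; sub-allocation into cachelines is handled by the caller.
 */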
static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
        if (IS_ERR(vma))
                i915_gem_object_put(obj);

        return vma;
}
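
/*
 * Hand out a free cacheline from the gt's pool of HWSP pages, allocating and
 * publishing a fresh page on the free list if no page has a slot available.
 */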
static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
        struct intel_gt_timelines *gt = &timeline->gt->timelines;
        struct intel_timeline_hwsp *hwsp;

        BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

        spin_lock_irq(&gt->hwsp_lock);

        /* hwsp_free_list only contains HWSP that have available cachelines */
        hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
                                        typeof(*hwsp), free_link);
        if (!hwsp) {
                struct i915_vma *vma;

                spin_unlock_irq(&gt->hwsp_lock);

                hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
                if (!hwsp)
                        return ERR_PTR(-ENOMEM);

                vma = __hwsp_alloc(timeline->gt);
                if (IS_ERR(vma)) {
                        kfree(hwsp);
                        return vma;
                }

                vma->private = hwsp;
                hwsp->gt = timeline->gt;
                hwsp->vma = vma;
                hwsp->free_bitmap = ~0ull;
                hwsp->gt_timelines = gt;

                spin_lock_irq(&gt->hwsp_lock);
                list_add(&hwsp->free_link, &gt->hwsp_free_list);
        }

        GEM_BUG_ON(!hwsp->free_bitmap);
        *cacheline = __ffs64(hwsp->free_bitmap);
        hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
        if (!hwsp->free_bitmap)
                list_del(&hwsp->free_link);

        spin_unlock_irq(&gt->hwsp_lock);

        GEM_BUG_ON(hwsp->vma->private != hwsp);
        return hwsp->vma;
}
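
/*
 * Return a cacheline to its HWSP's free bitmap; once every cacheline in the
 * page is idle again, the whole page is released back to the system.
 */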
static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
        struct intel_gt_timelines *gt = hwsp->gt_timelines;
        unsigned long flags;

        spin_lock_irqsave(&gt->hwsp_lock, flags);

        /* As a cacheline becomes available, publish the HWSP on the freelist */
        if (!hwsp->free_bitmap)
                list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

        GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
        hwsp->free_bitmap |= BIT_ULL(cacheline);

        /* And if no one is left using it, give the page back to the system */
        if (hwsp->free_bitmap == ~0ull) {
                i915_vma_put(hwsp->vma);
                list_del(&hwsp->free_link);
                kfree(hwsp);
        }

        spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}
static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
        GEM_BUG_ON(!i915_active_is_idle(&cl->active));

        i915_gem_object_unpin_map(cl->hwsp->vma->obj);
        i915_vma_put(cl->hwsp->vma);
        __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

        i915_active_fini(&cl->active);
        kfree_rcu(cl, rcu);
}
static void __cacheline_retire(struct i915_active *active)
{
        struct intel_timeline_cacheline *cl =
                container_of(active, typeof(*cl), active);

        i915_vma_unpin(cl->hwsp->vma);
        if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
                __idle_cacheline_free(cl);
}
static int __cacheline_active(struct i915_active *active)
{
        struct intel_timeline_cacheline *cl =
                container_of(active, typeof(*cl), active);

        __i915_vma_pin(cl->hwsp->vma);
        return 0;
}
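
/*
 * Pin a CPU mapping of the HWSP page and pack the chosen cacheline index
 * into the low bits of the vaddr cookie (see page_pack_bits).
 */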
static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
        struct intel_timeline_cacheline *cl;
        void *vaddr;

        GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

        cl = kmalloc(sizeof(*cl), GFP_KERNEL);
        if (!cl)
                return ERR_PTR(-ENOMEM);

        vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
        if (IS_ERR(vaddr)) {
                kfree(cl);
                return ERR_CAST(vaddr);
        }

        i915_vma_get(hwsp->vma);
        cl->hwsp = hwsp;
        cl->vaddr = page_pack_bits(vaddr, cacheline);

        i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

        return cl;
}
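
/* tl->hwsp_cacheline may be NULL for timelines backed by a global HWSP. */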
static void cacheline_acquire(struct intel_timeline_cacheline *cl)
{
        if (cl)
                i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
        if (cl)
                i915_active_release(&cl->active);
}
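
/*
 * Mark the cacheline as free; if nothing on the GPU still references it,
 * release it immediately, otherwise __cacheline_retire() will do so later.
 */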
static void cacheline_free(struct intel_timeline_cacheline *cl)
{
        GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
        cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

        if (i915_active_is_idle(&cl->active))
                __idle_cacheline_free(cl);
}
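
/*
 * A timeline either sub-allocates a cacheline from a shared per-gt HWSP
 * page (hwsp == NULL) or uses the caller-provided global HWSP vma.
 */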
int intel_timeline_init(struct intel_timeline *timeline,
                        struct intel_gt *gt,
                        struct i915_vma *hwsp)
{
        void *vaddr;

        kref_init(&timeline->kref);
        atomic_set(&timeline->pin_count, 0);

        timeline->gt = gt;

        timeline->has_initial_breadcrumb = !hwsp;
        timeline->hwsp_cacheline = NULL;

        if (!hwsp) {
                struct intel_timeline_cacheline *cl;
                unsigned int cacheline;

                hwsp = hwsp_alloc(timeline, &cacheline);
                if (IS_ERR(hwsp))
                        return PTR_ERR(hwsp);

                cl = cacheline_alloc(hwsp->private, cacheline);
                if (IS_ERR(cl)) {
                        __idle_hwsp_free(hwsp->private, cacheline);
                        return PTR_ERR(cl);
                }

                timeline->hwsp_cacheline = cl;
                timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

                vaddr = page_mask_bits(cl->vaddr);
        } else {
                timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;

                vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
                if (IS_ERR(vaddr))
                        return PTR_ERR(vaddr);
        }

        timeline->hwsp_seqno =
                memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

        timeline->hwsp_ggtt = i915_vma_get(hwsp);
        GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

        timeline->fence_context = dma_fence_context_alloc(1);

        mutex_init(&timeline->mutex);

        INIT_ACTIVE_FENCE(&timeline->last_request);
        INIT_LIST_HEAD(&timeline->requests);

        i915_syncmap_init(&timeline->sync);

        return 0;
}
void intel_gt_init_timelines(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        spin_lock_init(&timelines->lock);
        INIT_LIST_HEAD(&timelines->active_list);

        spin_lock_init(&timelines->hwsp_lock);
        INIT_LIST_HEAD(&timelines->hwsp_free_list);
}
void intel_timeline_fini(struct intel_timeline *timeline)
{
        GEM_BUG_ON(atomic_read(&timeline->pin_count));
        GEM_BUG_ON(!list_empty(&timeline->requests));
        GEM_BUG_ON(timeline->retire);

        if (timeline->hwsp_cacheline)
                cacheline_free(timeline->hwsp_cacheline);
        else
                i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

        i915_vma_put(timeline->hwsp_ggtt);
}
struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
{
        struct intel_timeline *timeline;
        int err;

        timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
        if (!timeline)
                return ERR_PTR(-ENOMEM);

        err = intel_timeline_init(timeline, gt, global_hwsp);
        if (err) {
                kfree(timeline);
                return ERR_PTR(err);
        }

        return timeline;
}
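
/*
 * Pin the HWSP into the GGTT and convert hwsp_offset into its final GGTT
 * address for the GPU to use; pins are reference counted per timeline.
 */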
int intel_timeline_pin(struct intel_timeline *tl)
{
        int err;

        if (atomic_add_unless(&tl->pin_count, 1, 0))
                return 0;

        err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
        if (err)
                return err;

        tl->hwsp_offset =
                i915_ggtt_offset(tl->hwsp_ggtt) +
                offset_in_page(tl->hwsp_offset);

        cacheline_acquire(tl->hwsp_cacheline);
        if (atomic_fetch_inc(&tl->pin_count)) {
                cacheline_release(tl->hwsp_cacheline);
                __i915_vma_unpin(tl->hwsp_ggtt);
        }

        return 0;
}
void intel_timeline_enter(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;

        /*
         * Pretend we are serialised by the timeline->mutex.
         *
         * While generally true, there are a few exceptions to the rule
         * for the engine->kernel_context being used to manage power
         * transitions. As the engine_park may be called from under any
         * timeline, it uses the power mutex as a global serialisation
         * lock to prevent any other request entering its timeline.
         *
         * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
         *
         * However, intel_gt_retire_request() does not know which engine
         * it is retiring along and so cannot partake in the engine-pm
         * barrier, and there we use the tl->active_count as a means to
         * pin the timeline in the active_list while the locks are dropped.
         * Ergo, as that is outside of the engine-pm barrier, we need to
         * use atomic to manipulate tl->active_count.
         */
        lockdep_assert_held(&tl->mutex);

        if (atomic_add_unless(&tl->active_count, 1, 0))
                return;

        spin_lock(&timelines->lock);
        if (!atomic_fetch_inc(&tl->active_count))
                list_add_tail(&tl->link, &timelines->active_list);
        spin_unlock(&timelines->lock);
}
void intel_timeline_exit(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;

        /* See intel_timeline_enter() */
        lockdep_assert_held(&tl->mutex);

        GEM_BUG_ON(!atomic_read(&tl->active_count));
        if (atomic_add_unless(&tl->active_count, -1, 1))
                return;

        spin_lock(&timelines->lock);
        if (atomic_dec_and_test(&tl->active_count))
                list_del(&tl->link);
        spin_unlock(&timelines->lock);

        /*
         * Since this timeline is idle, all barriers upon which we were waiting
         * must also be complete and so we can discard the last used barriers
         * without loss of information.
         */
        i915_syncmap_free(&tl->sync);
}
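
/*
 * Each request consumes an extra seqno when the timeline emits an initial
 * breadcrumb, keeping the completion seqno even (see the GEM_BUG_ON below).
 */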
static u32 timeline_advance(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

        return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
        tl->seqno -= 1 + tl->has_initial_breadcrumb;
}
static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
                           struct i915_request *rq,
                           u32 *seqno)
{
        struct intel_timeline_cacheline *cl;
        unsigned int cacheline;
        struct i915_vma *vma;
        void *vaddr;
        int err;

        /*
         * If there is an outstanding GPU reference to this cacheline,
         * such as it being sampled by a HW semaphore on another timeline,
         * we cannot wraparound our seqno value (the HW semaphore does
         * a strict greater-than-or-equals compare, not i915_seqno_passed).
         * So if the cacheline is still busy, we must detach ourselves
         * from it and leave it inflight alongside its users.
         *
         * However, if nobody is watching and we can guarantee that nobody
         * will, we could simply reuse the same cacheline.
         *
         * if (i915_active_request_is_signaled(&tl->last_request) &&
         *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
         *         return 0;
         *
         * That seems unlikely for a busy timeline that needed to wrap in
         * the first place, so just replace the cacheline.
         */

        vma = hwsp_alloc(tl, &cacheline);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_rollback;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
        if (err) {
                __idle_hwsp_free(vma->private, cacheline);
                goto err_rollback;
        }

        cl = cacheline_alloc(vma->private, cacheline);
        if (IS_ERR(cl)) {
                err = PTR_ERR(cl);
                __idle_hwsp_free(vma->private, cacheline);
                goto err_unpin;
        }
        GEM_BUG_ON(cl->hwsp->vma != vma);

        /*
         * Attach the old cacheline to the current request, so that we only
         * free it after the current request is retired, which ensures that
         * all writes into the cacheline from previous requests are complete.
         */
        err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
        if (err)
                goto err_cacheline;

        cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
        cacheline_free(tl->hwsp_cacheline);

        i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
        i915_vma_put(tl->hwsp_ggtt);

        tl->hwsp_ggtt = i915_vma_get(vma);

        vaddr = page_mask_bits(cl->vaddr);
        tl->hwsp_offset = cacheline * CACHELINE_BYTES;
        tl->hwsp_seqno =
                memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

        tl->hwsp_offset += i915_ggtt_offset(vma);

        cacheline_acquire(cl);
        tl->hwsp_cacheline = cl;

        *seqno = timeline_advance(tl);
        GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
        return 0;

err_cacheline:
        cacheline_free(cl);
err_unpin:
        i915_vma_unpin(vma);
err_rollback:
        timeline_rollback(tl);
        return err;
}
int intel_timeline_get_seqno(struct intel_timeline *tl,
                             struct i915_request *rq,
                             u32 *seqno)
{
        *seqno = timeline_advance(tl);

        /* Replace the HWSP on wraparound for HW semaphores */
        if (unlikely(!*seqno && tl->hwsp_cacheline))
                return __intel_timeline_get_seqno(tl, rq, seqno);

        return 0;
}
static int cacheline_ref(struct intel_timeline_cacheline *cl,
                         struct i915_request *rq)
{
        return i915_active_add_request(&cl->active, rq);
}
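
/*
 * Report the GGTT address at which from's seqno is written so that to may
 * wait on it with a HW semaphore, keeping the cacheline alive for as long
 * as to needs it; returns a positive value if from has already completed
 * (or its cacheline was recycled) and no semaphore wait is required.
 */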
int intel_timeline_read_hwsp(struct i915_request *from,
                             struct i915_request *to,
                             u32 *hwsp)
{
        struct intel_timeline_cacheline *cl;
        int err;

        GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

        rcu_read_lock();
        cl = rcu_dereference(from->hwsp_cacheline);
        if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
                goto unlock; /* seqno wrapped and completed! */
        if (unlikely(i915_request_completed(from)))
                goto release;
        rcu_read_unlock();

        err = cacheline_ref(cl, to);
        if (err)
                goto out;

        *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
                ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

out:
        i915_active_release(&cl->active);
        return err;

release:
        i915_active_release(&cl->active);
unlock:
        rcu_read_unlock();
        return 1;
}
void intel_timeline_unpin(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        if (!atomic_dec_and_test(&tl->pin_count))
                return;

        cacheline_release(tl->hwsp_cacheline);

        __i915_vma_unpin(tl->hwsp_ggtt);
}
void __intel_timeline_free(struct kref *kref)
{
        struct intel_timeline *timeline =
                container_of(kref, typeof(*timeline), kref);

        intel_timeline_fini(timeline);
        kfree_rcu(timeline, rcu);
}
void intel_gt_fini_timelines(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        GEM_BUG_ON(!list_empty(&timelines->active_list));
        GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif