/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/kref.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"

#include "i915_selftest.h"

#include "igt_flush_test.h"
#include "lib_sw_fence.h"

#define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
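/*
 * Register a bare OA config under a fixed test UUID so that the streams
 * opened by these selftests have a metrics set to reference.
 */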
static int
alloc_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config;

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config)
		return -ENOMEM;

	oa_config->perf = perf;
	kref_init(&oa_config->ref);

	strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));

	mutex_lock(&perf->metrics_lock);

	oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
	if (oa_config->id < 0) {
		mutex_unlock(&perf->metrics_lock);
		i915_oa_config_put(oa_config);
		return -ENOMEM;
	}

	mutex_unlock(&perf->metrics_lock);

	return 0;
}
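/*
 * Tear down the test config registered by alloc_empty_config(), dropping
 * the reference held by the metrics IDR.
 */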
static void
destroy_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config = NULL, *tmp;
	int id;

	mutex_lock(&perf->metrics_lock);

	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
			oa_config = tmp;
			break;
		}
	}

	if (!oa_config) {
		mutex_unlock(&perf->metrics_lock);
		return;
	}

	idr_remove(&perf->metrics_idr, oa_config->id);

	mutex_unlock(&perf->metrics_lock);

	i915_oa_config_put(oa_config);
}
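/* Look up the test config by its UUID and return a new reference to it. */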
static struct i915_oa_config *
get_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config = NULL, *tmp;
	int id;

	mutex_lock(&perf->metrics_lock);

	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
			oa_config = i915_oa_config_get(tmp);
			break;
		}
	}

	mutex_unlock(&perf->metrics_lock);

	return oa_config;
}
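/*
 * Open an OA stream on the render engine using the empty test config,
 * roughly what userspace would request through the perf open ioctl.
 */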
static struct i915_perf_stream *
test_stream(struct i915_perf *perf)
{
	struct drm_i915_perf_open_param param = {};
	struct i915_oa_config *oa_config = get_empty_config(perf);
	struct perf_open_properties props = {
		.engine = intel_engine_lookup_user(perf->i915,
						   I915_ENGINE_CLASS_RENDER,
						   0),
		.sample_flags = SAMPLE_OA_REPORT,
		.oa_format = IS_GEN(perf->i915, 12) ?
		I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
	};
	struct i915_perf_stream *stream;

	if (!oa_config)
		return NULL;

	props.metrics_set = oa_config->id;

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		i915_oa_config_put(oa_config);
		return NULL;
	}

	stream->perf = perf;

	mutex_lock(&perf->lock);
	if (i915_oa_stream_init(stream, &param, &props)) {
		kfree(stream);
		stream = NULL;
	}
	mutex_unlock(&perf->lock);

	i915_oa_config_put(oa_config);

	return stream;
}
static void stream_destroy(struct i915_perf_stream *stream)
{
	struct i915_perf *perf = stream->perf;

	mutex_lock(&perf->lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&perf->lock);
}
static int live_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;

	/* Quick check we can create a perf stream */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -EINVAL;

	stream_destroy(stream);
	return 0;
}
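/*
 * Emit a PIPE_CONTROL that writes the command streamer timestamp into the
 * given dword slot of the engine's global status page.
 */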
static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 *cs;
	int len;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	len = 5;
	if (INTEL_GEN(rq->engine->i915) >= 8)
		len++;

	*cs++ = GFX_OP_PIPE_CONTROL(len);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
		PIPE_CONTROL_STORE_DATA_INDEX |
		PIPE_CONTROL_WRITE_TIMESTAMP;
	*cs++ = slot * sizeof(u32);
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}
static ktime_t poll_status(struct i915_request *rq, int slot)
{
	while (!intel_read_status_page(rq->engine, slot) &&
	       !i915_request_completed(rq))
		cpu_relax();

	return ktime_get();
}
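/*
 * Bracket the stream's noa_wait batch with two timestamp writes and check
 * that the measured GPU delay stays between 3/4 and 3/2 of the programmed
 * noa_programming_delay.
 */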
static int live_noa_delay(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct i915_request *rq;
	ktime_t t0, t1;
	u64 expected;
	u32 delay;
	int err;
	int i;

	/* Check that the GPU delay matches expectations */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	expected = atomic64_read(&stream->perf->noa_programming_delay);

	if (stream->engine->class != RENDER_CLASS) {
		err = -ENODEV;
		goto out;
	}

	for (i = 0; i < 4; i++)
		intel_write_status_page(stream->engine, 0x100 + i, 0);

	rq = intel_engine_create_kernel_request(stream->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
	}

	err = write_timestamp(rq, 0x100);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	err = write_timestamp(rq, 0x102);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	t0 = poll_status(rq, 0x100);
	t1 = poll_status(rq, 0x102);

	pr_info("CPU delay: %lluns, expected %lluns\n",
		ktime_sub(t1, t0), expected);

	delay = intel_read_status_page(stream->engine, 0x102);
	delay -= intel_read_status_page(stream->engine, 0x100);
	delay = i915_cs_timestamp_ticks_to_ns(i915, delay);
	pr_info("GPU delay: %uns, expected %lluns\n",
		delay, expected);

	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
		       delay / 1000,
		       div_u64(3 * expected, 4000),
		       div_u64(3 * expected, 2000));
		err = -EINVAL;
	}

	i915_request_put(rq);
out:
	stream_destroy(stream);
	return err;
}
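/*
 * Run the noa_wait batch on a fresh context and verify it neither clobbers
 * the CS general purpose registers nor writes into the context's scratch
 * page.
 */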
static int live_noa_gpr(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs, *store;
	void *scratch;
	u32 gpr0;
	int err;
	int i;

	/* Check that the delay does not clobber user context state (GPR) */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));

	ce = intel_context_create(stream->engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;
	}

	/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
	scratch = kmap(__px_page(ce->vm->scratch[0]));
	memset(scratch, POISON_FREE, PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	i915_request_get(rq);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out_rq;
		}
	}

	/* Fill the 16 qword [32 dword] GPR with a known unlikely value */
	cs = intel_ring_begin(rq, 2 * 32 + 2);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto out_rq;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(32);
	for (i = 0; i < 32; i++) {
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	/* Execute the GPU delay */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out_rq;
	}

	/* Read the GPR back, using the pinned global HWSP for convenience */
	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
	for (i = 0; i < 32; i++) {
		u32 cmd;

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(rq);
			goto out_rq;
		}

		cmd = MI_STORE_REGISTER_MEM;
		if (INTEL_GEN(i915) >= 8)
			cmd++;

		*cs++ = cmd;
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
			offset_in_page(store) +
			i * sizeof(u32);
		*cs++ = 0;
		intel_ring_advance(rq, cs);
	}

	i915_request_add(rq);

	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
		pr_err("noa_wait timed out\n");
		intel_gt_set_wedged(stream->engine->gt);
		err = -EIO;
		goto out_rq;
	}

	/* Verify that the GPRs contain our expected values */
	for (i = 0; i < 32; i++) {
		if (store[i] == STACK_MAGIC)
			continue;

		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
		       i, store[i], STACK_MAGIC);
		err = -EINVAL;
	}

	/* Verify that the user's scratch page was not used for GPR storage */
	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
		pr_err("Scratch page overwritten!\n");
		igt_hexdump(scratch, 4096);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	kunmap(__px_page(ce->vm->scratch[0]));
	intel_context_put(ce);
out:
	stream_destroy(stream);
	return err;
}
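/*
 * Live selftest entry point; skipped when the perf/OA infrastructure is
 * unavailable or the GT is already wedged.
 */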
int i915_perf_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_noa_delay),
		SUBTEST(live_noa_gpr),
	};
	struct i915_perf *perf = &i915->perf;
	int err;

	if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
		return 0;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	err = alloc_empty_config(&i915->perf);
	if (err)
		return err;

	err = i915_subtests(tests, i915);

	destroy_empty_config(&i915->perf);

	return err;
}