2 * SPDX-License-Identifier: MIT
4 * Copyright © 2019 Intel Corporation
7 #include <linux/kref.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_gt.h"
12 #include "i915_selftest.h"
14 #include "igt_flush_test.h"
15 #include "lib_sw_fence.h"
/*
 * test_stream() - set up an OA perf stream for use by the selftests.
 *
 * NOTE(review): parts of this definition are not visible in this view (the
 * embedded original line numbers skip), so only the visible statements are
 * described here; the allocation-failure handling, the remaining property
 * fields and the return statements cannot be confirmed from here.
 *
 * Visible behaviour:
 *  - the open properties pick an engine via intel_engine_lookup_user() with
 *    I915_ENGINE_CLASS_RENDER, request SAMPLE_OA_REPORT, and choose the OA
 *    format depending on IS_GEN(perf->i915, 12);
 *  - a stream object is allocated with kzalloc(..., GFP_KERNEL);
 *  - i915_oa_stream_init() is called on it with perf->lock held.
 */
17 static struct i915_perf_stream
*
18 test_stream(struct i915_perf
*perf
)
20 struct drm_i915_perf_open_param param
= {};
21 struct perf_open_properties props
= {
22 .engine
= intel_engine_lookup_user(perf
->i915
,
23 I915_ENGINE_CLASS_RENDER
,
25 .sample_flags
= SAMPLE_OA_REPORT
,
/* Gen12 uses a different OA report format from the other generations. */
26 .oa_format
= IS_GEN(perf
->i915
, 12) ?
27 I915_OA_FORMAT_A32u40_A4u32_B8_C8
: I915_OA_FORMAT_C4_B8
,
30 struct i915_perf_stream
*stream
;
32 stream
= kzalloc(sizeof(*stream
), GFP_KERNEL
);
/* Stream initialisation is performed under perf->lock. */
38 mutex_lock(&perf
->lock
);
/*
 * NOTE(review): the second argument below reads "¶m", which looks like a
 * mis-decoded "&param" (the local declared above) -- confirm against the
 * pristine file.
 */
39 if (i915_oa_stream_init(stream
, ¶m
, &props
)) {
43 mutex_unlock(&perf
->lock
);
/*
 * stream_destroy() - tear down a stream created for the selftests.
 *
 * Looks up the owning i915_perf through stream->perf and calls
 * i915_perf_destroy_locked() on the stream while holding perf->lock.
 * The perf pointer is read before the destroy call, so the unlock does not
 * dereference the stream.
 */
48 static void stream_destroy(struct i915_perf_stream
*stream
)
50 struct i915_perf
*perf
= stream
->perf
;
52 mutex_lock(&perf
->lock
);
53 i915_perf_destroy_locked(stream
);
54 mutex_unlock(&perf
->lock
);
/*
 * live_sanitycheck() - selftest: create a perf stream and destroy it again.
 *
 * @arg: the struct drm_i915_private under test, passed as an opaque pointer.
 *
 * NOTE(review): the handling of a failed test_stream() call and the return
 * statements are not visible in this view.
 */
57 static int live_sanitycheck(void *arg
)
59 struct drm_i915_private
*i915
= arg
;
60 struct i915_perf_stream
*stream
;
62 /* Quick check that we can create a perf stream */
64 stream
= test_stream(&i915
->perf
);
68 stream_destroy(stream
);
/*
 * write_timestamp() - emit a PIPE_CONTROL into @rq that writes a timestamp.
 *
 * @rq:   request whose ring receives the command.
 * @slot: index used to form the third command dword (slot * sizeof(u32)).
 *
 * Visible behaviour: ring space is requested with intel_ring_begin(rq, 6),
 * a GFX_OP_PIPE_CONTROL(len) header is written followed by the flags
 * GLOBAL_GTT_IVB | STORE_DATA_INDEX | WRITE_TIMESTAMP and the slot offset,
 * and the ring is advanced with intel_ring_advance().
 *
 * NOTE(review): the declarations of cs/len, the error check on
 * intel_ring_begin(), how len is computed around the INTEL_GEN() >= 8 test,
 * the remaining command dwords and the return value are not visible here.
 */
72 static int write_timestamp(struct i915_request
*rq
, int slot
)
77 cs
= intel_ring_begin(rq
, 6);
82 if (INTEL_GEN(rq
->i915
) >= 8)
85 *cs
++ = GFX_OP_PIPE_CONTROL(len
);
86 *cs
++ = PIPE_CONTROL_GLOBAL_GTT_IVB
|
87 PIPE_CONTROL_STORE_DATA_INDEX
|
88 PIPE_CONTROL_WRITE_TIMESTAMP
;
/*
 * Presumably a byte offset into the engine status page, given the
 * STORE_DATA_INDEX flag and the intel_read_status_page() readers -- confirm.
 */
89 *cs
++ = slot
* sizeof(u32
);
94 intel_ring_advance(rq
, cs
);
/*
 * poll_status() - wait for a status-page slot to become non-zero.
 *
 * @rq:   request whose engine status page is read.
 * @slot: status-page index to watch.
 *
 * Loops while intel_read_status_page(rq->engine, slot) is zero and the
 * request has not completed, so the wait also ends once @rq completes even
 * if the slot was never written.
 *
 * NOTE(review): the loop body and the ktime_t value returned are not visible
 * in this view.
 */
99 static ktime_t
poll_status(struct i915_request
*rq
, int slot
)
101 while (!intel_read_status_page(rq
->engine
, slot
) &&
102 !i915_request_completed(rq
))
/*
 * live_noa_delay() - selftest: measure the delay produced by the stream's
 * noa_wait batch and compare it with perf->noa_programming_delay.
 *
 * @arg: the struct drm_i915_private under test, passed as an opaque pointer.
 *
 * Visible sequence:
 *  1. create a stream and read the expected delay;
 *  2. clear status-page slots 0x100..0x103;
 *  3. build one kernel request that writes a timestamp to slot 0x100, starts
 *     the noa_wait batch, then writes a timestamp to slot 0x102;
 *  4. submit it, poll both slots from the CPU, and log the CPU-side delta;
 *  5. convert the GPU timestamp delta to nanoseconds and check that it lies
 *     within [0.75 * expected, 1.5 * expected].
 *
 * NOTE(review): local declarations, the error checks after each step, any
 * goto/label structure and the return statements are not visible in this
 * view; the repeated i915_request_add() calls below presumably sit on those
 * hidden error paths -- confirm against the full file.
 */
108 static int live_noa_delay(void *arg
)
110 struct drm_i915_private
*i915
= arg
;
111 struct i915_perf_stream
*stream
;
112 struct i915_request
*rq
;
119 /* Check that the GPU delay matches expectations */
121 stream
= test_stream(&i915
->perf
);
125 expected
= atomic64_read(&stream
->perf
->noa_programming_delay
);
127 if (stream
->engine
->class != RENDER_CLASS
) {
/* Zero the timestamp slots so that a non-zero read means "written". */
132 for (i
= 0; i
< 4; i
++)
133 intel_write_status_page(stream
->engine
, 0x100 + i
, 0);
135 rq
= intel_engine_create_kernel_request(stream
->engine
);
/* Emit the initial breadcrumb only when both engine and timeline use one. */
141 if (rq
->engine
->emit_init_breadcrumb
&&
142 i915_request_timeline(rq
)->has_initial_breadcrumb
) {
143 err
= rq
->engine
->emit_init_breadcrumb(rq
);
145 i915_request_add(rq
);
/* Timestamp before the delay batch. */
150 err
= write_timestamp(rq
, 0x100);
152 i915_request_add(rq
);
/* Run the stream's noa_wait batch from its GGTT address. */
156 err
= rq
->engine
->emit_bb_start(rq
,
157 i915_ggtt_offset(stream
->noa_wait
), 0,
158 I915_DISPATCH_SECURE
);
160 i915_request_add(rq
);
/* Timestamp after the delay batch. */
164 err
= write_timestamp(rq
, 0x102);
166 i915_request_add(rq
);
/* Hold a reference across submission; it is dropped by i915_request_put(). */
170 i915_request_get(rq
);
171 i915_request_add(rq
);
174 t0
= poll_status(rq
, 0x100);
175 t1
= poll_status(rq
, 0x102);
178 pr_info("CPU delay: %lluns, expected %lluns\n",
179 ktime_sub(t1
, t0
), expected
);
/*
 * GPU-side delta in timestamp ticks, then ticks * 10^6 / kHz to obtain
 * nanoseconds.
 */
181 delay
= intel_read_status_page(stream
->engine
, 0x102);
182 delay
-= intel_read_status_page(stream
->engine
, 0x100);
183 delay
= div_u64(mul_u32_u32(delay
, 1000 * 1000),
184 RUNTIME_INFO(i915
)->cs_timestamp_frequency_khz
);
185 pr_info("GPU delay: %uns, expected %lluns\n",
/* Reject delays below 3/4 or above 3/2 of the expected value. */
188 if (4 * delay
< 3 * expected
|| 2 * delay
> 3 * expected
) {
189 pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
191 div_u64(3 * expected
, 4000),
192 div_u64(3 * expected
, 2000));
196 i915_request_put(rq
);
198 stream_destroy(stream
);
/*
 * i915_perf_live_selftests() - entry point for the live perf selftests.
 *
 * @i915: device under test.
 *
 * Registers live_sanitycheck and live_noa_delay as subtests and runs them
 * through i915_subtests(), returning its result.
 *
 * Two conditions are evaluated first: perf lacking metrics_kobj or
 * ops.enable_metric_set, and the GT being wedged.
 * NOTE(review): the statements taken when those conditions hold are not
 * visible in this view (presumably an early return that skips the tests --
 * confirm).
 */
202 int i915_perf_live_selftests(struct drm_i915_private
*i915
)
204 static const struct i915_subtest tests
[] = {
205 SUBTEST(live_sanitycheck
),
206 SUBTEST(live_noa_delay
),
208 struct i915_perf
*perf
= &i915
->perf
;
210 if (!perf
->metrics_kobj
|| !perf
->ops
.enable_metric_set
)
213 if (intel_gt_is_wedged(&i915
->gt
))
216 return i915_subtests(tests
, i915
);