/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"

struct context {
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
};
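
/*
 * CPU-domain accessors: write/read a single dword through the page's
 * kernel mapping, flushing the cacheline around the access as directed
 * by the prepare_write()/prepare_read() domain tracking.
 */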

static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*cpu = v;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	kunmap_atomic(map);
	i915_gem_object_finish_access(ctx->obj);

	return 0;
}

static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*v = *cpu;

	kunmap_atomic(map);
	i915_gem_object_finish_access(ctx->obj);

	return 0;
}
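
/*
 * GTT-domain accessors: pin the object into the mappable aperture and
 * perform the dword access through the GGTT iomap, holding a GT
 * wakeref for the duration.
 */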

static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt);
	return err;
}

static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt);
	return err;
}
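
/*
 * WC accessors: access the dword through a write-combined CPU mapping
 * of the object's backing pages.
 */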

static int wc_set(struct context *ctx, unsigned long offset, u32 v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}
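
/*
 * GPU writer: emit an MI_STORE_DWORD_IMM on the chosen engine so that
 * the value is written by the GPU rather than the CPU, using the
 * command layout appropriate for the hardware generation.
 */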

static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rq = intel_engine_create_kernel_request(ctx->engine);
	if (IS_ERR(rq)) {
		i915_vma_unpin(vma);
		return PTR_ERR(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		i915_vma_unpin(vma);
		return PTR_ERR(cs);
	}

	if (INTEL_GEN(ctx->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (INTEL_GEN(ctx->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	i915_vma_unpin(vma);

	i915_request_add(rq);

	return err;
}
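
/*
 * Validity checks used to skip modes the device cannot exercise:
 * a wedged GT, a GGTT without fence registers, or an engine that
 * cannot issue MI_STORE_DWORD.
 */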

static bool always_valid(struct context *ctx)
{
	return true;
}

static bool needs_fence_registers(struct context *ctx)
{
	struct intel_gt *gt = ctx->engine->gt;

	if (intel_gt_is_wedged(gt))
		return false;

	return gt->ggtt->num_fences;
}

static bool needs_mi_store_dword(struct context *ctx)
{
	if (intel_gt_is_wedged(ctx->engine->gt))
		return false;

	return intel_engine_can_store_dword(ctx->engine);
}
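
/* Table of coherency modes: name, setter, getter and validity check. */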

static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct context *ctx, unsigned long offset, u32 v);
	int (*get)(struct context *ctx, unsigned long offset, u32 *v);
	bool (*valid)(struct context *ctx);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};
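
/* Pick one uabi engine at random to act as the GPU writer. */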

static struct intel_engine_cs *
random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	count = i915_prandom_u32_max_state(count, prng);
	for_each_uabi_engine(engine, i915)
		if (count-- == 0)
			return engine;

	return NULL;
}
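
/*
 * Main test: for every (overwrite, write, read) combination of the
 * coherency modes above, scribble stale values into an object,
 * overwrite them and check that the readback matches.
 */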

static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE/64;
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	unsigned long count, n;
	u32 *offsets, *values;
	I915_RND_STATE(prng);
	struct context ctx;
	int err = 0;

	/*
	 * We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	ctx.engine = random_engine(i915, &prng);
	if (!ctx.engine) {
		err = -ENODEV;
		goto out_free;
	}
	pr_info("%s: using %s\n", __func__, ctx.engine->name);
	intel_engine_pm_get(ctx.engine);

	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(&ctx))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(&ctx))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(&ctx))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(ctx.obj)) {
						err = PTR_ERR(ctx.obj);
						goto out_pm;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(&ctx, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(&ctx, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(&ctx, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					i915_gem_object_put(ctx.obj);
				}
			}
		}
	}
out_pm:
	intel_engine_pm_put(ctx.engine);
out_free:
	kfree(offsets);
	return err;

put_object:
	i915_gem_object_put(ctx.obj);
	goto out_pm;
}
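
/* Live selftest entry point; registers igt_gem_coherency with the selftest runner. */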

int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_subtests(tests, i915);
}