/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */
#include <linux/prime_numbers.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
/* Per-test state: the object under test and the engine used for GPU writes. */
struct context {
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
};
22 static int cpu_set(struct context
*ctx
, unsigned long offset
, u32 v
)
24 unsigned int needs_clflush
;
30 i915_gem_object_lock(ctx
->obj
, NULL
);
31 err
= i915_gem_object_prepare_write(ctx
->obj
, &needs_clflush
);
35 page
= i915_gem_object_get_page(ctx
->obj
, offset
>> PAGE_SHIFT
);
36 map
= kmap_atomic(page
);
37 cpu
= map
+ offset_in_page(offset
);
39 if (needs_clflush
& CLFLUSH_BEFORE
)
40 drm_clflush_virt_range(cpu
, sizeof(*cpu
));
44 if (needs_clflush
& CLFLUSH_AFTER
)
45 drm_clflush_virt_range(cpu
, sizeof(*cpu
));
48 i915_gem_object_finish_access(ctx
->obj
);
51 i915_gem_object_unlock(ctx
->obj
);
55 static int cpu_get(struct context
*ctx
, unsigned long offset
, u32
*v
)
57 unsigned int needs_clflush
;
63 i915_gem_object_lock(ctx
->obj
, NULL
);
64 err
= i915_gem_object_prepare_read(ctx
->obj
, &needs_clflush
);
68 page
= i915_gem_object_get_page(ctx
->obj
, offset
>> PAGE_SHIFT
);
69 map
= kmap_atomic(page
);
70 cpu
= map
+ offset_in_page(offset
);
72 if (needs_clflush
& CLFLUSH_BEFORE
)
73 drm_clflush_virt_range(cpu
, sizeof(*cpu
));
78 i915_gem_object_finish_access(ctx
->obj
);
81 i915_gem_object_unlock(ctx
->obj
);
85 static int gtt_set(struct context
*ctx
, unsigned long offset
, u32 v
)
91 i915_gem_object_lock(ctx
->obj
, NULL
);
92 err
= i915_gem_object_set_to_gtt_domain(ctx
->obj
, true);
93 i915_gem_object_unlock(ctx
->obj
);
97 vma
= i915_gem_object_ggtt_pin(ctx
->obj
, NULL
, 0, 0, PIN_MAPPABLE
);
101 intel_gt_pm_get(vma
->vm
->gt
);
103 map
= i915_vma_pin_iomap(vma
);
110 iowrite32(v
, &map
[offset
/ sizeof(*map
)]);
111 i915_vma_unpin_iomap(vma
);
114 intel_gt_pm_put(vma
->vm
->gt
);
118 static int gtt_get(struct context
*ctx
, unsigned long offset
, u32
*v
)
120 struct i915_vma
*vma
;
124 i915_gem_object_lock(ctx
->obj
, NULL
);
125 err
= i915_gem_object_set_to_gtt_domain(ctx
->obj
, false);
126 i915_gem_object_unlock(ctx
->obj
);
130 vma
= i915_gem_object_ggtt_pin(ctx
->obj
, NULL
, 0, 0, PIN_MAPPABLE
);
134 intel_gt_pm_get(vma
->vm
->gt
);
136 map
= i915_vma_pin_iomap(vma
);
143 *v
= ioread32(&map
[offset
/ sizeof(*map
)]);
144 i915_vma_unpin_iomap(vma
);
147 intel_gt_pm_put(vma
->vm
->gt
);
151 static int wc_set(struct context
*ctx
, unsigned long offset
, u32 v
)
156 i915_gem_object_lock(ctx
->obj
, NULL
);
157 err
= i915_gem_object_set_to_wc_domain(ctx
->obj
, true);
158 i915_gem_object_unlock(ctx
->obj
);
162 map
= i915_gem_object_pin_map(ctx
->obj
, I915_MAP_WC
);
166 map
[offset
/ sizeof(*map
)] = v
;
168 __i915_gem_object_flush_map(ctx
->obj
, offset
, sizeof(*map
));
169 i915_gem_object_unpin_map(ctx
->obj
);
174 static int wc_get(struct context
*ctx
, unsigned long offset
, u32
*v
)
179 i915_gem_object_lock(ctx
->obj
, NULL
);
180 err
= i915_gem_object_set_to_wc_domain(ctx
->obj
, false);
181 i915_gem_object_unlock(ctx
->obj
);
185 map
= i915_gem_object_pin_map(ctx
->obj
, I915_MAP_WC
);
189 *v
= map
[offset
/ sizeof(*map
)];
190 i915_gem_object_unpin_map(ctx
->obj
);
195 static int gpu_set(struct context
*ctx
, unsigned long offset
, u32 v
)
197 struct i915_request
*rq
;
198 struct i915_vma
*vma
;
202 i915_gem_object_lock(ctx
->obj
, NULL
);
203 err
= i915_gem_object_set_to_gtt_domain(ctx
->obj
, true);
207 vma
= i915_gem_object_ggtt_pin(ctx
->obj
, NULL
, 0, 0, 0);
213 rq
= intel_engine_create_kernel_request(ctx
->engine
);
219 cs
= intel_ring_begin(rq
, 4);
225 if (INTEL_GEN(ctx
->engine
->i915
) >= 8) {
226 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| 1 << 22;
227 *cs
++ = lower_32_bits(i915_ggtt_offset(vma
) + offset
);
228 *cs
++ = upper_32_bits(i915_ggtt_offset(vma
) + offset
);
230 } else if (INTEL_GEN(ctx
->engine
->i915
) >= 4) {
231 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
233 *cs
++ = i915_ggtt_offset(vma
) + offset
;
236 *cs
++ = MI_STORE_DWORD_IMM
| MI_MEM_VIRTUAL
;
237 *cs
++ = i915_ggtt_offset(vma
) + offset
;
241 intel_ring_advance(rq
, cs
);
243 err
= i915_request_await_object(rq
, vma
->obj
, true);
245 err
= i915_vma_move_to_active(vma
, rq
, EXEC_OBJECT_WRITE
);
248 i915_request_add(rq
);
252 i915_gem_object_unlock(ctx
->obj
);
257 static bool always_valid(struct context
*ctx
)
262 static bool needs_fence_registers(struct context
*ctx
)
264 struct intel_gt
*gt
= ctx
->engine
->gt
;
266 if (intel_gt_is_wedged(gt
))
269 return gt
->ggtt
->num_fences
;
272 static bool needs_mi_store_dword(struct context
*ctx
)
274 if (intel_gt_is_wedged(ctx
->engine
->gt
))
277 return intel_engine_can_store_dword(ctx
->engine
);
280 static const struct igt_coherency_mode
{
282 int (*set
)(struct context
*ctx
, unsigned long offset
, u32 v
);
283 int (*get
)(struct context
*ctx
, unsigned long offset
, u32
*v
);
284 bool (*valid
)(struct context
*ctx
);
285 } igt_coherency_mode
[] = {
286 { "cpu", cpu_set
, cpu_get
, always_valid
},
287 { "gtt", gtt_set
, gtt_get
, needs_fence_registers
},
288 { "wc", wc_set
, wc_get
, always_valid
},
289 { "gpu", gpu_set
, NULL
, needs_mi_store_dword
},
293 static struct intel_engine_cs
*
294 random_engine(struct drm_i915_private
*i915
, struct rnd_state
*prng
)
296 struct intel_engine_cs
*engine
;
300 for_each_uabi_engine(engine
, i915
)
303 count
= i915_prandom_u32_max_state(count
, prng
);
304 for_each_uabi_engine(engine
, i915
)
311 static int igt_gem_coherency(void *arg
)
313 const unsigned int ncachelines
= PAGE_SIZE
/64;
314 struct drm_i915_private
*i915
= arg
;
315 const struct igt_coherency_mode
*read
, *write
, *over
;
316 unsigned long count
, n
;
317 u32
*offsets
, *values
;
318 I915_RND_STATE(prng
);
323 * We repeatedly write, overwrite and read from a sequence of
324 * cachelines in order to try and detect incoherency (unflushed writes
325 * from either the CPU or GPU). Each setter/getter uses our cache
326 * domain API which should prevent incoherency.
329 offsets
= kmalloc_array(ncachelines
, 2*sizeof(u32
), GFP_KERNEL
);
332 for (count
= 0; count
< ncachelines
; count
++)
333 offsets
[count
] = count
* 64 + 4 * (count
% 16);
335 values
= offsets
+ ncachelines
;
337 ctx
.engine
= random_engine(i915
, &prng
);
342 pr_info("%s: using %s\n", __func__
, ctx
.engine
->name
);
343 intel_engine_pm_get(ctx
.engine
);
345 for (over
= igt_coherency_mode
; over
->name
; over
++) {
349 if (!over
->valid(&ctx
))
352 for (write
= igt_coherency_mode
; write
->name
; write
++) {
356 if (!write
->valid(&ctx
))
359 for (read
= igt_coherency_mode
; read
->name
; read
++) {
363 if (!read
->valid(&ctx
))
366 for_each_prime_number_from(count
, 1, ncachelines
) {
367 ctx
.obj
= i915_gem_object_create_internal(i915
, PAGE_SIZE
);
368 if (IS_ERR(ctx
.obj
)) {
369 err
= PTR_ERR(ctx
.obj
);
373 i915_random_reorder(offsets
, ncachelines
, &prng
);
374 for (n
= 0; n
< count
; n
++)
375 values
[n
] = prandom_u32_state(&prng
);
377 for (n
= 0; n
< count
; n
++) {
378 err
= over
->set(&ctx
, offsets
[n
], ~values
[n
]);
380 pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
381 n
, count
, over
->name
, err
);
386 for (n
= 0; n
< count
; n
++) {
387 err
= write
->set(&ctx
, offsets
[n
], values
[n
]);
389 pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
390 n
, count
, write
->name
, err
);
395 for (n
= 0; n
< count
; n
++) {
398 err
= read
->get(&ctx
, offsets
[n
], &found
);
400 pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
401 n
, count
, read
->name
, err
);
405 if (found
!= values
[n
]) {
406 pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
407 n
, count
, over
->name
,
408 write
->name
, values
[n
],
410 ~values
[n
], offsets
[n
]);
416 i915_gem_object_put(ctx
.obj
);
422 intel_engine_pm_put(ctx
.engine
);
428 i915_gem_object_put(ctx
.obj
);
432 int i915_gem_coherency_live_selftests(struct drm_i915_private
*i915
)
434 static const struct i915_subtest tests
[] = {
435 SUBTEST(igt_gem_coherency
),
438 return i915_subtests(tests
, i915
);