/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/prime_numbers.h>

#include "../i915_selftest.h"
#include "i915_random.h"

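/*
 * Access via the CPU domain: map the backing page with kmap_atomic() and
 * rely on the shmem prepare/finish helpers to tell us which clflushes are
 * needed before and after touching the memory.
 */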
static int cpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	u32 *map;
	int err;

	err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	if (needs_clflush & CLFLUSH_BEFORE)
		clflush(map + offset_in_page(offset) / sizeof(*map));
	map[offset_in_page(offset) / sizeof(*map)] = v;
	if (needs_clflush & CLFLUSH_AFTER)
		clflush(map + offset_in_page(offset) / sizeof(*map));
	kunmap_atomic(map);

	i915_gem_obj_finish_shmem_access(obj);
	return 0;
}

static int cpu_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	u32 *map;
	int err;

	err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	if (needs_clflush & CLFLUSH_BEFORE)
		clflush(map + offset_in_page(offset) / sizeof(*map));
	*v = map[offset_in_page(offset) / sizeof(*map)];
	kunmap_atomic(map);

	i915_gem_obj_finish_shmem_access(obj);
	return 0;
}

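/*
 * Access via the GTT domain: pin the object into the mappable aperture and
 * perform the read/write through an ioremapped (WC) view of the pages.
 */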
static int gtt_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	err = i915_gem_object_set_to_gtt_domain(obj, true);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

static int gtt_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

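/*
 * Access via a direct WC mapping of the backing pages, after moving the
 * object to the WC domain.
 */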
static int wc_set(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 v)
{
	u32 *map;
	int err;

	err = i915_gem_object_set_to_wc_domain(obj, true);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;
	i915_gem_object_unpin_map(obj);

	return 0;
}

static int wc_get(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 *v)
{
	u32 *map;
	int err;

	err = i915_gem_object_set_to_wc_domain(obj, false);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(obj);

	return 0;
}

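/*
 * Write via the GPU: emit MI_STORE_DWORD_IMM on the render engine so that
 * the dword lands in the object through the GGTT, then install the request
 * as an exclusive fence so that later readers wait for the store.
 */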
static int gpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct drm_i915_gem_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	err = i915_gem_object_set_to_gtt_domain(obj, true);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		i915_vma_unpin(vma);
		return PTR_ERR(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		__i915_add_request(rq, false);
		i915_vma_unpin(vma);
		return PTR_ERR(cs);
	}

	if (INTEL_GEN(i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (INTEL_GEN(i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | 1 << 22;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unpin(vma);

	reservation_object_lock(obj->resv, NULL);
	reservation_object_add_excl_fence(obj->resv, &rq->fence);
	reservation_object_unlock(obj->resv);

	__i915_add_request(rq, true);

	return 0;
}

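/* Predicates deciding whether a coherency mode is usable on this device. */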
static bool always_valid(struct drm_i915_private *i915)
{
	return true;
}

static bool needs_mi_store_dword(struct drm_i915_private *i915)
{
	return intel_engine_can_store_dword(i915->engine[RCS]);
}

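/*
 * The coherency modes under test: each supplies a setter, a getter and a
 * validity check. "gpu" is write-only (get == NULL) and requires an engine
 * that can use MI_STORE_DWORD_IMM.
 */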
static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v);
	int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v);
	bool (*valid)(struct drm_i915_private *i915);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, always_valid },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};

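/*
 * Exercise every (overwrite, write, read) combination of the modes above
 * over a prime-numbered series of cacheline counts, checking that the value
 * written is always the value read back.
 */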
static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE/64;
	I915_RND_STATE(prng);
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	struct drm_i915_gem_object *obj;
	unsigned long count, n;
	u32 *offsets, *values;
	int err = 0;

	/* We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	mutex_lock(&i915->drm.struct_mutex);
	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(i915))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(i915))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(i915))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(obj)) {
						err = PTR_ERR(obj);
						goto unlock;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(obj, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(obj, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(obj, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					__i915_gem_object_release_unless_active(obj);
				}
			}
		}
	}
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(offsets);
	return err;

put_object:
	__i915_gem_object_release_unless_active(obj);
	goto unlock;
}

int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_subtests(tests, i915);
}