// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

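/*
 * Time blitter fills of @obj on each user-visible copy engine: five passes
 * per engine, sorted so that the reported MiB/s is a weighted average of
 * the middle three samples, discarding the fastest and slowest pass.
 */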
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_fill_blt(obj, ce, 0);
			if (err)
				break;

			err = i915_gem_object_wait(obj,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
			engine->name,
			obj->base.size >> 10,
			div64_u64(mul_u32_u32(4 * obj->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

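/* Report fill throughput over a range of object sizes. */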
static int perf_fill_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *obj;
		int err;

		obj = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = __perf_fill_blt(obj);
		i915_gem_object_put(obj);
		if (err)
			return err;
	}

	return 0;
}

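/* As for __perf_fill_blt, but timing blitter copies from @src to @dst. */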
static int __perf_copy_blt(struct drm_i915_gem_object *src,
			   struct drm_i915_gem_object *dst)
{
	struct drm_i915_private *i915 = to_i915(src->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_copy_blt(src, dst, ce);
			if (err)
				break;

			err = i915_gem_object_wait(dst,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
			engine->name,
			src->base.size >> 10,
			div64_u64(mul_u32_u32(4 * src->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

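/* Report copy throughput over the same range of object sizes. */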
static int perf_copy_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(src, dst);

		i915_gem_object_put(dst);
err_src:
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

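/*
 * Shared state for the threaded stress tests below. Each of the n_cpus
 * worker threads receives its own PRNG state; ctx is only set when all
 * threads are meant to share a single context (SINGLE_CTX).
 */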
struct igt_thread_arg {
	struct drm_i915_private *i915;
	struct i915_gem_context *ctx;
	struct file *file;
	struct rnd_state prng;
	unsigned int n_cpus;
};

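/*
 * Worker: repeatedly fill a randomly sized huge object with a random value
 * using the blitter, then verify every u32 of the backing store from the
 * CPU, until the selftest timeout expires.
 */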
static int igt_fill_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		/*
		 * If we have a tiny shared address space, like for the GGTT
		 * then we can't be too greedy.
		 */
		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * Make sure the potentially async clflush does its job, if
		 * required.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_object_fill_blt(obj, ce, val);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(obj);
		err = i915_gem_object_set_to_cpu_domain(obj, false);
		i915_gem_object_unlock(obj);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

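/*
 * Worker: as igt_fill_blt_thread, but blit from a CPU-initialised source
 * into a scribbled destination and verify the copy.
 */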
static int igt_copy_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *src, *dst;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		src = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(src)) {
			err = PTR_ERR(src);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_src;
		}

		memset32(vaddr, val,
			 huge_gem_object_phys_size(src) / sizeof(u32));

		i915_gem_object_unpin_map(src);

		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
			src->cache_dirty = true;

		dst = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_put_src;
		}

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_dst;
		}

		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(dst) / sizeof(u32));

		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			dst->cache_dirty = true;

		err = i915_gem_object_copy_blt(src, dst, ce);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(dst);
		err = i915_gem_object_set_to_cpu_domain(dst, false);
		i915_gem_object_unlock(dst);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(dst);

		i915_gem_object_put(src);
		i915_gem_object_put(dst);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(dst);
err_put_dst:
	i915_gem_object_put(dst);
err_put_src:
	i915_gem_object_put(src);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

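/*
 * Spawn one kthread per online CPU (plus one to oversubscribe) running
 * @blt_fn, then stop them all and propagate the first error reported.
 */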
static int igt_threaded_blt(struct drm_i915_private *i915,
			    int (*blt_fn)(void *arg),
			    unsigned int flags)
#define SINGLE_CTX BIT(0)
{
	struct igt_thread_arg *thread;
	struct task_struct **tsk;
	unsigned int n_cpus, i;
	I915_RND_STATE(prng);
	int err = 0;

	n_cpus = num_online_cpus() + 1;

	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
	if (!tsk)
		return 0;

	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
	if (!thread)
		goto out_tsk;

	thread[0].file = mock_file(i915);
	if (IS_ERR(thread[0].file)) {
		err = PTR_ERR(thread[0].file);
		goto out_thread;
	}

	if (flags & SINGLE_CTX) {
		thread[0].ctx = live_context(i915, thread[0].file);
		if (IS_ERR(thread[0].ctx)) {
			err = PTR_ERR(thread[0].ctx);
			goto out_file;
		}
	}

	for (i = 0; i < n_cpus; ++i) {
		thread[i].i915 = i915;
		thread[i].file = thread[0].file;
		thread[i].ctx = thread[0].ctx;
		thread[i].n_cpus = n_cpus;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
		if (IS_ERR(tsk[i])) {
			err = PTR_ERR(tsk[i]);
			break;
		}

		get_task_struct(tsk[i]);
	}

	yield(); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		int status;

		if (IS_ERR_OR_NULL(tsk[i]))
			continue;

		status = kthread_stop(tsk[i]);
		if (status && !err)
			err = status;

		put_task_struct(tsk[i]);
	}

out_file:
	fput(thread[0].file);
out_thread:
	kfree(thread);
out_tsk:
	kfree(tsk);
	return err;
}

static int igt_fill_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX);
}

int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_fill_blt),
		SUBTEST(igt_fill_blt_ctx0),
		SUBTEST(igt_copy_blt),
		SUBTEST(igt_copy_blt_ctx0),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	if (!HAS_ENGINE(i915, BCS0))
		return 0;

	return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_fill_blt),
		SUBTEST(perf_copy_blt),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}