// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"
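
/*
 * sort() takes a generic int (*cmp)(const void *, const void *); wrap
 * ktime_compare() to give it that prototype.
 */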
static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_fill_blt(obj, ce, 0);
			if (err)
				break;

			err = i915_gem_object_wait(obj,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;
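
		/*
		 * Report a trimmed throughput estimate: with the passes
		 * sorted, drop the fastest and slowest samples and weight
		 * the median double.
		 */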
		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
			engine->name,
			obj->base.size >> 10,
			div64_u64(mul_u32_u32(4 * obj->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}
static int perf_fill_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *obj;
		int err;

		obj = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = __perf_fill_blt(obj);
		i915_gem_object_put(obj);
		if (err)
			return err;
	}

	return 0;
}
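
/* As __perf_fill_blt(), but time blitter copies from src into dst. */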
static int __perf_copy_blt(struct drm_i915_gem_object *src,
			   struct drm_i915_gem_object *dst)
{
	struct drm_i915_private *i915 = to_i915(src->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_copy_blt(src, dst, ce);
			if (err)
				break;

			err = i915_gem_object_wait(dst,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
			engine->name,
			src->base.size >> 10,
			div64_u64(mul_u32_u32(4 * src->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}
static int perf_copy_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(src, dst);

		i915_gem_object_put(dst);
err_src:
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}
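
/*
 * Per-kthread state for the parallel fill/copy stress tests below: every
 * worker hammers the same engine and, with SINGLE_CTX, shares one context.
 */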
struct igt_thread_arg {
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	struct file *file;
	struct rnd_state prng;
	unsigned int n_cpus;
};
static int igt_fill_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct intel_engine_cs *engine = thread->engine;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	u64 total, max;
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context_for_engine(engine, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, 0);
	GEM_BUG_ON(IS_ERR(ce));

	/*
	 * If we have a tiny shared address space, like for the GGTT
	 * then we can't be too greedy.
	 */
	max = ce->vm->total / 2;
	if (i915_is_ggtt(ce->vm) || thread->ctx)
		max = div_u64(max, thread->n_cpus);

	total = PAGE_SIZE;
	do {
		/* Aim to keep the runtime under reasonable bounds! */
		const u32 max_phys_size = SZ_64K;
		u32 val = prandom_u32_state(prng);
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		total = min(total, max);
		sz = i915_prandom_u32_max_state(total, prng) + 1;
		phys_sz = sz % max_phys_size + 1;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);
		phys_sz = min(phys_sz, sz);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * Make sure the potentially async clflush does its job, if
		 * required.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_object_fill_blt(obj, ce, val);
		if (err)
			goto err_unpin;

		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
			if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
				drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);

		total <<= 1;
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}
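
/*
 * Same loop as igt_fill_blt_thread(), but blit from a CPU-filled source
 * object into a scribbled destination and verify the destination contents.
 */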
static int igt_copy_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct intel_engine_cs *engine = thread->engine;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *src, *dst;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	u64 total, max;
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context_for_engine(engine, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, 0);
	GEM_BUG_ON(IS_ERR(ce));

	/*
	 * If we have a tiny shared address space, like for the GGTT
	 * then we can't be too greedy.
	 */
	max = ce->vm->total / 2;
	if (i915_is_ggtt(ce->vm) || thread->ctx)
		max = div_u64(max, thread->n_cpus);

	total = PAGE_SIZE;
	do {
		/* Aim to keep the runtime under reasonable bounds! */
		const u32 max_phys_size = SZ_64K;
		u32 val = prandom_u32_state(prng);
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		total = min(total, max);
		sz = i915_prandom_u32_max_state(total, prng) + 1;
		phys_sz = sz % max_phys_size + 1;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);
		phys_sz = min(phys_sz, sz);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		src = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(src)) {
			err = PTR_ERR(src);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_src;
		}

		memset32(vaddr, val,
			 huge_gem_object_phys_size(src) / sizeof(u32));

		i915_gem_object_unpin_map(src);

		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
			src->cache_dirty = true;

		dst = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_put_src;
		}

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_dst;
		}

		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(dst) / sizeof(u32));

		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			dst->cache_dirty = true;

		err = i915_gem_object_copy_blt(src, dst, ce);
		if (err)
			goto err_unpin;

		err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
			if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
				drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(dst);

		i915_gem_object_put(src);
		i915_gem_object_put(dst);

		total <<= 1;
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(dst);
err_put_dst:
	i915_gem_object_put(dst);
err_put_src:
	i915_gem_object_put(src);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}
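
/*
 * Spawn one instance of blt_fn per online CPU (plus one) against the same
 * engine, then stop each worker once its timed loop completes and report
 * the first error encountered.
 */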
static int igt_threaded_blt(struct intel_engine_cs *engine,
			    int (*blt_fn)(void *arg),
			    unsigned int flags)
#define SINGLE_CTX BIT(0)
{
	struct igt_thread_arg *thread;
	struct task_struct **tsk;
	unsigned int n_cpus, i;
	I915_RND_STATE(prng);
	int err = 0;

	n_cpus = num_online_cpus() + 1;

	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
	if (!tsk)
		return 0;

	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
	if (!thread)
		goto out_tsk;

	thread[0].file = mock_file(engine->i915);
	if (IS_ERR(thread[0].file)) {
		err = PTR_ERR(thread[0].file);
		goto out_thread;
	}

	if (flags & SINGLE_CTX) {
		thread[0].ctx = live_context_for_engine(engine, thread[0].file);
		if (IS_ERR(thread[0].ctx)) {
			err = PTR_ERR(thread[0].ctx);
			goto out_file;
		}
	}

	for (i = 0; i < n_cpus; ++i) {
		thread[i].engine = engine;
		thread[i].file = thread[0].file;
		thread[i].ctx = thread[0].ctx;
		thread[i].n_cpus = n_cpus;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
		if (IS_ERR(tsk[i])) {
			err = PTR_ERR(tsk[i]);
			break;
		}

		get_task_struct(tsk[i]);
	}

	yield(); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		int status;

		if (IS_ERR_OR_NULL(tsk[i]))
			continue;

		status = kthread_stop(tsk[i]);
		if (status && !err)
			err = status;

		put_task_struct(tsk[i]);
	}

out_file:
	fput(thread[0].file);
out_thread:
	kfree(thread);
out_tsk:
	kfree(tsk);
	return err;
}
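
/* Run @fn as a threaded stress test on every user-visible copy engine. */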
static int test_copy_engines(struct drm_i915_private *i915,
			     int (*fn)(void *arg),
			     unsigned int flags)
{
	struct intel_engine_cs *engine;
	int ret;

	for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
		ret = igt_threaded_blt(engine, fn, flags);
		if (ret)
			return ret;
	}

	return 0;
}
static int igt_fill_blt(void *arg)
{
	return test_copy_engines(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
	return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
	return test_copy_engines(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
	return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
}
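
/*
 * Selftest entry points; skip everything if the GT is already wedged.
 */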
int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_fill_blt),
		SUBTEST(igt_fill_blt_ctx0),
		SUBTEST(igt_copy_blt),
		SUBTEST(igt_copy_blt_ctx0),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}
int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_fill_blt),
		SUBTEST(perf_copy_blt),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}