// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

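/*
 * Time several blitter fills of the object on each user-visible copy engine
 * and report an approximate throughput. The five pass times are sorted and
 * the middle samples weighted (t[1] + 2 * t[2] + t[3], i.e. four passes'
 * worth of time) against 4 * size bytes, which after scaling by 1e9 and
 * shifting down by 20 yields MiB/s.
 */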
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_fill_blt(obj, ce, 0);
			if (err)
				break;

			err = i915_gem_object_wait(obj,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
			engine->name,
			obj->base.size >> 10,
			div64_u64(mul_u32_u32(4 * obj->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

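/* Run the fill benchmark over a range of object sizes, from 4 KiB to 64 MiB. */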
static int perf_fill_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *obj;
		int err;

		obj = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = __perf_fill_blt(obj);
		i915_gem_object_put(obj);
		if (err)
			return err;
	}

	return 0;
}

static int __perf_copy_blt(struct drm_i915_gem_object *src,
			   struct drm_i915_gem_object *dst)
{
	struct drm_i915_private *i915 = to_i915(src->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_copy_blt(src, dst, ce);
			if (err)
				break;

			err = i915_gem_object_wait(dst,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
			engine->name,
			src->base.size >> 10,
			div64_u64(mul_u32_u32(4 * src->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

static int perf_copy_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(src, dst);

		i915_gem_object_put(dst);
err_src:
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

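/* Per-kthread argument block shared by the fill and copy stress tests. */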
struct igt_thread_arg {
	struct drm_i915_private *i915;
	struct i915_gem_context *ctx;
	struct file *file;
	struct rnd_state prng;
	unsigned int n_cpus;
};

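/*
 * Worker for igt_fill_blt/igt_fill_blt_ctx0: until the selftest timeout
 * expires, pick a randomly sized (and randomly backed) object, fill it with
 * a random value using the blitter on BCS0, then read it back through a CPU
 * mapping and verify every dword.
 */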
static int igt_fill_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		/*
		 * If we have a tiny shared address space, like for the GGTT
		 * then we can't be too greedy.
		 */
		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * Make sure the potentially async clflush does its job, if
		 * required.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_object_fill_blt(obj, ce, val);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(obj);
		err = i915_gem_object_set_to_cpu_domain(obj, false);
		i915_gem_object_unlock(obj);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

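/*
 * Worker for igt_copy_blt/igt_copy_blt_ctx0: as above, but fill the source
 * with the CPU, blit it into a second object on BCS0, and then verify the
 * destination contents from the CPU.
 */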
static int igt_copy_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *src, *dst;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		src = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(src)) {
			err = PTR_ERR(src);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_src;
		}

		memset32(vaddr, val,
			 huge_gem_object_phys_size(src) / sizeof(u32));

		i915_gem_object_unpin_map(src);

		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
			src->cache_dirty = true;

		dst = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_put_src;
		}

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_dst;
		}

		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(dst) / sizeof(u32));

		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			dst->cache_dirty = true;

		err = i915_gem_object_copy_blt(src, dst, ce);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(dst);
		err = i915_gem_object_set_to_cpu_domain(dst, false);
		i915_gem_object_unlock(dst);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(dst);

		i915_gem_object_put(src);
		i915_gem_object_put(dst);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(dst);
err_put_dst:
	i915_gem_object_put(dst);
err_put_src:
	i915_gem_object_put(src);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

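/*
 * Spawn one blt_fn kthread per online CPU (plus one) and let them hammer the
 * blitter concurrently until each thread's timeout expires. With SINGLE_CTX
 * all threads share one GEM context, otherwise each thread creates its own.
 */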
static int igt_threaded_blt(struct drm_i915_private *i915,
			    int (*blt_fn)(void *arg),
			    unsigned int flags)
#define SINGLE_CTX BIT(0)
{
	struct igt_thread_arg *thread;
	struct task_struct **tsk;
	unsigned int n_cpus, i;
	I915_RND_STATE(prng);
	int err = 0;

	n_cpus = num_online_cpus() + 1;

	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
	if (!tsk)
		return 0;

	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
	if (!thread)
		goto out_tsk;

	thread[0].file = mock_file(i915);
	if (IS_ERR(thread[0].file)) {
		err = PTR_ERR(thread[0].file);
		goto out_thread;
	}

	if (flags & SINGLE_CTX) {
		thread[0].ctx = live_context(i915, thread[0].file);
		if (IS_ERR(thread[0].ctx)) {
			err = PTR_ERR(thread[0].ctx);
			goto out_file;
		}
	}

	for (i = 0; i < n_cpus; ++i) {
		thread[i].i915 = i915;
		thread[i].file = thread[0].file;
		thread[i].ctx = thread[0].ctx;
		thread[i].n_cpus = n_cpus;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
		if (IS_ERR(tsk[i])) {
			err = PTR_ERR(tsk[i]);
			break;
		}

		get_task_struct(tsk[i]);
	}

	yield(); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		int status;

		if (IS_ERR_OR_NULL(tsk[i]))
			continue;

		status = kthread_stop(tsk[i]);
		if (status && !err)
			err = status;

		put_task_struct(tsk[i]);
	}

out_file:
	fput(thread[0].file);
out_thread:
	kfree(thread);
out_tsk:
	kfree(tsk);
	return err;
}

static int igt_fill_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX);
}

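/*
 * Selftest entry points. The live stress tests require a blitter engine
 * (BCS0) and an unwedged GT; the perf variants only log throughput numbers.
 */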
int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_fill_blt),
		SUBTEST(igt_fill_blt_ctx0),
		SUBTEST(igt_copy_blt),
		SUBTEST(igt_copy_blt_ctx0),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	if (!HAS_ENGINE(i915, BCS0))
		return 0;

	return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_fill_blt),
		SUBTEST(perf_copy_blt),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}