/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "../i915_selftest.h"
#include "i915_random.h"

#include "mock_gem_device.h"
#include "mock_timeline.h"

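/*
 * One step of the table-driven sync test below: the seqno to query, whether
 * __intel_timeline_sync_is_later() is expected to report it as already
 * signaled, and whether to record it afterwards. The field order is inferred
 * from the pass[] initialisers in igt_sync(); treat the names as assumptions.
 */
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};
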
static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

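/*
 * igt_sync() replays a fixed table of seqno queries and updates against a
 * mock timeline, checking that __intel_timeline_sync_is_later() reports a
 * seqno as already signaled exactly when expected, including across the u32
 * wrap from UINT_MAX back to 0.
 */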
static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline *tl;
	int order, offset;
	int ret = -ENODEV;

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

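	/*
	 * Exercise context ids straddling each power-of-two boundary
	 * (BIT_ULL(order) - 1, BIT_ULL(order), BIT_ULL(order) + 1): the first
	 * pass applies each table step across every such context, the second
	 * replays the whole table for one context at a time.
	 */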
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_destroy(tl);

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_destroy(tl);
	return ret;
}

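/* Pick an engine index in [0, I915_NUM_ENGINES) for the per-engine benchmark. */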
static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

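/*
 * bench_sync() times __intel_timeline_sync_set()/_is_later() on a mock
 * timeline under several id patterns: sparse random u64 ids, densely packed
 * in-order ids, a small per-engine set, and cyclic ids of growing stride.
 * The prng overhead measured first is subtracted from the random-id runs so
 * the reported ns/op reflects the sync map rather than the generator.
 */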
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline *tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself becomes a significant factor in the per-iteration
	 * timings. We try to compensate the results by measuring the overhead
	 * of the prng and subtract it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

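	/*
	 * prng32_1M now holds the measured cost, in ns, of 2^20 calls to
	 * prandom_u32_state(); the "(count * prng32_1M * 2) >> 20" terms below
	 * subtract the cost of the two u32 draws behind each random u64 id
	 * from the timed loops.
	 */
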
	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

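	/*
	 * Reseeding with the same selftest seed below replays the identical
	 * id sequence, so every lookup must find an entry we just set.
	 */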
	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(tl, id, 0)) {
			mock_timeline_destroy(tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_destroy(tl);

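	/*
	 * Repeat the insert/lookup timings with densely packed, monotonically
	 * increasing ids on a fresh timeline, as a contrast to the sparse
	 * random ids above.
	 */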
	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_destroy(tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_destroy(tl);

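	/*
	 * Limit ids to one per engine and mix lookups with occasional updates,
	 * approximating the steady-state check-then-record pattern a request
	 * performs against each engine it depends on.
	 */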
	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(tl, id, seqno))
			__intel_timeline_sync_set(tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_destroy(tl);

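	/*
	 * The loop below grows "order" in a Fibonacci-like sequence (1, 2, 3,
	 * 5, 8, ...) via the statement expression in the for() increment,
	 * while "(count & mask) << order" cycles through BIT(order) ids
	 * spaced BIT(order) apart at each size.
	 */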
	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		tl = mock_timeline(0);
		if (!tl)
			return -ENOMEM;

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (and avoid them).
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(tl, id, 0);
			__intel_timeline_sync_set(tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_destroy(tl);
	}

	return 0;
}

int i915_gem_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}