2 * Microbenchmark for math functions.
4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 * See https://llvm.org/LICENSE.txt for license information.
6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
20 /* Enable the build of vector math code. */
24 /* Number of measurements, best result is reported. */
28 /* Iterations over the array. */
32 static size_t trace_size
;
35 static long measurecount
= MEASURE
;
36 static long itercount
= ITER
;
38 #if __aarch64__ && WANT_VMATH
39 typedef __f64x2_t v_double
;
41 #define v_double_len() 2
43 static inline v_double
44 v_double_load (const double *p
)
46 return (v_double
){p
[0], p
[1]};
49 static inline v_double
50 v_double_dup (double x
)
52 return (v_double
){x
, x
};
55 typedef __f32x4_t v_float
;
57 #define v_float_len() 4
60 v_float_load (const float *p
)
62 return (v_float
){p
[0], p
[1], p
[2], p
[3]};
68 return (v_float
){x
, x
, x
, x
};
71 /* dummy definitions to make things compile. */
72 typedef double v_double
;
73 typedef float v_float
;
74 #define v_double_len(x) 1
75 #define v_double_load(x) (x)[0]
76 #define v_double_dup(x) (x)
77 #define v_float_len(x) 1
78 #define v_float_load(x) (x)[0]
79 #define v_float_dup(x) (x)
97 __v_dummy (v_double x
)
103 __v_dummyf (v_float x
)
109 __vpcs
static v_double
110 __vn_dummy (v_double x
)
115 __vpcs
static v_float
116 __vn_dummyf (v_float x
)
121 __vpcs
static v_float
122 xy__vn_powf (v_float x
)
124 return __vn_powf (x
, x
);
127 __vpcs
static v_float
128 xy_Z_powf (v_float x
)
130 return _ZGVnN4vv_powf (x
, x
);
133 __vpcs
static v_double
134 xy__vn_pow (v_double x
)
136 return __vn_pow (x
, x
);
139 __vpcs
static v_double
140 xy_Z_pow (v_double x
)
142 return _ZGVnN2vv_pow (x
, x
);
147 xy__v_powf (v_float x
)
149 return __v_powf (x
, x
);
153 xy__v_pow (v_double x
)
155 return __v_pow (x
, x
);
162 return __s_powf (x
, x
);
168 return __s_pow (x
, x
);
187 return pow (x
, 23.4);
193 return powf (x
, 23.4f
);
199 return pow (2.34, x
);
205 return powf (2.34f
, x
);
209 sincosf_wrap (float x
)
216 static const struct fun
225 double (*d
) (double);
227 v_double (*vd
) (v_double
);
228 v_float (*vf
) (v_float
);
230 __vpcs
v_double (*vnd
) (v_double
);
231 __vpcs
v_float (*vnf
) (v_float
);
235 #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
236 #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
237 #define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
238 #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
239 #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
240 #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
246 D (log
, 0.999, 1.001)
248 D (log2
, 0.999, 1.001)
249 {"pow", 'd', 0, 0.01, 11.1, {.d
= xypow
}},
257 F (log2f
, 0.01, 11.1)
258 {"powf", 'f', 0, 0.01, 11.1, {.f
= xypowf
}},
259 F (xpowf
, 0.01, 11.1)
261 {"sincosf", 'f', 0, 0.1, 0.7, {.f
= sincosf_wrap
}},
262 {"sincosf", 'f', 0, 0.8, 3.1, {.f
= sincosf_wrap
}},
263 {"sincosf", 'f', 0, -3.1, 3.1, {.f
= sincosf_wrap
}},
264 {"sincosf", 'f', 0, 3.3, 33.3, {.f
= sincosf_wrap
}},
265 {"sincosf", 'f', 0, 100, 1000, {.f
= sincosf_wrap
}},
266 {"sincosf", 'f', 0, 1e6
, 1e32
, {.f
= sincosf_wrap
}},
280 D (__s_sin
, -3.1, 3.1)
281 D (__s_cos
, -3.1, 3.1)
282 D (__s_exp
, -9.9, 9.9)
283 D (__s_log
, 0.01, 11.1)
284 {"__s_pow", 'd', 0, 0.01, 11.1, {.d
= xy__s_pow
}},
285 F (__s_expf
, -9.9, 9.9)
286 F (__s_expf_1u
, -9.9, 9.9)
287 F (__s_exp2f
, -9.9, 9.9)
288 F (__s_exp2f_1u
, -9.9, 9.9)
289 F (__s_logf
, 0.01, 11.1)
290 {"__s_powf", 'f', 0, 0.01, 11.1, {.f
= xy__s_powf
}},
291 F (__s_sinf
, -3.1, 3.1)
292 F (__s_cosf
, -3.1, 3.1)
294 VD (__v_dummy
, 1.0, 2.0)
295 VD (__v_sin
, -3.1, 3.1)
296 VD (__v_cos
, -3.1, 3.1)
297 VD (__v_exp
, -9.9, 9.9)
298 VD (__v_log
, 0.01, 11.1)
299 {"__v_pow", 'd', 'v', 0.01, 11.1, {.vd
= xy__v_pow
}},
300 VF (__v_dummyf
, 1.0, 2.0)
301 VF (__v_expf
, -9.9, 9.9)
302 VF (__v_expf_1u
, -9.9, 9.9)
303 VF (__v_exp2f
, -9.9, 9.9)
304 VF (__v_exp2f_1u
, -9.9, 9.9)
305 VF (__v_logf
, 0.01, 11.1)
306 {"__v_powf", 'f', 'v', 0.01, 11.1, {.vf
= xy__v_powf
}},
307 VF (__v_sinf
, -3.1, 3.1)
308 VF (__v_cosf
, -3.1, 3.1)
310 VND (__vn_dummy
, 1.0, 2.0)
311 VND (__vn_exp
, -9.9, 9.9)
312 VND (_ZGVnN2v_exp
, -9.9, 9.9)
313 VND (__vn_log
, 0.01, 11.1)
314 VND (_ZGVnN2v_log
, 0.01, 11.1)
315 {"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd
= xy__vn_pow
}},
316 {"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd
= xy_Z_pow
}},
317 VND (__vn_sin
, -3.1, 3.1)
318 VND (_ZGVnN2v_sin
, -3.1, 3.1)
319 VND (__vn_cos
, -3.1, 3.1)
320 VND (_ZGVnN2v_cos
, -3.1, 3.1)
321 VNF (__vn_dummyf
, 1.0, 2.0)
322 VNF (__vn_expf
, -9.9, 9.9)
323 VNF (_ZGVnN4v_expf
, -9.9, 9.9)
324 VNF (__vn_expf_1u
, -9.9, 9.9)
325 VNF (__vn_exp2f
, -9.9, 9.9)
326 VNF (_ZGVnN4v_exp2f
, -9.9, 9.9)
327 VNF (__vn_exp2f_1u
, -9.9, 9.9)
328 VNF (__vn_logf
, 0.01, 11.1)
329 VNF (_ZGVnN4v_logf
, 0.01, 11.1)
330 {"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf
= xy__vn_powf
}},
331 {"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf
= xy_Z_powf
}},
332 VNF (__vn_sinf
, -3.1, 3.1)
333 VNF (_ZGVnN4v_sinf
, -3.1, 3.1)
334 VNF (__vn_cosf
, -3.1, 3.1)
335 VNF (_ZGVnN4v_cosf
, -3.1, 3.1)
349 gen_linear (double lo
, double hi
)
351 for (int i
= 0; i
< N
; i
++)
352 A
[i
] = (lo
* (N
- i
) + hi
* i
) / N
;
356 genf_linear (double lo
, double hi
)
358 for (int i
= 0; i
< N
; i
++)
359 Af
[i
] = (float)(lo
* (N
- i
) + hi
* i
) / N
;
363 asdouble (uint64_t i
)
373 static uint64_t seed
= 0x0123456789abcdef;
376 frand (double lo
, double hi
)
378 seed
= 6364136223846793005ULL * seed
+ 1;
379 return lo
+ (hi
- lo
) * (asdouble (seed
>> 12 | 0x3ffULL
<< 52) - 1.0);
383 gen_rand (double lo
, double hi
)
385 for (int i
= 0; i
< N
; i
++)
386 A
[i
] = frand (lo
, hi
);
390 genf_rand (double lo
, double hi
)
392 for (int i
= 0; i
< N
; i
++)
393 Af
[i
] = (float)frand (lo
, hi
);
397 gen_trace (int index
)
399 for (int i
= 0; i
< N
; i
++)
400 A
[i
] = Trace
[index
+ i
];
404 genf_trace (int index
)
406 for (int i
= 0; i
< N
; i
++)
407 Af
[i
] = (float)Trace
[index
+ i
];
411 run_thruput (double f (double))
413 for (int i
= 0; i
< N
; i
++)
418 runf_thruput (float f (float))
420 for (int i
= 0; i
< N
; i
++)
424 volatile double zero
= 0;
427 run_latency (double f (double))
431 for (int i
= 0; i
< N
; i
++)
432 prev
= f (A
[i
] + prev
* z
);
436 runf_latency (float f (float))
438 float z
= (float)zero
;
440 for (int i
= 0; i
< N
; i
++)
441 prev
= f (Af
[i
] + prev
* z
);
445 run_v_thruput (v_double
f (v_double
))
447 for (int i
= 0; i
< N
; i
+= v_double_len ())
448 f (v_double_load (A
+i
));
452 runf_v_thruput (v_float
f (v_float
))
454 for (int i
= 0; i
< N
; i
+= v_float_len ())
455 f (v_float_load (Af
+i
));
459 run_v_latency (v_double
f (v_double
))
461 v_double z
= v_double_dup (zero
);
463 for (int i
= 0; i
< N
; i
+= v_double_len ())
464 prev
= f (v_double_load (A
+i
) + prev
* z
);
468 runf_v_latency (v_float
f (v_float
))
470 v_float z
= v_float_dup (zero
);
472 for (int i
= 0; i
< N
; i
+= v_float_len ())
473 prev
= f (v_float_load (Af
+i
) + prev
* z
);
478 run_vn_thruput (__vpcs v_double
f (v_double
))
480 for (int i
= 0; i
< N
; i
+= v_double_len ())
481 f (v_double_load (A
+i
));
485 runf_vn_thruput (__vpcs v_float
f (v_float
))
487 for (int i
= 0; i
< N
; i
+= v_float_len ())
488 f (v_float_load (Af
+i
));
492 run_vn_latency (__vpcs v_double
f (v_double
))
494 v_double z
= v_double_dup (zero
);
496 for (int i
= 0; i
< N
; i
+= v_double_len ())
497 prev
= f (v_double_load (A
+i
) + prev
* z
);
501 runf_vn_latency (__vpcs v_float
f (v_float
))
503 v_float z
= v_float_dup (zero
);
505 for (int i
= 0; i
< N
; i
+= v_float_len ())
506 prev
= f (v_float_load (Af
+i
) + prev
* z
);
514 if (clock_gettime (CLOCK_REALTIME
, &ts
))
516 return ts
.tv_sec
* 1000000000ULL + ts
.tv_nsec
;
519 #define TIMEIT(run, f) do { \
521 run (f); /* Warm up. */ \
522 for (int j = 0; j < measurecount; j++) \
524 uint64_t t0 = tic (); \
525 for (int i = 0; i < itercount; i++) \
527 uint64_t t1 = tic (); \
534 bench1 (const struct fun
*f
, int type
, double lo
, double hi
)
538 const char *s
= type
== 't' ? "rthruput" : "latency";
541 if (f
->vec
&& f
->prec
== 'd')
542 vlen
= v_double_len();
543 else if (f
->vec
&& f
->prec
== 'f')
544 vlen
= v_float_len();
546 if (f
->prec
== 'd' && type
== 't' && f
->vec
== 0)
547 TIMEIT (run_thruput
, f
->fun
.d
);
548 else if (f
->prec
== 'd' && type
== 'l' && f
->vec
== 0)
549 TIMEIT (run_latency
, f
->fun
.d
);
550 else if (f
->prec
== 'f' && type
== 't' && f
->vec
== 0)
551 TIMEIT (runf_thruput
, f
->fun
.f
);
552 else if (f
->prec
== 'f' && type
== 'l' && f
->vec
== 0)
553 TIMEIT (runf_latency
, f
->fun
.f
);
554 else if (f
->prec
== 'd' && type
== 't' && f
->vec
== 'v')
555 TIMEIT (run_v_thruput
, f
->fun
.vd
);
556 else if (f
->prec
== 'd' && type
== 'l' && f
->vec
== 'v')
557 TIMEIT (run_v_latency
, f
->fun
.vd
);
558 else if (f
->prec
== 'f' && type
== 't' && f
->vec
== 'v')
559 TIMEIT (runf_v_thruput
, f
->fun
.vf
);
560 else if (f
->prec
== 'f' && type
== 'l' && f
->vec
== 'v')
561 TIMEIT (runf_v_latency
, f
->fun
.vf
);
563 else if (f
->prec
== 'd' && type
== 't' && f
->vec
== 'n')
564 TIMEIT (run_vn_thruput
, f
->fun
.vnd
);
565 else if (f
->prec
== 'd' && type
== 'l' && f
->vec
== 'n')
566 TIMEIT (run_vn_latency
, f
->fun
.vnd
);
567 else if (f
->prec
== 'f' && type
== 't' && f
->vec
== 'n')
568 TIMEIT (runf_vn_thruput
, f
->fun
.vnf
);
569 else if (f
->prec
== 'f' && type
== 'l' && f
->vec
== 'n')
570 TIMEIT (runf_vn_latency
, f
->fun
.vnf
);
575 ns100
= (100 * dt
+ itercount
* N
/ 2) / (itercount
* N
);
576 printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f
->name
, s
,
577 (unsigned) (ns100
/ 100), (unsigned) (ns100
% 100),
578 (unsigned long long) dt
, lo
, hi
);
580 else if (type
== 'l')
582 ns100
= (100 * dt
+ itercount
* N
/ vlen
/ 2) / (itercount
* N
/ vlen
);
583 printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f
->name
, s
,
584 (unsigned) (ns100
/ 100), (unsigned) (ns100
% 100),
585 (unsigned long long) dt
, lo
, hi
);
591 bench (const struct fun
*f
, double lo
, double hi
, int type
, int gen
)
593 if (f
->prec
== 'd' && gen
== 'r')
595 else if (f
->prec
== 'd' && gen
== 'l')
597 else if (f
->prec
== 'd' && gen
== 't')
599 else if (f
->prec
== 'f' && gen
== 'r')
601 else if (f
->prec
== 'f' && gen
== 'l')
602 genf_linear (lo
, hi
);
603 else if (f
->prec
== 'f' && gen
== 't')
609 if (type
== 'b' || type
== 't')
610 bench1 (f
, 't', lo
, hi
);
612 if (type
== 'b' || type
== 'l')
613 bench1 (f
, 'l', lo
, hi
);
615 for (int i
= N
; i
< trace_size
; i
+= N
)
623 if (type
== 'b' || type
== 't')
624 bench1 (f
, 't', lo
, hi
);
626 if (type
== 'b' || type
== 'l')
627 bench1 (f
, 'l', lo
, hi
);
632 readtrace (const char *name
)
635 FILE *f
= strcmp (name
, "-") == 0 ? stdin
: fopen (name
, "r");
638 printf ("opening \"%s\" failed: %m\n", name
);
646 Trace
= realloc (Trace
, trace_size
* sizeof (Trace
[0]));
649 printf ("out of memory\n");
653 if (fscanf (f
, "%lf", Trace
+ n
) != 1)
657 if (ferror (f
) || n
== 0)
659 printf ("reading \"%s\" failed: %m\n", name
);
665 for (int i
= 0; n
< trace_size
; n
++, i
++)
672 printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
673 "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
676 printf ("%7s [run all benchmarks]\n", "all");
677 for (const struct fun
*f
= funtab
; f
->name
; f
++)
678 printf ("%7s [low: %g high: %g]\n", f
->name
, f
->lo
, f
->hi
);
683 main (int argc
, char *argv
[])
685 int usergen
= 0, gen
= 'r', type
= 'b', all
= 0;
686 double lo
= 0, hi
= 0;
687 const char *tracefile
= "-";
695 if (argv
[0][0] != '-')
697 else if (argc
>= 3 && strcmp (argv
[0], "-i") == 0)
700 lo
= strtod (argv
[1], 0);
701 hi
= strtod (argv
[2], 0);
705 else if (argc
>= 2 && strcmp (argv
[0], "-m") == 0)
707 measurecount
= strtol (argv
[1], 0, 0);
711 else if (argc
>= 2 && strcmp (argv
[0], "-c") == 0)
713 itercount
= strtol (argv
[1], 0, 0);
717 else if (argc
>= 2 && strcmp (argv
[0], "-g") == 0)
720 if (strchr ("rlt", gen
) == 0)
725 else if (argc
>= 2 && strcmp (argv
[0], "-f") == 0)
727 gen
= 't'; /* -f implies -g trace. */
732 else if (argc
>= 2 && strcmp (argv
[0], "-t") == 0)
735 if (strchr ("ltb", type
) == 0)
745 readtrace (tracefile
);
752 all
= strcmp (argv
[0], "all") == 0;
753 for (const struct fun
*f
= funtab
; f
->name
; f
++)
754 if (all
|| strcmp (argv
[0], f
->name
) == 0)
762 bench (f
, lo
, hi
, type
, gen
);
767 printf ("unknown function: %s\n", argv
[0]);