libc/AOR_v20.02/math/test/mathbench.c

   1 /*
   2  * Microbenchmark for math functions.
   3  *
   4  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   5  * See https://llvm.org/LICENSE.txt for license information.
   6  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   7  */
   8
   9 #undef _GNU_SOURCE
  10 #define _GNU_SOURCE 1
  11 #include <stdint.h>
  12 #include <stdlib.h>
  13 #include <stdio.h>
  14 #include <string.h>
  15 #include <time.h>
  16 #include <math.h>
  17 #include "mathlib.h"
  18
#ifndef WANT_VMATH
/* Enable the build of vector math code.  */
# define WANT_VMATH 1
#endif

/* Number of measurements, best result is reported.  */
#define MEASURE 60
/* Array size.  */
#define N 8000
/* Iterations over the array.  */
#define ITER 125

/* Samples loaded by readtrace; stays NULL unless a trace is read.  */
static double *Trace;
/* Number of elements in Trace (a multiple of N once readtrace returns).  */
static size_t trace_size;
/* Input buffers handed to the double and float benchmarks respectively.  */
static double A[N];
static float Af[N];
/* Run-time measurement and iteration counts; main overrides them for the
   -m and -c options.  */
static long measurecount = MEASURE;
static long itercount = ITER;
  37
  38 #if __aarch64__ && WANT_VMATH
  39 typedef __f64x2_t v_double;
  40
  41 #define v_double_len() 2
  42
  43 static inline v_double
  44 v_double_load (const double *p)
  45 {
  46   return (v_double){p[0], p[1]};
  47 }
  48
  49 static inline v_double
  50 v_double_dup (double x)
  51 {
  52   return (v_double){x, x};
  53 }
  54
  55 typedef __f32x4_t v_float;
  56
  57 #define v_float_len() 4
  58
  59 static inline v_float
  60 v_float_load (const float *p)
  61 {
  62   return (v_float){p[0], p[1], p[2], p[3]};
  63 }
  64
  65 static inline v_float
  66 v_float_dup (float x)
  67 {
  68   return (v_float){x, x, x, x};
  69 }
  70 #else
  71 /* dummy definitions to make things compile.  */
  72 typedef double v_double;
  73 typedef float v_float;
  74 #define v_double_len(x) 1
  75 #define v_double_load(x) (x)[0]
  76 #define v_double_dup(x) (x)
  77 #define v_float_len(x) 1
  78 #define v_float_load(x) (x)[0]
  79 #define v_float_dup(x) (x)
  80 #endif
  81
  82 static double
  83 dummy (double x)
  84 {
  85   return x;
  86 }
  87
  88 static float
  89 dummyf (float x)
  90 {
  91   return x;
  92 }
  93
/* Helpers that are only needed when the vector math code is built.  */
#if WANT_VMATH
#if __aarch64__
/* Identity functions with the vector signatures; they return their
   argument unchanged.  */
static v_double
__v_dummy (v_double x)
{
  return x;
}

static v_float
__v_dummyf (v_float x)
{
  return x;
}

#ifdef __vpcs
/* Same identities, declared with the __vpcs qualifier.
   NOTE(review): __vpcs is presumably supplied by mathlib.h to select the
   vector procedure call standard -- confirm.  */
__vpcs static v_double
__vn_dummy (v_double x)
{
  return x;
}

__vpcs static v_float
__vn_dummyf (v_float x)
{
  return x;
}

/* One-argument adapters for the two-argument pow variants: X is passed as
   both arguments so the function fits a single-argument table entry.  */
__vpcs static v_float
xy__vn_powf (v_float x)
{
  return __vn_powf (x, x);
}

__vpcs static v_float
xy_Z_powf (v_float x)
{
  return _ZGVnN4vv_powf (x, x);
}

__vpcs static v_double
xy__vn_pow (v_double x)
{
  return __vn_pow (x, x);
}

__vpcs static v_double
xy_Z_pow (v_double x)
{
  return _ZGVnN2vv_pow (x, x);
}
#endif

/* One-argument adapters for __v_powf and __v_pow, again passing X twice.  */
static v_float
xy__v_powf (v_float x)
{
  return __v_powf (x, x);
}

static v_double
xy__v_pow (v_double x)
{
  return __v_pow (x, x);
}
#endif

/* One-argument adapters for the scalar __s_powf and __s_pow; these are
   built on every target when WANT_VMATH is set.  */
static float
xy__s_powf (float x)
{
  return __s_powf (x, x);
}

static double
xy__s_pow (double x)
{
  return __s_pow (x, x);
}
#endif
 171
/* pow with X as both base and exponent.  */
static double
xypow (double x)
{
  return pow (x, x);
}

/* powf with X as both base and exponent.  */
static float
xypowf (float x)
{
  return powf (x, x);
}

/* pow with a varying base and the fixed exponent 23.4.  */
static double
xpow (double x)
{
  return pow (x, 23.4);
}

/* powf with a varying base and the fixed exponent 23.4f.  */
static float
xpowf (float x)
{
  return powf (x, 23.4f);
}

/* pow with the fixed base 2.34 and a varying exponent.  */
static double
ypow (double x)
{
  return pow (2.34, x);
}

/* powf with the fixed base 2.34f and a varying exponent.  */
static float
ypowf (float x)
{
  return powf (2.34f, x);
}

/* Adapt sincosf, which writes two results through pointers, to the
   float (float) signature by returning the sum of both results.  */
static float
sincosf_wrap (float x)
{
  float s, c;
  sincosf (x, &s, &c);
  return s + c;
}
 215
 216 static const struct fun
 217 {
 218   const char *name;
 219   int prec;
 220   int vec;
 221   double lo;
 222   double hi;
 223   union
 224   {
 225     double (*d) (double);
 226     float (*f) (float);
 227     v_double (*vd) (v_double);
 228     v_float (*vf) (v_float);
 229 #ifdef __vpcs
 230     __vpcs v_double (*vnd) (v_double);
 231     __vpcs v_float (*vnf) (v_float);
 232 #endif
 233   } fun;
 234 } funtab[] = {
 235 #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
 236 #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
 237 #define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
 238 #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
 239 #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
 240 #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
 241 D (dummy, 1.0, 2.0)
 242 D (exp, -9.9, 9.9)
 243 D (exp, 0.5, 1.0)
 244 D (exp2, -9.9, 9.9)
 245 D (log, 0.01, 11.1)
 246 D (log, 0.999, 1.001)
 247 D (log2, 0.01, 11.1)
 248 D (log2, 0.999, 1.001)
 249 {"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
 250 D (xpow, 0.01, 11.1)
 251 D (ypow, -9.9, 9.9)
 252
 253 F (dummyf, 1.0, 2.0)
 254 F (expf, -9.9, 9.9)
 255 F (exp2f, -9.9, 9.9)
 256 F (logf, 0.01, 11.1)
 257 F (log2f, 0.01, 11.1)
 258 {"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
 259 F (xpowf, 0.01, 11.1)
 260 F (ypowf, -9.9, 9.9)
 261 {"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
 262 {"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
 263 {"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
 264 {"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
 265 {"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
 266 {"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
 267 F (sinf, 0.1, 0.7)
 268 F (sinf, 0.8, 3.1)
 269 F (sinf, -3.1, 3.1)
 270 F (sinf, 3.3, 33.3)
 271 F (sinf, 100, 1000)
 272 F (sinf, 1e6, 1e32)
 273 F (cosf, 0.1, 0.7)
 274 F (cosf, 0.8, 3.1)
 275 F (cosf, -3.1, 3.1)
 276 F (cosf, 3.3, 33.3)
 277 F (cosf, 100, 1000)
 278 F (cosf, 1e6, 1e32)
 279 #if WANT_VMATH
 280 D (__s_sin, -3.1, 3.1)
 281 D (__s_cos, -3.1, 3.1)
 282 D (__s_exp, -9.9, 9.9)
 283 D (__s_log, 0.01, 11.1)
 284 {"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
 285 F (__s_expf, -9.9, 9.9)
 286 F (__s_expf_1u, -9.9, 9.9)
 287 F (__s_exp2f, -9.9, 9.9)
 288 F (__s_exp2f_1u, -9.9, 9.9)
 289 F (__s_logf, 0.01, 11.1)
 290 {"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
 291 F (__s_sinf, -3.1, 3.1)
 292 F (__s_cosf, -3.1, 3.1)
 293 #if __aarch64__
 294 VD (__v_dummy, 1.0, 2.0)
 295 VD (__v_sin, -3.1, 3.1)
 296 VD (__v_cos, -3.1, 3.1)
 297 VD (__v_exp, -9.9, 9.9)
 298 VD (__v_log, 0.01, 11.1)
 299 {"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
 300 VF (__v_dummyf, 1.0, 2.0)
 301 VF (__v_expf, -9.9, 9.9)
 302 VF (__v_expf_1u, -9.9, 9.9)
 303 VF (__v_exp2f, -9.9, 9.9)
 304 VF (__v_exp2f_1u, -9.9, 9.9)
 305 VF (__v_logf, 0.01, 11.1)
 306 {"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
 307 VF (__v_sinf, -3.1, 3.1)
 308 VF (__v_cosf, -3.1, 3.1)
 309 #ifdef __vpcs
 310 VND (__vn_dummy, 1.0, 2.0)
 311 VND (__vn_exp, -9.9, 9.9)
 312 VND (_ZGVnN2v_exp, -9.9, 9.9)
 313 VND (__vn_log, 0.01, 11.1)
 314 VND (_ZGVnN2v_log, 0.01, 11.1)
 315 {"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
 316 {"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
 317 VND (__vn_sin, -3.1, 3.1)
 318 VND (_ZGVnN2v_sin, -3.1, 3.1)
 319 VND (__vn_cos, -3.1, 3.1)
 320 VND (_ZGVnN2v_cos, -3.1, 3.1)
 321 VNF (__vn_dummyf, 1.0, 2.0)
 322 VNF (__vn_expf, -9.9, 9.9)
 323 VNF (_ZGVnN4v_expf, -9.9, 9.9)
 324 VNF (__vn_expf_1u, -9.9, 9.9)
 325 VNF (__vn_exp2f, -9.9, 9.9)
 326 VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
 327 VNF (__vn_exp2f_1u, -9.9, 9.9)
 328 VNF (__vn_logf, 0.01, 11.1)
 329 VNF (_ZGVnN4v_logf, 0.01, 11.1)
 330 {"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
 331 {"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
 332 VNF (__vn_sinf, -3.1, 3.1)
 333 VNF (_ZGVnN4v_sinf, -3.1, 3.1)
 334 VNF (__vn_cosf, -3.1, 3.1)
 335 VNF (_ZGVnN4v_cosf, -3.1, 3.1)
 336 #endif
 337 #endif
 338 #endif
 339 {0},
 340 #undef F
 341 #undef D
 342 #undef VF
 343 #undef VD
 344 #undef VNF
 345 #undef VND
 346 };
 347
 348 static void
 349 gen_linear (double lo, double hi)
 350 {
 351   for (int i = 0; i < N; i++)
 352     A[i] = (lo * (N - i) + hi * i) / N;
 353 }
 354
 355 static void
 356 genf_linear (double lo, double hi)
 357 {
 358   for (int i = 0; i < N; i++)
 359     Af[i] = (float)(lo * (N - i) + hi * i) / N;
 360 }
 361
 362 static inline double
 363 asdouble (uint64_t i)
 364 {
 365   union
 366   {
 367     uint64_t i;
 368     double f;
 369   } u = {i};
 370   return u.f;
 371 }
 372
 373 static uint64_t seed = 0x0123456789abcdef;
 374
 375 static double
 376 frand (double lo, double hi)
 377 {
 378   seed = 6364136223846793005ULL * seed + 1;
 379   return lo + (hi - lo) * (asdouble (seed >> 12 | 0x3ffULL << 52) - 1.0);
 380 }
 381
 382 static void
 383 gen_rand (double lo, double hi)
 384 {
 385   for (int i = 0; i < N; i++)
 386     A[i] = frand (lo, hi);
 387 }
 388
 389 static void
 390 genf_rand (double lo, double hi)
 391 {
 392   for (int i = 0; i < N; i++)
 393     Af[i] = (float)frand (lo, hi);
 394 }
 395
 396 static void
 397 gen_trace (int index)
 398 {
 399   for (int i = 0; i < N; i++)
 400     A[i] = Trace[index + i];
 401 }
 402
 403 static void
 404 genf_trace (int index)
 405 {
 406   for (int i = 0; i < N; i++)
 407     Af[i] = (float)Trace[index + i];
 408 }
 409
/* Throughput loop for double functions: call F once per element of A.
   The results are discarded, so no call takes its input from an earlier
   call.  */
static void
run_thruput (double f (double))
{
  for (int i = 0; i < N; i++)
    f (A[i]);
}

/* Throughput loop for float functions, reading from Af.  */
static void
runf_thruput (float f (float))
{
  for (int i = 0; i < N; i++)
    f (Af[i]);
}

/* A zero that is read through a volatile object, so the latency loops
   obtain it at run time.  */
volatile double zero = 0;

/* Latency loop for double functions: the argument of each call is
   A[i] + prev * z, where prev is the previous call's result and z is the
   value read from zero, so every call takes an input computed from the
   call before it.  */
static void
run_latency (double f (double))
{
  double z = zero;
  double prev = z;
  for (int i = 0; i < N; i++)
    prev = f (A[i] + prev * z);
}

/* Latency loop for float functions, reading from Af; same chaining of
   each call to the previous result as run_latency.  */
static void
runf_latency (float f (float))
{
  float z = (float)zero;
  float prev = z;
  for (int i = 0; i < N; i++)
    prev = f (Af[i] + prev * z);
}
 443
/* Throughput loop for vector double functions: step through A in chunks of
   v_double_len () elements, load each chunk and call F on it.  Results are
   discarded.  */
static void
run_v_thruput (v_double f (v_double))
{
  for (int i = 0; i < N; i += v_double_len ())
    f (v_double_load (A+i));
}

/* Throughput loop for vector float functions, stepping through Af in
   chunks of v_float_len () elements.  */
static void
runf_v_thruput (v_float f (v_float))
{
  for (int i = 0; i < N; i += v_float_len ())
    f (v_float_load (Af+i));
}

/* Latency loop for vector double functions: each argument is the loaded
   chunk plus prev * z, where prev is the previous result and z is zero
   duplicated across the vector, so each call takes an input computed from
   the call before it.  */
static void
run_v_latency (v_double f (v_double))
{
  v_double z = v_double_dup (zero);
  v_double prev = z;
  for (int i = 0; i < N; i += v_double_len ())
    prev = f (v_double_load (A+i) + prev * z);
}

/* Latency loop for vector float functions; same chaining as
   run_v_latency, over Af.  */
static void
runf_v_latency (v_float f (v_float))
{
  v_float z = v_float_dup (zero);
  v_float prev = z;
  for (int i = 0; i < N; i += v_float_len ())
    prev = f (v_float_load (Af+i) + prev * z);
}
 475
/* Variants of the vector loops for functions declared with __vpcs.  The
   loop bodies are the same as in the run_v_* functions; only the type of
   F differs.  */
#ifdef __vpcs
/* Throughput, vector double.  */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
  for (int i = 0; i < N; i += v_double_len ())
    f (v_double_load (A+i));
}

/* Throughput, vector float.  */
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
  for (int i = 0; i < N; i += v_float_len ())
    f (v_float_load (Af+i));
}

/* Latency, vector double: each argument includes prev * z, with prev the
   previous result and z a vector of the value read from zero.  */
static void
run_vn_latency (__vpcs v_double f (v_double))
{
  v_double z = v_double_dup (zero);
  v_double prev = z;
  for (int i = 0; i < N; i += v_double_len ())
    prev = f (v_double_load (A+i) + prev * z);
}

/* Latency, vector float.  */
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
  v_float z = v_float_dup (zero);
  v_float prev = z;
  for (int i = 0; i < N; i += v_float_len ())
    prev = f (v_float_load (Af+i) + prev * z);
}
#endif
 509
 510 static uint64_t
 511 tic (void)
 512 {
 513   struct timespec ts;
 514   if (clock_gettime (CLOCK_REALTIME, &ts))
 515     abort ();
 516   return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
 517 }
 518
 519 #define TIMEIT(run, f) do { \
 520   dt = -1; \
 521   run (f); /* Warm up.  */ \
 522   for (int j = 0; j < measurecount; j++) \
 523     { \
 524       uint64_t t0 = tic (); \
 525       for (int i = 0; i < itercount; i++) \
 526         run (f); \
 527       uint64_t t1 = tic (); \
 528       if (t1 - t0 < dt) \
 529         dt = t1 - t0; \
 530     } \
 531 } while (0)
 532
 533 static void
 534 bench1 (const struct fun *f, int type, double lo, double hi)
 535 {
 536   uint64_t dt = 0;
 537   uint64_t ns100;
 538   const char *s = type == 't' ? "rthruput" : "latency";
 539   int vlen = 1;
 540
 541   if (f->vec && f->prec == 'd')
 542     vlen = v_double_len();
 543   else if (f->vec && f->prec == 'f')
 544     vlen = v_float_len();
 545
 546   if (f->prec == 'd' && type == 't' && f->vec == 0)
 547     TIMEIT (run_thruput, f->fun.d);
 548   else if (f->prec == 'd' && type == 'l' && f->vec == 0)
 549     TIMEIT (run_latency, f->fun.d);
 550   else if (f->prec == 'f' && type == 't' && f->vec == 0)
 551     TIMEIT (runf_thruput, f->fun.f);
 552   else if (f->prec == 'f' && type == 'l' && f->vec == 0)
 553     TIMEIT (runf_latency, f->fun.f);
 554   else if (f->prec == 'd' && type == 't' && f->vec == 'v')
 555     TIMEIT (run_v_thruput, f->fun.vd);
 556   else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
 557     TIMEIT (run_v_latency, f->fun.vd);
 558   else if (f->prec == 'f' && type == 't' && f->vec == 'v')
 559     TIMEIT (runf_v_thruput, f->fun.vf);
 560   else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
 561     TIMEIT (runf_v_latency, f->fun.vf);
 562 #ifdef __vpcs
 563   else if (f->prec == 'd' && type == 't' && f->vec == 'n')
 564     TIMEIT (run_vn_thruput, f->fun.vnd);
 565   else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
 566     TIMEIT (run_vn_latency, f->fun.vnd);
 567   else if (f->prec == 'f' && type == 't' && f->vec == 'n')
 568     TIMEIT (runf_vn_thruput, f->fun.vnf);
 569   else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
 570     TIMEIT (runf_vn_latency, f->fun.vnf);
 571 #endif
 572
 573   if (type == 't')
 574     {
 575       ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
 576       printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
 577               (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
 578               (unsigned long long) dt, lo, hi);
 579     }
 580   else if (type == 'l')
 581     {
 582       ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
 583       printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
 584               (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
 585               (unsigned long long) dt, lo, hi);
 586     }
 587   fflush (stdout);
 588 }
 589
 590 static void
 591 bench (const struct fun *f, double lo, double hi, int type, int gen)
 592 {
 593   if (f->prec == 'd' && gen == 'r')
 594     gen_rand (lo, hi);
 595   else if (f->prec == 'd' && gen == 'l')
 596     gen_linear (lo, hi);
 597   else if (f->prec == 'd' && gen == 't')
 598     gen_trace (0);
 599   else if (f->prec == 'f' && gen == 'r')
 600     genf_rand (lo, hi);
 601   else if (f->prec == 'f' && gen == 'l')
 602     genf_linear (lo, hi);
 603   else if (f->prec == 'f' && gen == 't')
 604     genf_trace (0);
 605
 606   if (gen == 't')
 607     hi = trace_size / N;
 608
 609   if (type == 'b' || type == 't')
 610     bench1 (f, 't', lo, hi);
 611
 612   if (type == 'b' || type == 'l')
 613     bench1 (f, 'l', lo, hi);
 614
 615   for (int i = N; i < trace_size; i += N)
 616     {
 617       if (f->prec == 'd')
 618         gen_trace (i);
 619       else
 620         genf_trace (i);
 621
 622       lo = i / N;
 623       if (type == 'b' || type == 't')
 624         bench1 (f, 't', lo, hi);
 625
 626       if (type == 'b' || type == 'l')
 627         bench1 (f, 'l', lo, hi);
 628     }
 629 }
 630
 631 static void
 632 readtrace (const char *name)
 633 {
 634         int n = 0;
 635         FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
 636         if (!f)
 637           {
 638             printf ("opening \"%s\" failed: %m\n", name);
 639             exit (1);
 640           }
 641         for (;;)
 642           {
 643             if (n >= trace_size)
 644               {
 645                 trace_size += N;
 646                 Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
 647                 if (Trace == NULL)
 648                   {
 649                     printf ("out of memory\n");
 650                     exit (1);
 651                   }
 652               }
 653             if (fscanf (f, "%lf", Trace + n) != 1)
 654               break;
 655             n++;
 656           }
 657         if (ferror (f) || n == 0)
 658           {
 659             printf ("reading \"%s\" failed: %m\n", name);
 660             exit (1);
 661           }
 662         fclose (f);
 663         if (n % N == 0)
 664           trace_size = n;
 665         for (int i = 0; n < trace_size; n++, i++)
 666           Trace[n] = Trace[i];
 667 }
 668
 669 static void
 670 usage (void)
 671 {
 672   printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
 673           "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
 674           "[func2 ..]\n");
 675   printf ("func:\n");
 676   printf ("%7s [run all benchmarks]\n", "all");
 677   for (const struct fun *f = funtab; f->name; f++)
 678     printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
 679   exit (1);
 680 }
 681
 682 int
 683 main (int argc, char *argv[])
 684 {
 685   int usergen = 0, gen = 'r', type = 'b', all = 0;
 686   double lo = 0, hi = 0;
 687   const char *tracefile = "-";
 688
 689   argv++;
 690   argc--;
 691   for (;;)
 692     {
 693       if (argc <= 0)
 694         usage ();
 695       if (argv[0][0] != '-')
 696         break;
 697       else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
 698         {
 699           usergen = 1;
 700           lo = strtod (argv[1], 0);
 701           hi = strtod (argv[2], 0);
 702           argv += 3;
 703           argc -= 3;
 704         }
 705       else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
 706         {
 707           measurecount = strtol (argv[1], 0, 0);
 708           argv += 2;
 709           argc -= 2;
 710         }
 711       else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
 712         {
 713           itercount = strtol (argv[1], 0, 0);
 714           argv += 2;
 715           argc -= 2;
 716         }
 717       else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
 718         {
 719           gen = argv[1][0];
 720           if (strchr ("rlt", gen) == 0)
 721             usage ();
 722           argv += 2;
 723           argc -= 2;
 724         }
 725       else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
 726         {
 727           gen = 't';  /* -f implies -g trace.  */
 728           tracefile = argv[1];
 729           argv += 2;
 730           argc -= 2;
 731         }
 732       else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
 733         {
 734           type = argv[1][0];
 735           if (strchr ("ltb", type) == 0)
 736             usage ();
 737           argv += 2;
 738           argc -= 2;
 739         }
 740       else
 741         usage ();
 742     }
 743   if (gen == 't')
 744     {
 745       readtrace (tracefile);
 746       lo = hi = 0;
 747       usergen = 1;
 748     }
 749   while (argc > 0)
 750     {
 751       int found = 0;
 752       all = strcmp (argv[0], "all") == 0;
 753       for (const struct fun *f = funtab; f->name; f++)
 754         if (all || strcmp (argv[0], f->name) == 0)
 755           {
 756             found = 1;
 757             if (!usergen)
 758               {
 759                 lo = f->lo;
 760                 hi = f->hi;
 761               }
 762             bench (f, lo, hi, type, gen);
 763             if (usergen && !all)
 764               break;
 765           }
 766       if (!found)
 767         printf ("unknown function: %s\n", argv[0]);
 768       argv++;
 769       argc--;
 770     }
 771   return 0;
 772 }