libc/AOR_v20.02/math/v_math.h

   1 /*
   2  * Vector math abstractions.
   3  *
   4  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   5  * See https://llvm.org/LICENSE.txt for license information.
   6  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   7  */
   8
   9 #ifndef _V_MATH_H
  10 #define _V_MATH_H
  11
  12 #ifndef WANT_VMATH
  13 /* Enable the build of vector math code.  */
  14 # define WANT_VMATH 1
  15 #endif
  16 #if WANT_VMATH
  17
/* The goal of this header is to allow vector and scalar
   builds of the same algorithm.  The provided intrinsic
   wrappers are also vector-length agnostic, so they can
   be implemented for SVE (or other SIMD architectures)
   too, and the code should then work on those targets.  */
  23
  24 #if SCALAR
  25 #define V_NAME(x) __s_##x
  26 #elif VPCS && __aarch64__
  27 #define V_NAME(x) __vn_##x
  28 #define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
  29 #else
  30 #define V_NAME(x) __v_##x
  31 #endif
  32
  33 #ifndef VPCS_ATTR
  34 #define VPCS_ATTR
  35 #endif
  36 #ifndef VPCS_ALIAS
  37 #define VPCS_ALIAS
  38 #endif
  39
  40 #include <stdint.h>
  41 #include "math_config.h"
  42
  43 typedef float f32_t;
  44 typedef uint32_t u32_t;
  45 typedef int32_t s32_t;
  46 typedef double f64_t;
  47 typedef uint64_t u64_t;
  48 typedef int64_t s64_t;
  49
  50 /* reinterpret as type1 from type2.  */
  51 static inline u32_t
  52 as_u32_f32 (f32_t x)
  53 {
  54   union { f32_t f; u32_t u; } r = {x};
  55   return r.u;
  56 }
  57 static inline f32_t
  58 as_f32_u32 (u32_t x)
  59 {
  60   union { u32_t u; f32_t f; } r = {x};
  61   return r.f;
  62 }
  63 static inline s32_t
  64 as_s32_u32 (u32_t x)
  65 {
  66   union { u32_t u; s32_t i; } r = {x};
  67   return r.i;
  68 }
  69 static inline u32_t
  70 as_u32_s32 (s32_t x)
  71 {
  72   union { s32_t i; u32_t u; } r = {x};
  73   return r.u;
  74 }
  75 static inline u64_t
  76 as_u64_f64 (f64_t x)
  77 {
  78   union { f64_t f; u64_t u; } r = {x};
  79   return r.u;
  80 }
  81 static inline f64_t
  82 as_f64_u64 (u64_t x)
  83 {
  84   union { u64_t u; f64_t f; } r = {x};
  85   return r.f;
  86 }
  87 static inline s64_t
  88 as_s64_u64 (u64_t x)
  89 {
  90   union { u64_t u; s64_t i; } r = {x};
  91   return r.i;
  92 }
  93 static inline u64_t
  94 as_u64_s64 (s64_t x)
  95 {
  96   union { s64_t i; u64_t u; } r = {x};
  97   return r.u;
  98 }
  99
 100 #if SCALAR
 101 #define V_SUPPORTED 1
 102 typedef f32_t v_f32_t;
 103 typedef u32_t v_u32_t;
 104 typedef s32_t v_s32_t;
 105 typedef f64_t v_f64_t;
 106 typedef u64_t v_u64_t;
 107 typedef s64_t v_s64_t;
 108
 109 static inline int
 110 v_lanes32 (void)
 111 {
 112   return 1;
 113 }
 114
 115 static inline v_f32_t
 116 v_f32 (f32_t x)
 117 {
 118   return x;
 119 }
 120 static inline v_u32_t
 121 v_u32 (u32_t x)
 122 {
 123   return x;
 124 }
 125 static inline v_s32_t
 126 v_s32 (s32_t x)
 127 {
 128   return x;
 129 }
 130
 131 static inline f32_t
 132 v_get_f32 (v_f32_t x, int i)
 133 {
 134   return x;
 135 }
 136 static inline u32_t
 137 v_get_u32 (v_u32_t x, int i)
 138 {
 139   return x;
 140 }
 141 static inline s32_t
 142 v_get_s32 (v_s32_t x, int i)
 143 {
 144   return x;
 145 }
 146
 147 static inline void
 148 v_set_f32 (v_f32_t *x, int i, f32_t v)
 149 {
 150   *x = v;
 151 }
 152 static inline void
 153 v_set_u32 (v_u32_t *x, int i, u32_t v)
 154 {
 155   *x = v;
 156 }
 157 static inline void
 158 v_set_s32 (v_s32_t *x, int i, s32_t v)
 159 {
 160   *x = v;
 161 }
 162
 163 /* true if any elements of a v_cond result is non-zero.  */
 164 static inline int
 165 v_any_u32 (v_u32_t x)
 166 {
 167   return x != 0;
 168 }
 169 /* to wrap the result of relational operators.  */
 170 static inline v_u32_t
 171 v_cond_u32 (v_u32_t x)
 172 {
 173   return x ? -1 : 0;
 174 }
 175 static inline v_f32_t
 176 v_abs_f32 (v_f32_t x)
 177 {
 178   return __builtin_fabsf (x);
 179 }
 180 static inline v_f32_t
 181 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
 182 {
 183   return __builtin_fmaf (x, y, z);
 184 }
 185 static inline v_f32_t
 186 v_round_f32 (v_f32_t x)
 187 {
 188   return __builtin_roundf (x);
 189 }
 190 static inline v_s32_t
 191 v_round_s32 (v_f32_t x)
 192 {
 193   return __builtin_lroundf (x); /* relies on -fno-math-errno.  */
 194 }
 195 /* convert to type1 from type2.  */
 196 static inline v_f32_t
 197 v_to_f32_s32 (v_s32_t x)
 198 {
 199   return x;
 200 }
 201 static inline v_f32_t
 202 v_to_f32_u32 (v_u32_t x)
 203 {
 204   return x;
 205 }
 206 /* reinterpret as type1 from type2.  */
 207 static inline v_u32_t
 208 v_as_u32_f32 (v_f32_t x)
 209 {
 210   union { v_f32_t f; v_u32_t u; } r = {x};
 211   return r.u;
 212 }
 213 static inline v_f32_t
 214 v_as_f32_u32 (v_u32_t x)
 215 {
 216   union { v_u32_t u; v_f32_t f; } r = {x};
 217   return r.f;
 218 }
 219 static inline v_s32_t
 220 v_as_s32_u32 (v_u32_t x)
 221 {
 222   union { v_u32_t u; v_s32_t i; } r = {x};
 223   return r.i;
 224 }
 225 static inline v_u32_t
 226 v_as_u32_s32 (v_s32_t x)
 227 {
 228   union { v_s32_t i; v_u32_t u; } r = {x};
 229   return r.u;
 230 }
 231 static inline v_f32_t
 232 v_lookup_f32 (const f32_t *tab, v_u32_t idx)
 233 {
 234   return tab[idx];
 235 }
 236 static inline v_u32_t
 237 v_lookup_u32 (const u32_t *tab, v_u32_t idx)
 238 {
 239   return tab[idx];
 240 }
 241 static inline v_f32_t
 242 v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
 243 {
 244   return f (x);
 245 }
 246 static inline v_f32_t
 247 v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
 248              v_u32_t p)
 249 {
 250   return f (x1, x2);
 251 }
 252
 253 static inline int
 254 v_lanes64 (void)
 255 {
 256   return 1;
 257 }
 258 static inline v_f64_t
 259 v_f64 (f64_t x)
 260 {
 261   return x;
 262 }
 263 static inline v_u64_t
 264 v_u64 (u64_t x)
 265 {
 266   return x;
 267 }
 268 static inline v_s64_t
 269 v_s64 (s64_t x)
 270 {
 271   return x;
 272 }
 273 static inline f64_t
 274 v_get_f64 (v_f64_t x, int i)
 275 {
 276   return x;
 277 }
 278 static inline void
 279 v_set_f64 (v_f64_t *x, int i, f64_t v)
 280 {
 281   *x = v;
 282 }
 283 /* true if any elements of a v_cond result is non-zero.  */
 284 static inline int
 285 v_any_u64 (v_u64_t x)
 286 {
 287   return x != 0;
 288 }
 289 /* to wrap the result of relational operators.  */
 290 static inline v_u64_t
 291 v_cond_u64 (v_u64_t x)
 292 {
 293   return x ? -1 : 0;
 294 }
 295 static inline v_f64_t
 296 v_abs_f64 (v_f64_t x)
 297 {
 298   return __builtin_fabs (x);
 299 }
 300 static inline v_f64_t
 301 v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
 302 {
 303   return __builtin_fma (x, y, z);
 304 }
 305 static inline v_f64_t
 306 v_round_f64 (v_f64_t x)
 307 {
 308   return __builtin_round (x);
 309 }
 310 static inline v_s64_t
 311 v_round_s64 (v_f64_t x)
 312 {
 313   return __builtin_lround (x); /* relies on -fno-math-errno.  */
 314 }
 315 /* convert to type1 from type2.  */
 316 static inline v_f64_t
 317 v_to_f64_s64 (v_s64_t x)
 318 {
 319   return x;
 320 }
 321 static inline v_f64_t
 322 v_to_f64_u64 (v_u64_t x)
 323 {
 324   return x;
 325 }
 326 /* reinterpret as type1 from type2.  */
 327 static inline v_u64_t
 328 v_as_u64_f64 (v_f64_t x)
 329 {
 330   union { v_f64_t f; v_u64_t u; } r = {x};
 331   return r.u;
 332 }
 333 static inline v_f64_t
 334 v_as_f64_u64 (v_u64_t x)
 335 {
 336   union { v_u64_t u; v_f64_t f; } r = {x};
 337   return r.f;
 338 }
 339 static inline v_s64_t
 340 v_as_s64_u64 (v_u64_t x)
 341 {
 342   union { v_u64_t u; v_s64_t i; } r = {x};
 343   return r.i;
 344 }
 345 static inline v_u64_t
 346 v_as_u64_s64 (v_s64_t x)
 347 {
 348   union { v_s64_t i; v_u64_t u; } r = {x};
 349   return r.u;
 350 }
 351 static inline v_f64_t
 352 v_lookup_f64 (const f64_t *tab, v_u64_t idx)
 353 {
 354   return tab[idx];
 355 }
 356 static inline v_u64_t
 357 v_lookup_u64 (const u64_t *tab, v_u64_t idx)
 358 {
 359   return tab[idx];
 360 }
 361 static inline v_f64_t
 362 v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
 363 {
 364   return f (x);
 365 }
 366
 367 #elif __aarch64__
 368 #define V_SUPPORTED 1
 369 #include <arm_neon.h>
 370 typedef float32x4_t v_f32_t;
 371 typedef uint32x4_t v_u32_t;
 372 typedef int32x4_t v_s32_t;
 373 typedef float64x2_t v_f64_t;
 374 typedef uint64x2_t v_u64_t;
 375 typedef int64x2_t v_s64_t;
 376
 377 static inline int
 378 v_lanes32 (void)
 379 {
 380   return 4;
 381 }
 382
 383 static inline v_f32_t
 384 v_f32 (f32_t x)
 385 {
 386   return (v_f32_t){x, x, x, x};
 387 }
 388 static inline v_u32_t
 389 v_u32 (u32_t x)
 390 {
 391   return (v_u32_t){x, x, x, x};
 392 }
 393 static inline v_s32_t
 394 v_s32 (s32_t x)
 395 {
 396   return (v_s32_t){x, x, x, x};
 397 }
 398
 399 static inline f32_t
 400 v_get_f32 (v_f32_t x, int i)
 401 {
 402   return x[i];
 403 }
 404 static inline u32_t
 405 v_get_u32 (v_u32_t x, int i)
 406 {
 407   return x[i];
 408 }
 409 static inline s32_t
 410 v_get_s32 (v_s32_t x, int i)
 411 {
 412   return x[i];
 413 }
 414
 415 static inline void
 416 v_set_f32 (v_f32_t *x, int i, f32_t v)
 417 {
 418   (*x)[i] = v;
 419 }
 420 static inline void
 421 v_set_u32 (v_u32_t *x, int i, u32_t v)
 422 {
 423   (*x)[i] = v;
 424 }
 425 static inline void
 426 v_set_s32 (v_s32_t *x, int i, s32_t v)
 427 {
 428   (*x)[i] = v;
 429 }
 430
 431 /* true if any elements of a v_cond result is non-zero.  */
 432 static inline int
 433 v_any_u32 (v_u32_t x)
 434 {
 435   /* assume elements in x are either 0 or -1u.  */
 436   return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
 437 }
 438 /* to wrap the result of relational operators.  */
 439 static inline v_u32_t
 440 v_cond_u32 (v_u32_t x)
 441 {
 442   return x;
 443 }
 444 static inline v_f32_t
 445 v_abs_f32 (v_f32_t x)
 446 {
 447   return vabsq_f32 (x);
 448 }
 449 static inline v_f32_t
 450 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
 451 {
 452   return vfmaq_f32 (z, x, y);
 453 }
 454 static inline v_f32_t
 455 v_round_f32 (v_f32_t x)
 456 {
 457   return vrndaq_f32 (x);
 458 }
 459 static inline v_s32_t
 460 v_round_s32 (v_f32_t x)
 461 {
 462   return vcvtaq_s32_f32 (x);
 463 }
 464 /* convert to type1 from type2.  */
 465 static inline v_f32_t
 466 v_to_f32_s32 (v_s32_t x)
 467 {
 468   return (v_f32_t){x[0], x[1], x[2], x[3]};
 469 }
 470 static inline v_f32_t
 471 v_to_f32_u32 (v_u32_t x)
 472 {
 473   return (v_f32_t){x[0], x[1], x[2], x[3]};
 474 }
 475 /* reinterpret as type1 from type2.  */
 476 static inline v_u32_t
 477 v_as_u32_f32 (v_f32_t x)
 478 {
 479   union { v_f32_t f; v_u32_t u; } r = {x};
 480   return r.u;
 481 }
 482 static inline v_f32_t
 483 v_as_f32_u32 (v_u32_t x)
 484 {
 485   union { v_u32_t u; v_f32_t f; } r = {x};
 486   return r.f;
 487 }
 488 static inline v_s32_t
 489 v_as_s32_u32 (v_u32_t x)
 490 {
 491   union { v_u32_t u; v_s32_t i; } r = {x};
 492   return r.i;
 493 }
 494 static inline v_u32_t
 495 v_as_u32_s32 (v_s32_t x)
 496 {
 497   union { v_s32_t i; v_u32_t u; } r = {x};
 498   return r.u;
 499 }
 500 static inline v_f32_t
 501 v_lookup_f32 (const f32_t *tab, v_u32_t idx)
 502 {
 503   return (v_f32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
 504 }
 505 static inline v_u32_t
 506 v_lookup_u32 (const u32_t *tab, v_u32_t idx)
 507 {
 508   return (v_u32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
 509 }
 510 static inline v_f32_t
 511 v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
 512 {
 513   return (v_f32_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
 514                    p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]};
 515 }
 516 static inline v_f32_t
 517 v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
 518              v_u32_t p)
 519 {
 520   return (
 521     v_f32_t){p[0] ? f (x1[0], x2[0]) : y[0], p[1] ? f (x1[1], x2[1]) : y[1],
 522              p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3]};
 523 }
 524
 525 static inline int
 526 v_lanes64 (void)
 527 {
 528   return 2;
 529 }
 530 static inline v_f64_t
 531 v_f64 (f64_t x)
 532 {
 533   return (v_f64_t){x, x};
 534 }
 535 static inline v_u64_t
 536 v_u64 (u64_t x)
 537 {
 538   return (v_u64_t){x, x};
 539 }
 540 static inline v_s64_t
 541 v_s64 (s64_t x)
 542 {
 543   return (v_s64_t){x, x};
 544 }
 545 static inline f64_t
 546 v_get_f64 (v_f64_t x, int i)
 547 {
 548   return x[i];
 549 }
 550 static inline void
 551 v_set_f64 (v_f64_t *x, int i, f64_t v)
 552 {
 553   (*x)[i] = v;
 554 }
 555 /* true if any elements of a v_cond result is non-zero.  */
 556 static inline int
 557 v_any_u64 (v_u64_t x)
 558 {
 559   /* assume elements in x are either 0 or -1u.  */
 560   return vpaddd_u64 (x) != 0;
 561 }
 562 /* to wrap the result of relational operators.  */
 563 static inline v_u64_t
 564 v_cond_u64 (v_u64_t x)
 565 {
 566   return x;
 567 }
 568 static inline v_f64_t
 569 v_abs_f64 (v_f64_t x)
 570 {
 571   return vabsq_f64 (x);
 572 }
 573 static inline v_f64_t
 574 v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
 575 {
 576   return vfmaq_f64 (z, x, y);
 577 }
 578 static inline v_f64_t
 579 v_round_f64 (v_f64_t x)
 580 {
 581   return vrndaq_f64 (x);
 582 }
 583 static inline v_s64_t
 584 v_round_s64 (v_f64_t x)
 585 {
 586   return vcvtaq_s64_f64 (x);
 587 }
 588 /* convert to type1 from type2.  */
 589 static inline v_f64_t
 590 v_to_f64_s64 (v_s64_t x)
 591 {
 592   return (v_f64_t){x[0], x[1]};
 593 }
 594 static inline v_f64_t
 595 v_to_f64_u64 (v_u64_t x)
 596 {
 597   return (v_f64_t){x[0], x[1]};
 598 }
 599 /* reinterpret as type1 from type2.  */
 600 static inline v_u64_t
 601 v_as_u64_f64 (v_f64_t x)
 602 {
 603   union { v_f64_t f; v_u64_t u; } r = {x};
 604   return r.u;
 605 }
 606 static inline v_f64_t
 607 v_as_f64_u64 (v_u64_t x)
 608 {
 609   union { v_u64_t u; v_f64_t f; } r = {x};
 610   return r.f;
 611 }
 612 static inline v_s64_t
 613 v_as_s64_u64 (v_u64_t x)
 614 {
 615   union {  v_u64_t u; v_s64_t i; } r = {x};
 616   return r.i;
 617 }
 618 static inline v_u64_t
 619 v_as_u64_s64 (v_s64_t x)
 620 {
 621   union { v_s64_t i; v_u64_t u; } r = {x};
 622   return r.u;
 623 }
 624 static inline v_f64_t
 625 v_lookup_f64 (const f64_t *tab, v_u64_t idx)
 626 {
 627   return (v_f64_t){tab[idx[0]], tab[idx[1]]};
 628 }
 629 static inline v_u64_t
 630 v_lookup_u64 (const u64_t *tab, v_u64_t idx)
 631 {
 632   return (v_u64_t){tab[idx[0]], tab[idx[1]]};
 633 }
 634 static inline v_f64_t
 635 v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
 636 {
 637   return (v_f64_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1]};
 638 }
 639 #endif
 640
 641 #endif
 642 #endif