src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
  38
  39 #include "config.h"
  40
  41 #include "gromacs/math/utilities.h"
  42 #include "gromacs/utility/basedefinitions.h"
  43
  44 #include "impl_ibm_vsx_definitions.h"
  45
  46 namespace gmx
  47 {
  48
  49 class SimdFloat
  50 {
  51     public:
  52         SimdFloat() {}
  53
  54         // gcc-4.9 does not recognize that we use the parameter
  55         SimdFloat(float gmx_unused f) : simdInternal_(vec_splats(f)) {}
  56
  57         // Internal utility constructor to simplify return statements
  58         SimdFloat(__vector float simd) : simdInternal_(simd) {}
  59
  60         __vector float  simdInternal_;
  61 };
  62
  63 class SimdFInt32
  64 {
  65     public:
  66         SimdFInt32() {}
  67
  68         // gcc-4.9 does not recognize that we use the parameter
  69         SimdFInt32(std::int32_t gmx_unused i) : simdInternal_(vec_splats(i)) {}
  70
  71         // Internal utility constructor to simplify return statements
  72         SimdFInt32(__vector signed int simd) : simdInternal_(simd) {}
  73
  74         __vector signed int  simdInternal_;
  75 };
  76
  77 class SimdFBool
  78 {
  79     public:
  80         SimdFBool() {}
  81
  82         SimdFBool(bool b) : simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats( b ? 0xFFFFFFFF : 0))) {}
  83
  84         // Internal utility constructor to simplify return statements
  85         SimdFBool(__vector vsxBool int simd) : simdInternal_(simd) {}
  86
  87         __vector vsxBool int  simdInternal_;
  88 };
  89
  90 class SimdFIBool
  91 {
  92     public:
  93         SimdFIBool() {}
  94
  95         SimdFIBool(bool b) : simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats( b ? 0xFFFFFFFF : 0))) {}
  96
  97         // Internal utility constructor to simplify return statements
  98         SimdFIBool(__vector vsxBool int simd) : simdInternal_(simd) {}
  99
 100         __vector vsxBool int  simdInternal_;
 101 };
 102
 103 // Note that the interfaces we use here have been a mess in xlc;
 104 // currently version 13.1.5 is required.
 105
 106 static inline SimdFloat gmx_simdcall
 107 simdLoad(const float *m, SimdFloatTag = {})
 108 {
 109     return {
 110                *reinterpret_cast<const __vector float *>(m)
 111     };
 112 }
 113
 114 static inline void gmx_simdcall
 115 store(float *m, SimdFloat a)
 116 {
 117     *reinterpret_cast<__vector float *>(m) = a.simdInternal_;
 118 }
 119
 120 static inline SimdFloat gmx_simdcall
 121 simdLoadU(const float *m, SimdFloatTag = {})
 122 {
 123     return {
 124 #if __GNUC__ < 7
 125                *reinterpret_cast<const __vector float *>(m)
 126 #else
 127                vec_xl(0, m)
 128 #endif
 129     };
 130 }
 131
 132 static inline void gmx_simdcall
 133 storeU(float *m, SimdFloat a)
 134 {
 135 #if __GNUC__ < 7
 136     *reinterpret_cast<__vector float *>(m) = a.simdInternal_;
 137 #else
 138     vec_xst(a.simdInternal_, 0, m);
 139 #endif
 140 }
 141
 142 static inline SimdFloat gmx_simdcall
 143 setZeroF()
 144 {
 145     return {
 146                vec_splats(0.0f)
 147     };
 148 }
 149
 150 static inline SimdFInt32 gmx_simdcall
 151 simdLoad(const std::int32_t * m, SimdFInt32Tag)
 152 {
 153     return {
 154                *reinterpret_cast<const __vector int *>(m)
 155     };
 156 }
 157
 158 static inline void gmx_simdcall
 159 store(std::int32_t * m, SimdFInt32 a)
 160 {
 161     *reinterpret_cast<__vector int *>(m) = a.simdInternal_;
 162 }
 163
 164 static inline SimdFInt32 gmx_simdcall
 165 simdLoadU(const std::int32_t *m, SimdFInt32Tag)
 166 {
 167     return {
 168 #if __GNUC__ < 7
 169                *reinterpret_cast<const __vector int *>(m)
 170 #else
 171                vec_xl(0, m)
 172 #endif
 173     };
 174 }
 175
 176 static inline void gmx_simdcall
 177 storeU(std::int32_t * m, SimdFInt32 a)
 178 {
 179 #if __GNUC__ < 7
 180     *reinterpret_cast<__vector int *>(m) = a.simdInternal_;
 181 #else
 182     vec_xst(a.simdInternal_, 0, m);
 183 #endif
 184 }
 185
 186 static inline SimdFInt32 gmx_simdcall
 187 setZeroFI()
 188 {
 189     return {
 190                vec_splats(static_cast<int>(0))
 191     };
 192 }
 193
 194 // gcc-4.9 does not detect that vec_extract() uses its argument
 195 template<int index>
 196 static inline std::int32_t gmx_simdcall
 197 extract(SimdFInt32 gmx_unused a)
 198 {
 199     return vec_extract(a.simdInternal_, index);
 200 }
 201
 202 static inline SimdFloat gmx_simdcall
 203 operator&(SimdFloat a, SimdFloat b)
 204 {
 205     return {
 206                vec_and(a.simdInternal_, b.simdInternal_)
 207     };
 208 }
 209
 210 static inline SimdFloat gmx_simdcall
 211 andNot(SimdFloat a, SimdFloat b)
 212 {
 213     return {
 214                vec_andc(b.simdInternal_, a.simdInternal_)
 215     };
 216 }
 217
 218 static inline SimdFloat gmx_simdcall
 219 operator|(SimdFloat a, SimdFloat b)
 220 {
 221     return {
 222                vec_or(a.simdInternal_, b.simdInternal_)
 223     };
 224 }
 225
 226 static inline SimdFloat gmx_simdcall
 227 operator^(SimdFloat a, SimdFloat b)
 228 {
 229     return {
 230                vec_xor(a.simdInternal_, b.simdInternal_)
 231     };
 232 }
 233
 234 static inline SimdFloat gmx_simdcall
 235 operator+(SimdFloat a, SimdFloat b)
 236 {
 237     return {
 238                vec_add(a.simdInternal_, b.simdInternal_)
 239     };
 240 }
 241
 242 static inline SimdFloat gmx_simdcall
 243 operator-(SimdFloat a, SimdFloat b)
 244 {
 245     return {
 246                vec_sub(a.simdInternal_, b.simdInternal_)
 247     };
 248 }
 249
 250 static inline SimdFloat gmx_simdcall
 251 operator-(SimdFloat x)
 252 {
 253     return {
 254                -x.simdInternal_
 255     };
 256 }
 257
 258 static inline SimdFloat gmx_simdcall
 259 operator*(SimdFloat a, SimdFloat b)
 260 {
 261     return {
 262                vec_mul(a.simdInternal_, b.simdInternal_)
 263     };
 264 }
 265
 266 static inline SimdFloat gmx_simdcall
 267 fma(SimdFloat a, SimdFloat b, SimdFloat c)
 268 {
 269     return {
 270                vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 271     };
 272 }
 273
 274 static inline SimdFloat gmx_simdcall
 275 fms(SimdFloat a, SimdFloat b, SimdFloat c)
 276 {
 277     return {
 278                vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 279     };
 280 }
 281
 282 static inline SimdFloat gmx_simdcall
 283 fnma(SimdFloat a, SimdFloat b, SimdFloat c)
 284 {
 285     return {
 286                vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 287     };
 288 }
 289
 290 static inline SimdFloat gmx_simdcall
 291 fnms(SimdFloat a, SimdFloat b, SimdFloat c)
 292 {
 293     return {
 294                vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 295     };
 296 }
 297
 298 static inline SimdFloat gmx_simdcall
 299 rsqrt(SimdFloat x)
 300 {
 301     return {
 302                vec_rsqrte(x.simdInternal_)
 303     };
 304 }
 305
 306 static inline SimdFloat gmx_simdcall
 307 rcp(SimdFloat x)
 308 {
 309     return {
 310                vec_re(x.simdInternal_)
 311     };
 312 }
 313
 314 static inline SimdFloat gmx_simdcall
 315 maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
 316 {
 317     return {
 318                vec_add(a.simdInternal_, vec_and(b.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)))
 319     };
 320 }
 321
 322 static inline SimdFloat gmx_simdcall
 323 maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
 324 {
 325     SimdFloat prod = a * b;
 326
 327     return {
 328                vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
 329     };
 330 }
 331
 332 static inline SimdFloat gmx_simdcall
 333 maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
 334 {
 335     SimdFloat prod = fma(a, b, c);
 336
 337     return {
 338                vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
 339     };
 340 }
 341
 342 static inline SimdFloat gmx_simdcall
 343 maskzRsqrt(SimdFloat x, SimdFBool m)
 344 {
 345 #ifndef NDEBUG
 346     x.simdInternal_ = vec_sel(vec_splats(1.0f), x.simdInternal_, m.simdInternal_);
 347 #endif
 348     return {
 349                vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_))
 350     };
 351 }
 352
 353 static inline SimdFloat gmx_simdcall
 354 maskzRcp(SimdFloat x, SimdFBool m)
 355 {
 356 #ifndef NDEBUG
 357     x.simdInternal_ = vec_sel(vec_splats(1.0f), x.simdInternal_, m.simdInternal_);
 358 #endif
 359     return {
 360                vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_))
 361     };
 362 }
 363
 364 static inline SimdFloat gmx_simdcall
 365 abs(SimdFloat x)
 366 {
 367     return {
 368                vec_abs( x.simdInternal_ )
 369     };
 370 }
 371
 372 static inline SimdFloat gmx_simdcall
 373 max(SimdFloat a, SimdFloat b)
 374 {
 375     return {
 376                vec_max(a.simdInternal_, b.simdInternal_)
 377     };
 378 }
 379
 380 static inline SimdFloat gmx_simdcall
 381 min(SimdFloat a, SimdFloat b)
 382 {
 383     return {
 384                vec_min(a.simdInternal_, b.simdInternal_)
 385     };
 386 }
 387
 388 static inline SimdFloat gmx_simdcall
 389 round(SimdFloat x)
 390 {
 391     return {
 392                vec_round( x.simdInternal_ )
 393     };
 394 }
 395
 396 static inline SimdFloat gmx_simdcall
 397 trunc(SimdFloat x)
 398 {
 399     return {
 400                vec_trunc( x.simdInternal_ )
 401     };
 402 }
 403
 404 static inline SimdFloat gmx_simdcall
 405 frexp(SimdFloat value, SimdFInt32 * exponent)
 406 {
 407     const __vector float      exponentMask   = reinterpret_cast<__vector float>(vec_splats(0x7F800000U));
 408     const __vector signed int exponentBias   = vec_splats(126);
 409     const __vector float      half           = vec_splats(0.5f);
 410     __vector signed int       iExponent;
 411
 412     iExponent               = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask));
 413     iExponent               = vec_sub(vec_sr(iExponent, vec_splats(23U)), exponentBias);
 414     exponent->simdInternal_ = iExponent;
 415
 416     return {
 417                vec_or( vec_andc(value.simdInternal_, exponentMask), half)
 418     };
 419 }
 420
 421 template <MathOptimization opt = MathOptimization::Safe>
 422 static inline SimdFloat gmx_simdcall
 423 ldexp(SimdFloat value, SimdFInt32 exponent)
 424 {
 425     const __vector signed int exponentBias   = vec_splats(127);
 426     __vector signed int       iExponent;
 427
 428     iExponent  = vec_add(exponent.simdInternal_, exponentBias);
 429
 430     if (opt == MathOptimization::Safe)
 431     {
 432         // Make sure biased argument is not negative
 433         iExponent = vec_max(iExponent, vec_splat_s32(0));
 434     }
 435
 436     iExponent = vec_sl( iExponent, vec_splats(23U));
 437
 438     return {
 439                vec_mul(value.simdInternal_, reinterpret_cast<__vector float>(iExponent))
 440     };
 441 }
 442
 443 static inline float gmx_simdcall
 444 reduce(SimdFloat x)
 445 {
 446     const __vector unsigned char perm1 = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
 447     const __vector unsigned char perm2 = { 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3 };
 448
 449     x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm1));
 450     x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm2));
 451     return vec_extract(x.simdInternal_, 0);
 452 }
 453
 454 static inline SimdFBool gmx_simdcall
 455 operator==(SimdFloat a, SimdFloat b)
 456 {
 457     return {
 458                vec_cmpeq(a.simdInternal_, b.simdInternal_)
 459     };
 460 }
 461
 462 static inline SimdFBool gmx_simdcall
 463 operator!=(SimdFloat a, SimdFloat b)
 464 {
 465     return {
 466                vec_or(vec_cmpgt(a.simdInternal_, b.simdInternal_),
 467                       vec_cmplt(a.simdInternal_, b.simdInternal_))
 468     };
 469 }
 470
 471 static inline SimdFBool gmx_simdcall
 472 operator<(SimdFloat a, SimdFloat b)
 473 {
 474     return {
 475                vec_cmplt(a.simdInternal_, b.simdInternal_)
 476     };
 477 }
 478
 479 static inline SimdFBool gmx_simdcall
 480 operator<=(SimdFloat a, SimdFloat b)
 481 {
 482     return {
 483                vec_cmple(a.simdInternal_, b.simdInternal_)
 484     };
 485 }
 486
 487 static inline SimdFBool gmx_simdcall
 488 testBits(SimdFloat a)
 489 {
 490     return {
 491                vec_cmpgt( reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U))
 492     };
 493 }
 494
 495 static inline SimdFBool gmx_simdcall
 496 operator&&(SimdFBool a, SimdFBool b)
 497 {
 498     return {
 499                vec_and(a.simdInternal_, b.simdInternal_)
 500     };
 501 }
 502
 503 static inline SimdFBool gmx_simdcall
 504 operator||(SimdFBool a, SimdFBool b)
 505 {
 506     return {
 507                vec_or(a.simdInternal_, b.simdInternal_)
 508     };
 509 }
 510
 511 static inline bool gmx_simdcall
 512 anyTrue(SimdFBool a)
 513 {
 514     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 515 }
 516
 517 static inline SimdFloat gmx_simdcall
 518 selectByMask(SimdFloat a, SimdFBool m)
 519 {
 520     return {
 521                vec_and(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
 522     };
 523 }
 524
 525 static inline SimdFloat gmx_simdcall
 526 selectByNotMask(SimdFloat a, SimdFBool m)
 527 {
 528     return {
 529                vec_andc(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
 530     };
 531 }
 532
 533 static inline SimdFloat gmx_simdcall
 534 blend(SimdFloat a, SimdFloat b, SimdFBool sel)
 535 {
 536     return {
 537                vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
 538     };
 539 }
 540
 541 static inline SimdFInt32 gmx_simdcall
 542 operator&(SimdFInt32 a, SimdFInt32 b)
 543 {
 544     return {
 545                vec_and(a.simdInternal_, b.simdInternal_)
 546     };
 547 }
 548
 549 static inline SimdFInt32 gmx_simdcall
 550 andNot(SimdFInt32 a, SimdFInt32 b)
 551 {
 552     return {
 553                vec_andc(b.simdInternal_, a.simdInternal_)
 554     };
 555 }
 556
 557 static inline SimdFInt32 gmx_simdcall
 558 operator|(SimdFInt32 a, SimdFInt32 b)
 559 {
 560     return {
 561                vec_or(a.simdInternal_, b.simdInternal_)
 562     };
 563 }
 564
 565 static inline SimdFInt32 gmx_simdcall
 566 operator^(SimdFInt32 a, SimdFInt32 b)
 567 {
 568     return {
 569                vec_xor(a.simdInternal_, b.simdInternal_)
 570     };
 571 }
 572
 573 static inline SimdFInt32 gmx_simdcall
 574 operator+(SimdFInt32 a, SimdFInt32 b)
 575 {
 576     return {
 577                vec_add(a.simdInternal_, b.simdInternal_)
 578     };
 579 }
 580
 581 static inline SimdFInt32 gmx_simdcall
 582 operator-(SimdFInt32 a, SimdFInt32 b)
 583 {
 584     return {
 585                vec_sub(a.simdInternal_, b.simdInternal_)
 586     };
 587 }
 588
 589 static inline SimdFInt32 gmx_simdcall
 590 operator*(SimdFInt32 a, SimdFInt32 b)
 591 {
 592     return {
 593                a.simdInternal_ * b.simdInternal_
 594     };
 595 }
 596
 597 static inline SimdFIBool gmx_simdcall
 598 operator==(SimdFInt32 a, SimdFInt32 b)
 599 {
 600     return {
 601                vec_cmpeq(a.simdInternal_, b.simdInternal_)
 602     };
 603 }
 604
 605 static inline SimdFIBool gmx_simdcall
 606 testBits(SimdFInt32 a)
 607 {
 608     return {
 609                vec_cmpgt( reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U))
 610     };
 611 }
 612
 613 static inline SimdFIBool gmx_simdcall
 614 operator<(SimdFInt32 a, SimdFInt32 b)
 615 {
 616     return {
 617                vec_cmplt(a.simdInternal_, b.simdInternal_)
 618     };
 619 }
 620
 621 static inline SimdFIBool gmx_simdcall
 622 operator&&(SimdFIBool a, SimdFIBool b)
 623 {
 624     return {
 625                vec_and(a.simdInternal_, b.simdInternal_)
 626     };
 627 }
 628
 629 static inline SimdFIBool gmx_simdcall
 630 operator||(SimdFIBool a, SimdFIBool b)
 631 {
 632     return {
 633                vec_or(a.simdInternal_, b.simdInternal_)
 634     };
 635 }
 636
 637 static inline bool gmx_simdcall
 638 anyTrue(SimdFIBool a)
 639 {
 640     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 641 }
 642
 643 static inline SimdFInt32 gmx_simdcall
 644 selectByMask(SimdFInt32 a, SimdFIBool m)
 645 {
 646     return {
 647                vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_))
 648     };
 649 }
 650
 651 static inline SimdFInt32 gmx_simdcall
 652 selectByNotMask(SimdFInt32 a, SimdFIBool m)
 653 {
 654     return {
 655                vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_))
 656     };
 657 }
 658
 659 static inline SimdFInt32 gmx_simdcall
 660 blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
 661 {
 662     return {
 663                vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
 664     };
 665 }
 666
 667 static inline SimdFInt32 gmx_simdcall
 668 cvtR2I(SimdFloat a)
 669 {
 670     return {
 671                vec_cts(vec_round(a.simdInternal_), 0)
 672     };
 673 }
 674
 675 static inline SimdFInt32 gmx_simdcall
 676 cvttR2I(SimdFloat a)
 677 {
 678     return {
 679                vec_cts(a.simdInternal_, 0)
 680     };
 681 }
 682
 683 static inline SimdFloat gmx_simdcall
 684 cvtI2R(SimdFInt32 a)
 685 {
 686     return {
 687                vec_ctf(a.simdInternal_, 0)
 688     };
 689 }
 690
 691 static inline SimdFIBool gmx_simdcall
 692 cvtB2IB(SimdFBool a)
 693 {
 694     return {
 695                a.simdInternal_
 696     };
 697 }
 698
 699 static inline SimdFBool gmx_simdcall
 700 cvtIB2B(SimdFIBool a)
 701 {
 702     return {
 703                a.simdInternal_
 704     };
 705 }
 706
 707 static inline SimdFloat gmx_simdcall
 708 copysign(SimdFloat x, SimdFloat y)
 709 {
 710 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 711     __vector float res;
 712     __asm__ ("xvcpsgnsp %x0,%x1,%x2" : "=wf" (res) : "wf" (y.simdInternal_), "wf" (x.simdInternal_));
 713     return {
 714                res
 715     };
 716 #else
 717     return {
 718                vec_cpsgn(y.simdInternal_, x.simdInternal_)
 719     };
 720 #endif
 721 }
 722
 723 }      // namespace gmx
 724
 725 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H