src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VMX_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_VMX_SIMD_FLOAT_H
  38
  39 #include "config.h"
  40
  41 #include <cstdint>
  42
  43 #include "gromacs/math/utilities.h"
  44 #include "gromacs/utility/basedefinitions.h"
  45
  46 #include "impl_ibm_vmx_definitions.h"
  47
  48 namespace gmx
  49 {
  50
  51 class SimdFloat
  52 {
  53 public:
  54     SimdFloat() {}
  55
  56     SimdFloat(float f)
  57     {
  58         __vector unsigned char perm;
  59
  60         simdInternal_ = vec_lde(0, const_cast<float*>(&f));
  61         perm          = vec_lvsl(0, const_cast<float*>(&f));
  62         simdInternal_ = vec_perm(simdInternal_, simdInternal_, perm);
  63         simdInternal_ = vec_splat(simdInternal_, 0);
  64     }
  65
  66     // Internal utility constructor to simplify return statements
  67     SimdFloat(__vector float simd) : simdInternal_(simd) {}
  68
  69     __vector float simdInternal_;
  70 };
  71
  72 class SimdFInt32
  73 {
  74 public:
  75     SimdFInt32() {}
  76
  77     SimdFInt32(std::int32_t i)
  78     {
  79         __vector unsigned char perm;
  80
  81         simdInternal_ = vec_lde(0, const_cast<int*>(&i));
  82         perm          = vec_lvsl(0, const_cast<int*>(&i));
  83         simdInternal_ = vec_perm(simdInternal_, simdInternal_, perm);
  84         simdInternal_ = vec_splat(simdInternal_, 0);
  85     }
  86
  87
  88     // Internal utility constructor to simplify return statements
  89     SimdFInt32(__vector signed int simd) : simdInternal_(simd) {}
  90
  91     __vector signed int simdInternal_;
  92 };
  93
  94 class SimdFBool
  95 {
  96 public:
  97     SimdFBool() {}
  98
  99     SimdFBool(bool b) :
 100         simdInternal_(reinterpret_cast<__vector vmxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
 101     {
 102     }
 103
 104     // Internal utility constructor to simplify return statements
 105     SimdFBool(__vector vmxBool int simd) : simdInternal_(simd) {}
 106
 107     __vector vmxBool int simdInternal_;
 108 };
 109
 110 class SimdFIBool
 111 {
 112 public:
 113     SimdFIBool() {}
 114
 115     SimdFIBool(bool b) :
 116         simdInternal_(reinterpret_cast<__vector vmxBool int>(vec_splat_u32(b ? 0xFFFFFFFF : 0)))
 117     {
 118     }
 119
 120     // Internal utility constructor to simplify return statements
 121     SimdFIBool(__vector vmxBool int simd) : simdInternal_(simd) {}
 122
 123     __vector vmxBool int simdInternal_;
 124 };
 125
 126 static inline SimdFloat gmx_simdcall simdLoad(const float* m, SimdFloatTag = {})
 127 {
 128     return { vec_ld(0, const_cast<float*>(m)) };
 129 }
 130
 131 static inline void gmx_simdcall store(float* m, SimdFloat a)
 132 {
 133     vec_st(a.simdInternal_, 0, const_cast<float*>(m));
 134 }
 135
 136 static inline SimdFloat gmx_simdcall setZeroF()
 137 {
 138     return { reinterpret_cast<__vector float>(vec_splat_u32(0)) };
 139 }
 140
 141 static inline SimdFInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdFInt32Tag)
 142 {
 143     return { vec_ld(0, const_cast<int*>(m)) };
 144 }
 145
 146 static inline void gmx_simdcall store(std::int32_t* m, SimdFInt32 a)
 147 {
 148     vec_st(a.simdInternal_, 0, const_cast<int*>(m));
 149 }
 150
 151 static inline SimdFInt32 gmx_simdcall setZeroFI()
 152 {
 153     return { vec_splat_s32(0) };
 154 }
 155
 156 static inline SimdFloat gmx_simdcall operator&(SimdFloat a, SimdFloat b)
 157 {
 158     return { vec_and(a.simdInternal_, b.simdInternal_) };
 159 }
 160
 161 static inline SimdFloat gmx_simdcall andNot(SimdFloat a, SimdFloat b)
 162 {
 163     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 164 }
 165
 166 static inline SimdFloat gmx_simdcall operator|(SimdFloat a, SimdFloat b)
 167 {
 168     return { vec_or(a.simdInternal_, b.simdInternal_) };
 169 }
 170
 171 static inline SimdFloat gmx_simdcall operator^(SimdFloat a, SimdFloat b)
 172 {
 173     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 174 }
 175
 176 static inline SimdFloat gmx_simdcall operator+(SimdFloat a, SimdFloat b)
 177 {
 178     return { vec_add(a.simdInternal_, b.simdInternal_) };
 179 }
 180
 181 static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b)
 182 {
 183     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 184 }
 185
 186 static inline SimdFloat gmx_simdcall operator-(SimdFloat x)
 187 {
 188     return { vec_xor(x.simdInternal_,
 189                      reinterpret_cast<__vector float>(vec_sl(vec_splat_u32(-1), vec_splat_u32(-1)))) };
 190 }
 191
 192 static inline SimdFloat gmx_simdcall operator*(SimdFloat a, SimdFloat b)
 193 {
 194     return { vec_madd(a.simdInternal_, b.simdInternal_,
 195                       reinterpret_cast<__vector float>(vec_splat_u32(0))) };
 196 }
 197
 198 static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c)
 199 {
 200     return { vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 201 }
 202
 203 static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c)
 204 {
 205     return { vec_madd(a.simdInternal_, b.simdInternal_, -c.simdInternal_) };
 206 }
 207
 208 static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c)
 209 {
 210     return { vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 211 }
 212
 213 static inline SimdFloat gmx_simdcall fnms(SimdFloat a, SimdFloat b, SimdFloat c)
 214 {
 215     return { -vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 216 }
 217
 218 static inline SimdFloat gmx_simdcall rsqrt(SimdFloat x)
 219 {
 220     return { vec_rsqrte(x.simdInternal_) };
 221 }
 222
 223 static inline SimdFloat gmx_simdcall rcp(SimdFloat x)
 224 {
 225     return { vec_re(x.simdInternal_) };
 226 }
 227
 228 static inline SimdFloat gmx_simdcall maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
 229 {
 230     return { vec_add(a.simdInternal_,
 231                      vec_and(b.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))) };
 232 }
 233
 234 static inline SimdFloat gmx_simdcall maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
 235 {
 236     SimdFloat prod = a * b;
 237
 238     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 239 }
 240
 241 static inline SimdFloat gmx_simdcall maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
 242 {
 243     SimdFloat prod = fma(a, b, c);
 244
 245     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 246 }
 247
 248 static inline SimdFloat gmx_simdcall maskzRsqrt(SimdFloat x, SimdFBool m)
 249 {
 250 #ifndef NDEBUG
 251     SimdFloat one(1.0F);
 252     x.simdInternal_ = vec_sel(one.simdInternal_, x.simdInternal_, m.simdInternal_);
 253 #endif
 254     return { vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_)) };
 255 }
 256
 257 static inline SimdFloat gmx_simdcall maskzRcp(SimdFloat x, SimdFBool m)
 258 {
 259 #ifndef NDEBUG
 260     SimdFloat one(1.0F);
 261     x.simdInternal_ = vec_sel(one.simdInternal_, x.simdInternal_, m.simdInternal_);
 262 #endif
 263     return { vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_)) };
 264 }
 265
 266 static inline SimdFloat gmx_simdcall abs(SimdFloat x)
 267 {
 268     return { vec_abs(x.simdInternal_) };
 269 }
 270
 271 static inline SimdFloat gmx_simdcall max(SimdFloat a, SimdFloat b)
 272 {
 273     return { vec_max(a.simdInternal_, b.simdInternal_) };
 274 }
 275
 276 static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b)
 277 {
 278     return { vec_min(a.simdInternal_, b.simdInternal_) };
 279 }
 280
 281 static inline SimdFloat gmx_simdcall round(SimdFloat x)
 282 {
 283     return { vec_round(x.simdInternal_) };
 284 }
 285
 286 static inline SimdFloat gmx_simdcall trunc(SimdFloat x)
 287 {
 288     return { vec_trunc(x.simdInternal_) };
 289 }
 290
 291 template<MathOptimization opt = MathOptimization::Safe>
 292 static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent)
 293 {
 294     // Generate constants without memory operations
 295     const __vector signed int exponentMask =
 296             vec_sl(vec_add(vec_splat_s32(15), vec_sl(vec_splat_s32(15), vec_splat_u32(4))),
 297                    vec_add(vec_splat_u32(15), vec_splat_u32(8))); // 0x7F800000
 298     const __vector signed int exponentBias =
 299             vec_sub(vec_sl(vec_splat_s32(1), vec_splat_u32(7)), vec_splat_s32(2)); // 126
 300     const SimdFloat     half(0.5F);
 301     __vector signed int iExponent;
 302
 303     __vector vmxBool int valueIsZero =
 304             vec_cmpeq(value.simdInternal_, reinterpret_cast<__vector float>(vec_splat_u32(0)));
 305
 306     iExponent = vec_and(reinterpret_cast<__vector signed int>(value.simdInternal_), exponentMask);
 307     iExponent = vec_sr(iExponent, vec_add(vec_splat_u32(15), vec_splat_u32(8)));
 308     iExponent = vec_sub(iExponent, exponentBias);
 309     iExponent = vec_andc(iExponent, reinterpret_cast<__vector int>(valueIsZero));
 310
 311     __vector float result =
 312             vec_or(vec_andc(value.simdInternal_, reinterpret_cast<__vector float>(exponentMask)),
 313                    half.simdInternal_);
 314     result = vec_sel(result, value.simdInternal_, valueIsZero);
 315
 316     exponent->simdInternal_ = iExponent;
 317
 318     return { result };
 319 }
 320
 321 template<MathOptimization opt = MathOptimization::Safe>
 322 static inline SimdFloat gmx_simdcall ldexp(SimdFloat value, SimdFInt32 exponent)
 323 {
 324     const __vector signed int exponentBias =
 325             vec_sub(vec_sl(vec_splat_s32(1), vec_splat_u32(7)), vec_splat_s32(1)); // 127
 326     __vector signed int iExponent;
 327
 328     iExponent = vec_add(exponent.simdInternal_, exponentBias);
 329
 330     if (opt == MathOptimization::Safe)
 331     {
 332         // Make sure biased argument is not negative
 333         iExponent = vec_max(iExponent, vec_splat_s32(0));
 334     }
 335
 336     iExponent = vec_sl(iExponent, vec_add(vec_splat_u32(15), vec_splat_u32(8)));
 337
 338     return { vec_madd(value.simdInternal_, reinterpret_cast<__vector float>(iExponent),
 339                       reinterpret_cast<__vector float>(vec_splat_u32(0))) };
 340 }
 341
 342 static inline float gmx_simdcall reduce(SimdFloat a)
 343 {
 344     __vector float c = a.simdInternal_;
 345     float          res;
 346
 347     // calculate sum
 348     c = vec_add(c, vec_sld(c, c, 8));
 349     c = vec_add(c, vec_sld(c, c, 4));
 350     vec_ste(c, 0, &res);
 351     return res;
 352 }
 353
 354 static inline SimdFBool gmx_simdcall operator==(SimdFloat a, SimdFloat b)
 355 {
 356     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 357 }
 358
 359 static inline SimdFBool gmx_simdcall operator!=(SimdFloat a, SimdFloat b)
 360 {
 361     return { vec_or(vec_cmpgt(a.simdInternal_, b.simdInternal_),
 362                     vec_cmplt(a.simdInternal_, b.simdInternal_)) };
 363 }
 364
 365 static inline SimdFBool gmx_simdcall operator<(SimdFloat a, SimdFloat b)
 366 {
 367     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 368 }
 369
 370 static inline SimdFBool gmx_simdcall operator<=(SimdFloat a, SimdFloat b)
 371 {
 372     return { vec_cmple(a.simdInternal_, b.simdInternal_) };
 373 }
 374
 375 static inline SimdFBool gmx_simdcall testBits(SimdFloat a)
 376 {
 377     return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splat_u32(0)) };
 378 }
 379
 380 static inline SimdFBool gmx_simdcall operator&&(SimdFBool a, SimdFBool b)
 381 {
 382     return { vec_and(a.simdInternal_, b.simdInternal_) };
 383 }
 384
 385 static inline SimdFBool gmx_simdcall operator||(SimdFBool a, SimdFBool b)
 386 {
 387     return { vec_or(a.simdInternal_, b.simdInternal_) };
 388 }
 389
 390 static inline bool gmx_simdcall anyTrue(SimdFBool a)
 391 {
 392     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vmxBool int>(vec_splat_u32(0)));
 393 }
 394
 395 static inline SimdFloat gmx_simdcall selectByMask(SimdFloat a, SimdFBool m)
 396 {
 397     return { vec_and(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 398 }
 399
 400 static inline SimdFloat gmx_simdcall selectByNotMask(SimdFloat a, SimdFBool m)
 401 {
 402     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 403 }
 404
 405 static inline SimdFloat gmx_simdcall blend(SimdFloat a, SimdFloat b, SimdFBool sel)
 406 {
 407     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 408 }
 409
 410 static inline SimdFInt32 gmx_simdcall operator&(SimdFInt32 a, SimdFInt32 b)
 411 {
 412     return { vec_and(a.simdInternal_, b.simdInternal_) };
 413 }
 414
 415 static inline SimdFInt32 gmx_simdcall andNot(SimdFInt32 a, SimdFInt32 b)
 416 {
 417     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 418 }
 419
 420 static inline SimdFInt32 gmx_simdcall operator|(SimdFInt32 a, SimdFInt32 b)
 421 {
 422     return { vec_or(a.simdInternal_, b.simdInternal_) };
 423 }
 424
 425 static inline SimdFInt32 gmx_simdcall operator^(SimdFInt32 a, SimdFInt32 b)
 426 {
 427     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 428 }
 429
 430 static inline SimdFInt32 gmx_simdcall operator+(SimdFInt32 a, SimdFInt32 b)
 431 {
 432     return { vec_add(a.simdInternal_, b.simdInternal_) };
 433 }
 434
 435 static inline SimdFInt32 gmx_simdcall operator-(SimdFInt32 a, SimdFInt32 b)
 436 {
 437     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 438 }
 439
 440 static inline SimdFInt32 gmx_simdcall operator*(SimdFInt32 a, SimdFInt32 b)
 441 {
 442     return { a.simdInternal_ * b.simdInternal_ };
 443 }
 444
 445 static inline SimdFIBool gmx_simdcall operator==(SimdFInt32 a, SimdFInt32 b)
 446 {
 447     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 448 }
 449
 450 static inline SimdFIBool gmx_simdcall testBits(SimdFInt32 a)
 451 {
 452     return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splat_u32(0)) };
 453 }
 454
 455 static inline SimdFIBool gmx_simdcall operator<(SimdFInt32 a, SimdFInt32 b)
 456 {
 457     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 458 }
 459
 460 static inline SimdFIBool gmx_simdcall operator&&(SimdFIBool a, SimdFIBool b)
 461 {
 462     return { vec_and(a.simdInternal_, b.simdInternal_) };
 463 }
 464
 465 static inline SimdFIBool gmx_simdcall operator||(SimdFIBool a, SimdFIBool b)
 466 {
 467     return { vec_or(a.simdInternal_, b.simdInternal_) };
 468 }
 469
 470 static inline bool gmx_simdcall anyTrue(SimdFIBool a)
 471 {
 472     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vmxBool int>(vec_splat_u32(0)));
 473 }
 474
 475 static inline SimdFInt32 gmx_simdcall selectByMask(SimdFInt32 a, SimdFIBool m)
 476 {
 477     return { vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 478 }
 479
 480 static inline SimdFInt32 gmx_simdcall selectByNotMask(SimdFInt32 a, SimdFIBool m)
 481 {
 482     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 483 }
 484
 485 static inline SimdFInt32 gmx_simdcall blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
 486 {
 487     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 488 }
 489
 490 static inline SimdFInt32 gmx_simdcall cvtR2I(SimdFloat a)
 491 {
 492     return { vec_cts(vec_round(a.simdInternal_), 0) };
 493 }
 494
 495 static inline SimdFInt32 gmx_simdcall cvttR2I(SimdFloat a)
 496 {
 497     return { vec_cts(a.simdInternal_, 0) };
 498 }
 499
 500 static inline SimdFloat gmx_simdcall cvtI2R(SimdFInt32 a)
 501 {
 502     return { vec_ctf(a.simdInternal_, 0) };
 503 }
 504
 505 static inline SimdFIBool gmx_simdcall cvtB2IB(SimdFBool a)
 506 {
 507     return { a.simdInternal_ };
 508 }
 509
 510 static inline SimdFBool gmx_simdcall cvtIB2B(SimdFIBool a)
 511 {
 512     return { a.simdInternal_ };
 513 }
 514
 515 } // namespace gmx
 516
 517 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VMX_SIMD_FLOAT_H