Improve accuracy of SIMD exp for small args
gromacs.git: src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx_simd_float.h
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H
#define GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H

#include "config.h"

// Assert is buggy on xlc with high optimization, so we skip it for QPX
#include <cmath>
#include <cstddef>
#include <cstdint>

#ifdef __clang__
#include <qpxmath.h>
#endif

#include "gromacs/math/utilities.h"
#include "gromacs/utility/basedefinitions.h"
namespace gmx
{

class SimdFloat
{
    public:
        SimdFloat() {}

        SimdFloat(float f) : simdInternal_(vec_splats(f)) {}

        // Internal utility constructor to simplify return statements
        SimdFloat(vector4double simd) : simdInternal_(simd) {}

        vector4double  simdInternal_;
};

class SimdFInt32
{
    public:
        SimdFInt32() {}

        SimdFInt32(std::int32_t i)
        {
            GMX_ALIGNED(int, GMX_SIMD_FINT32_WIDTH) idata[GMX_SIMD_FINT32_WIDTH];

            idata[0]      = i;
            simdInternal_ = vec_splat(vec_ldia(0, idata), 0);
        }

        // Internal utility constructor to simplify return statements
        SimdFInt32(vector4double simd) : simdInternal_(simd) {}

        vector4double  simdInternal_;
};

class SimdFBool
{
    public:
        SimdFBool() {}

        SimdFBool(bool b) : simdInternal_(vec_splats(b ? 1.0 : -1.0)) {}

        // Internal utility constructor to simplify return statements
        SimdFBool(vector4double simd) : simdInternal_(simd) {}

        vector4double  simdInternal_;
};
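
// A brief sketch of the boolean convention assumed throughout this header: QPX
// carries truth values in floating-point lanes, and the way vec_sel() is used
// below implies that non-negative selector lanes pick the second value operand.
// The 1.0/-1.0 splats in the SimdFBool constructor above follow that convention,
// e.g. (selectByMask()/selectByNotMask() are defined further down in this file):
//
//     SimdFloat x(3.0f);
//     SimdFloat r1 = selectByMask(x, SimdFBool(true));   // every lane keeps 3.0
//     SimdFloat r2 = selectByMask(x, SimdFBool(false));  // every lane becomes 0.0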
static inline SimdFloat gmx_simdcall
simdLoad(const float *m)
{
#ifdef NDEBUG
    return {
               vec_ld(0, const_cast<float *>(m))
    };
#else
    return {
               vec_lda(0, const_cast<float *>(m))
    };
#endif
}

static inline void gmx_simdcall
store(float *m, SimdFloat a)
{
#ifdef NDEBUG
    vec_st(a.simdInternal_, 0, m);
#else
    vec_sta(a.simdInternal_, 0, m);
#endif
}
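
// A minimal usage sketch of the load/store pair above (the buffer is hypothetical,
// not part of this header). Both functions assume SIMD-aligned memory; the intent
// of the NDEBUG switch appears to be that release builds use the plain
// vec_ld()/vec_st() forms, while debug builds use vec_lda()/vec_sta(), which fault
// on misaligned addresses so that bad pointers are caught early.
//
//     GMX_ALIGNED(float, GMX_SIMD_FLOAT_WIDTH) mem[GMX_SIMD_FLOAT_WIDTH] = {1, 2, 3, 4};
//     SimdFloat v = simdLoad(mem);   // four packed floats
//     store(mem, v + v);             // mem now holds {2, 4, 6, 8}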
static inline SimdFloat gmx_simdcall
setZeroF()
{
    return {
               vec_splats(0.0)
    };
}

static inline SimdFInt32 gmx_simdcall
simdLoadFI(const std::int32_t * m)
{
#ifdef NDEBUG
    return {
               vec_ldia(0, const_cast<int *>(m))
    };
#else
    return {
               vec_ldiaa(0, const_cast<int *>(m))
    };
#endif
}

static inline void gmx_simdcall
store(std::int32_t * m, SimdFInt32 a)
{
    vec_st(a.simdInternal_, 0, m);
}

static inline SimdFInt32 gmx_simdcall
setZeroFI()
{
    return {
               vec_splats(0.0)
    };
}
static inline SimdFloat gmx_simdcall
operator+(SimdFloat a, SimdFloat b)
{
    return {
               vec_add(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator-(SimdFloat a, SimdFloat b)
{
    return {
               vec_sub(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator-(SimdFloat x)
{
    return {
               vec_neg(x.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator*(SimdFloat a, SimdFloat b)
{
    return {
               vec_mul(a.simdInternal_, b.simdInternal_)
    };
}
// The four fused operations below map onto the QPX multiply-add family:
// fma(a,b,c) = a*b + c, fms(a,b,c) = a*b - c, fnma(a,b,c) = -a*b + c, fnms(a,b,c) = -a*b - c.
static inline SimdFloat gmx_simdcall
fma(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
               vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
fms(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
               vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
fnma(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
               vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
fnms(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
               vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}
// rsqrt() and rcp() only return the hardware estimates of 1/sqrt(x) and 1/x;
// callers are expected to refine them (e.g. with Newton-Raphson iterations).
static inline SimdFloat gmx_simdcall
rsqrt(SimdFloat x)
{
    return {
               vec_rsqrte(x.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
rcp(SimdFloat x)
{
    return {
               vec_re(x.simdInternal_)
    };
}
static inline SimdFloat gmx_simdcall
maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
{
    return {
               vec_add(a.simdInternal_, vec_sel(vec_splats(0.0), b.simdInternal_, m.simdInternal_))
    };
}

static inline SimdFloat gmx_simdcall
maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
{
    return {
               vec_sel(vec_splats(0.0), vec_mul(a.simdInternal_, b.simdInternal_), m.simdInternal_)
    };
}

static inline SimdFloat
maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
{
    return {
               vec_sel(vec_splats(0.0), vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_), m.simdInternal_)
    };
}

static inline SimdFloat
maskzRsqrt(SimdFloat x, SimdFBool m)
{
#ifndef NDEBUG
    // In debug builds, put 1.0 in the masked-out lanes so the estimate never sees
    // zero or negative arguments in lanes whose results are discarded anyway.
    x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
#endif
    return {
               vec_sel(vec_splats(0.0), vec_rsqrte(x.simdInternal_), m.simdInternal_)
    };
}

static inline SimdFloat
maskzRcp(SimdFloat x, SimdFBool m)
{
#ifndef NDEBUG
    // Same 1.0 substitution as in maskzRsqrt(): keep masked-out lanes harmless in debug builds.
    x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
#endif
    return {
               vec_sel(vec_splats(0.0), vec_re(x.simdInternal_), m.simdInternal_)
    };
}
static inline SimdFloat gmx_simdcall
abs(SimdFloat x)
{
    return {
               vec_abs( x.simdInternal_ )
    };
}

static inline SimdFloat gmx_simdcall
max(SimdFloat a, SimdFloat b)
{
    return {
               vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(a.simdInternal_, b.simdInternal_))
    };
}

static inline SimdFloat gmx_simdcall
min(SimdFloat a, SimdFloat b)
{
    return {
               vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(b.simdInternal_, a.simdInternal_))
    };
}
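
// max()/min() above have no single QPX instruction; they use the sign of the
// difference as the vec_sel() selector, which in scalar terms is roughly
//
//     max(a, b) = (a - b >= 0) ? a : b;
//     min(a, b) = (b - a >= 0) ? a : b;
//
// NaN inputs fall through to whatever vec_sub()/vec_sel() produce for them.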
static inline SimdFloat gmx_simdcall
round(SimdFloat x)
{
    // Note: It is critical to use vec_cfid(vec_ctid(x)) for the implementation
    // here, since vec_round() does not adhere to the FP control
    // word rounding scheme. We rely on float-to-float and float-to-integer
    // rounding being the same for half-way values in a few algorithms.
    return {
               vec_cfid(vec_ctid(x.simdInternal_))
    };
}
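
// Worked example of the half-way behaviour the comment above refers to: under the
// default round-to-nearest-even mode, both this round() and the cvtR2I() conversion
// below send 2.5 to 2 and 3.5 to 4, whereas a round-half-away implementation (which
// vec_round() may use) would send 2.5 to 3. The SIMD math routines require the
// float->float and float->int paths to agree on such values.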
static inline SimdFloat gmx_simdcall
trunc(SimdFloat x)
{
    return {
               vec_trunc(x.simdInternal_)
    };
}

static inline SimdFloat
frexp(SimdFloat value, SimdFInt32 * exponent)
{
    GMX_ALIGNED(float, GMX_SIMD_FLOAT_WIDTH) rdata[GMX_SIMD_FLOAT_WIDTH];
    GMX_ALIGNED(int, GMX_SIMD_FLOAT_WIDTH)   idata[GMX_SIMD_FLOAT_WIDTH];

    vec_st(value.simdInternal_, 0, rdata);

    for (std::size_t i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
    {
        rdata[i] = std::frexp(rdata[i], idata + i);
    }

    exponent->simdInternal_ = vec_ldia(0, idata);
    value.simdInternal_     = vec_ld(0, rdata);

    return value;
}

template <MathOptimization opt = MathOptimization::Safe>
static inline SimdFloat
ldexp(SimdFloat value, SimdFInt32 exponent)
{
    GMX_ALIGNED(float, GMX_SIMD_FLOAT_WIDTH) rdata[GMX_SIMD_FLOAT_WIDTH];
    GMX_ALIGNED(int, GMX_SIMD_FLOAT_WIDTH)   idata[GMX_SIMD_FLOAT_WIDTH];

    vec_st(value.simdInternal_, 0, rdata);
    vec_st(exponent.simdInternal_, 0, idata);

    for (std::size_t i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
    {
        rdata[i] = std::ldexp(rdata[i], idata[i]);
    }

    value.simdInternal_ = vec_ld(0, rdata);

    return value;
}
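
// The two routines above have no QPX equivalent, so they spill each lane through
// aligned scratch arrays and fall back to the scalar std::frexp/std::ldexp.
// A small round-trip sketch of what they compute per lane:
//
//     SimdFInt32 e;
//     SimdFloat  m = frexp(SimdFloat(6.0f), &e);   // m = 0.75, e = 3 in every lane
//     SimdFloat  x = ldexp(m, e);                  // 0.75 * 2^3 = 6.0 again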
static inline float gmx_simdcall
reduce(SimdFloat x)
{
    vector4double y = vec_sldw(x.simdInternal_, x.simdInternal_, 2);
    vector4double z;

    y = vec_add(y, x.simdInternal_);
    z = vec_sldw(y, y, 1);
    y = vec_add(y, z);
    return vec_extract(y, 0);
}
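
// The reduction above is two rotate-and-add steps; in scalar terms it returns
//
//     reduce({x0, x1, x2, x3}) = (x0 + x2) + (x1 + x3)
//
// i.e. the full horizontal sum, paired up so only log2(width) vector adds are needed.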
static inline SimdFBool gmx_simdcall
operator==(SimdFloat a, SimdFloat b)
{
    return {
               vec_cmpeq(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
operator!=(SimdFloat a, SimdFloat b)
{
    return {
               vec_not(vec_cmpeq(a.simdInternal_, b.simdInternal_))
    };
}

static inline SimdFBool gmx_simdcall
operator<(SimdFloat a, SimdFloat b)
{
    return {
               vec_cmplt(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
operator<=(SimdFloat a, SimdFloat b)
{
    return {
               vec_or(vec_cmplt(a.simdInternal_, b.simdInternal_), vec_cmpeq(a.simdInternal_, b.simdInternal_))
    };
}
static inline SimdFBool gmx_simdcall
operator&&(SimdFBool a, SimdFBool b)
{
    return {
               vec_and(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
operator||(SimdFBool a, SimdFBool b)
{
    return {
               vec_or(a.simdInternal_, b.simdInternal_)
    };
}

static inline bool gmx_simdcall
anyTrue(SimdFBool a)
{
    vector4double b = vec_sldw(a.simdInternal_, a.simdInternal_, 2);

    a.simdInternal_ = vec_or(a.simdInternal_, b);
    b               = vec_sldw(a.simdInternal_, a.simdInternal_, 1);
    b               = vec_or(a.simdInternal_, b);
    return (vec_extract(b, 0) > 0);
}
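
// anyTrue() uses the same rotate pattern as reduce(), but OR-combines the
// sign-encoded booleans, so lane 0 ends up positive (true is stored as 1.0)
// if any input lane was true; in scalar terms,
//
//     anyTrue({b0, b1, b2, b3}) = b0 || b1 || b2 || b3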
static inline SimdFloat gmx_simdcall
selectByMask(SimdFloat a, SimdFBool m)
{
    return {
               vec_sel(vec_splats(0.0), a.simdInternal_, m.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
selectByNotMask(SimdFloat a, SimdFBool m)
{
    return {
               vec_sel(a.simdInternal_, vec_splats(0.0), m.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
blend(SimdFloat a, SimdFloat b, SimdFBool sel)
{
    return {
               vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
    };
}
// Float-to-integer conversion using the current rounding mode
static inline SimdFInt32 gmx_simdcall
cvtR2I(SimdFloat a)
{
    return {
               vec_ctiw(a.simdInternal_)
    };
}

// Float-to-integer conversion with truncation (round toward zero)
static inline SimdFInt32 gmx_simdcall
cvttR2I(SimdFloat a)
{
    return {
               vec_ctiwz(a.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
cvtI2R(SimdFInt32 a)
{
    return {
               vec_cfid(a.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
copysign(SimdFloat x, SimdFloat y)
{
    return {
               vec_cpsgn(y.simdInternal_, x.simdInternal_)
    };
}
}      // namespace gmx

#endif // GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H