Update instructions in containers.rst
[gromacs.git] / src / gromacs / simd / scalar / scalar_util.h
blob5002726307056863e862958dbbe535c3e1f6cd08
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2016,2017,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifndef GMX_SIMD_SCALAR_UTIL_H
36 #define GMX_SIMD_SCALAR_UTIL_H
38 #include <cmath>
40 /*! \libinternal \file
42 * \brief Scalar utility functions mimicking GROMACS SIMD utility functions
44 * These versions make it possible to write functions that are templated with
45 * either a SIMD or scalar type. While some of these functions might not appear
46 * SIMD-specific, we have placed them here because the only reason to use these
47 * instead of generic functions is in templated combined SIMD/non-SIMD code.
49 * \author Erik Lindahl <erik.lindahl@gmail.com>
51 * \inlibraryapi
52 * \ingroup module_simd
55 namespace gmx
58 /*****************************************************************************
59 * Single-precision utility load/store functions mimicking SIMD versions *
60 *****************************************************************************/
/*! \brief Load 4 consecutive floats from base/offset into four variables
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory area.
 * \param      offset  Index to data; only offset[0] is read by this scalar version.
 * \param[out] v0      1st float, base[align*offset[0]].
 * \param[out] v1      2nd float, base[align*offset[0] + 1].
 * \param[out] v2      3rd float, base[align*offset[0] + 2].
 * \param[out] v3      4th float, base[align*offset[0] + 3].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void
gatherLoadTranspose(const float* base, const std::int32_t offset[], float* v0, float* v1, float* v2, float* v3)
{
    // Address of the first element to read; the rest follow contiguously.
    const float* const src = base + align * offset[0];

    *v0 = src[0];
    *v1 = src[1];
    *v2 = src[2];
    *v3 = src[3];
}
/*! \brief Load 2 consecutive floats from base/offset into two variables
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory area.
 * \param      offset  Index to data; only offset[0] is read by this scalar version.
 * \param[out] v0      1st float, base[align*offset[0]].
 * \param[out] v1      2nd float, base[align*offset[0] + 1].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void gatherLoadTranspose(const float* base, const std::int32_t offset[], float* v0, float* v1)
{
    // Address of the first element to read; the second follows directly.
    const float* const src = base + align * offset[0];

    *v0 = src[0];
    *v1 = src[1];
}
/*! \brief Load 3 consecutive floats from base/offset, store into three vars.
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory area.
 * \param      offset  Offset to the start of data; only offset[0] is read by
 *                     this scalar version.
 * \param[out] v0      1st value, base[align*offset[0]].
 * \param[out] v1      2nd value, base[align*offset[0] + 1].
 * \param[out] v2      3rd value, base[align*offset[0] + 2].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void
gatherLoadUTranspose(const float* base, const std::int32_t offset[], float* v0, float* v1, float* v2)
{
    // Address of the first element of the triplet.
    const float* const src = base + align * offset[0];

    *v0 = src[0];
    *v1 = src[1];
    *v2 = src[2];
}
/*! \brief Store 3 floats to 3 consecutive locations at base/offset.
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param[out] base    Pointer to the start of the memory area.
 * \param      offset  Offset to the start of triplet; only offset[0] is read
 *                     by this scalar version.
 * \param      v0      1st value, written to base[align*offset[0]].
 * \param      v1      2nd value, written to base[align*offset[0] + 1].
 * \param      v2      3rd value, written to base[align*offset[0] + 2].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void transposeScatterStoreU(float* base, const std::int32_t offset[], float v0, float v1, float v2)
{
    // Address of the first element of the destination triplet.
    float* const dst = base + align * offset[0];

    dst[0] = v0;
    dst[1] = v1;
    dst[2] = v2;
}
/*! \brief Add 3 floats to 3 consecutive locations at base/offset.
 *
 * \tparam        align   Multiplier applied to the offset, i.e. the distance
 *                        (measured in elements, not bytes) between index points.
 * \param[in,out] base    Pointer to the start of the memory area.
 * \param         offset  Offset to the start of triplet; only offset[0] is
 *                        read by this scalar version.
 * \param         v0      1st value, added to base[align*offset[0]].
 * \param         v1      2nd value, added to base[align*offset[0] + 1].
 * \param         v2      3rd value, added to base[align*offset[0] + 2].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void transposeScatterIncrU(float* base, const std::int32_t offset[], float v0, float v1, float v2)
{
    // Address of the first element of the triplet being incremented.
    float* const dst = base + align * offset[0];

    dst[0] += v0;
    dst[1] += v1;
    dst[2] += v2;
}
/*! \brief Subtract 3 floats from 3 consecutive locations at base/offset.
 *
 * \tparam        align   Multiplier applied to the offset, i.e. the distance
 *                        (measured in elements, not bytes) between index points.
 * \param[in,out] base    Pointer to the start of the memory area.
 * \param         offset  Offset to the start of triplet; only offset[0] is
 *                        read by this scalar version.
 * \param         v0      1st value, subtracted from base[align*offset[0]].
 * \param         v1      2nd value, subtracted from base[align*offset[0] + 1].
 * \param         v2      3rd value, subtracted from base[align*offset[0] + 2].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void transposeScatterDecrU(float* base, const std::int32_t offset[], float v0, float v1, float v2)
{
    // Address of the first element of the triplet being decremented.
    float* const dst = base + align * offset[0];

    dst[0] -= v0;
    dst[1] -= v1;
    dst[2] -= v2;
}
/*! \brief Copy single float to three variables.
 *
 * \param      scalar     Floating-point input.
 * \param[out] triplets0  Copy 1.
 * \param[out] triplets1  Copy 2.
 * \param[out] triplets2  Copy 3.
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
static inline void expandScalarsToTriplets(float scalar, float* triplets0, float* triplets1, float* triplets2)
{
    // Write the same value through each output pointer, in argument order.
    float* const destinations[3] = { triplets0, triplets1, triplets2 };
    for (float* const dest : destinations)
    {
        *dest = scalar;
    }
}
/*! \brief Load 4 floats from base/offset and store into variables.
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory.
 * \param      offset  Integer offset to the start of the data (a single
 *                     value in this scalar version).
 * \param[out] v0      First float, base[align*offset].
 * \param[out] v1      Second float, base[align*offset + 1].
 * \param[out] v2      Third float, base[align*offset + 2].
 * \param[out] v3      Fourth float, base[align*offset + 3].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void
gatherLoadBySimdIntTranspose(const float* base, std::int32_t offset, float* v0, float* v1, float* v2, float* v3)
{
    // Address of the first element to read; the rest follow contiguously.
    const float* const src = base + align * offset;

    *v0 = src[0];
    *v1 = src[1];
    *v2 = src[2];
    *v3 = src[3];
}
/*! \brief Load 2 floats from base/offset and store into variables (unaligned).
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory.
 * \param      offset  Integer offset to the start of the data (a single
 *                     value in this scalar version).
 * \param[out] v0      First float, base[align*offset].
 * \param[out] v1      Second float, base[align*offset + 1].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void gatherLoadUBySimdIntTranspose(const float* base, std::int32_t offset, float* v0, float* v1)
{
    // Address of the first element to read; the second follows directly.
    const float* const src = base + align * offset;

    *v0 = src[0];
    *v1 = src[1];
}
/*! \brief Load 2 floats from base/offset and store into variables (aligned).
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory.
 * \param      offset  Integer offset to the start of the data (a single
 *                     value in this scalar version).
 * \param[out] v0      First float, base[align*offset].
 * \param[out] v1      Second float, base[align*offset + 1].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void gatherLoadBySimdIntTranspose(const float* base, std::int32_t offset, float* v0, float* v1)
{
    // Address of the first element to read; the second follows directly.
    const float* const src = base + align * offset;

    *v0 = src[0];
    *v1 = src[1];
}
/*! \brief Add each float to four consecutive memory locations, return sum.
 *
 * \param[in,out] m   Pointer to memory where four floats should be incremented.
 * \param         v0  float to be added to m[0].
 * \param         v1  float to be added to m[1].
 * \param         v2  float to be added to m[2].
 * \param         v3  float to be added to m[3].
 *
 * \return v0+v1+v2+v3.
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
static inline float reduceIncr4ReturnSum(float* m, float v0, float v1, float v2, float v3)
{
    const float increments[4] = { v0, v1, v2, v3 };
    for (int i = 0; i < 4; i++)
    {
        m[i] += increments[i];
    }

    // The sum is formed from the arguments themselves, not from the updated memory.
    return v0 + v1 + v2 + v3;
}
303 /*****************************************************************************
304 * Double-precision utility load/store functions mimicking SIMD versions *
305 *****************************************************************************/
/*! \brief Load 4 consecutive doubles from base/offset into four variables
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory area.
 * \param      offset  Index to data; only offset[0] is read by this scalar version.
 * \param[out] v0      1st double, base[align*offset[0]].
 * \param[out] v1      2nd double, base[align*offset[0] + 1].
 * \param[out] v2      3rd double, base[align*offset[0] + 2].
 * \param[out] v3      4th double, base[align*offset[0] + 3].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void gatherLoadTranspose(const double*      base,
                                       const std::int32_t offset[],
                                       double*            v0,
                                       double*            v1,
                                       double*            v2,
                                       double*            v3)
{
    // Address of the first element to read; the rest follow contiguously.
    const double* const src = base + align * offset[0];

    *v0 = src[0];
    *v1 = src[1];
    *v2 = src[2];
    *v3 = src[3];
}
/*! \brief Load 2 consecutive doubles from base/offset into two variables
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory area.
 * \param      offset  Index to data; only offset[0] is read by this scalar version.
 * \param[out] v0      1st double, base[align*offset[0]].
 * \param[out] v1      2nd double, base[align*offset[0] + 1].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void gatherLoadTranspose(const double* base, const std::int32_t offset[], double* v0, double* v1)
{
    // Address of the first element to read; the second follows directly.
    const double* const src = base + align * offset[0];

    *v0 = src[0];
    *v1 = src[1];
}
/*! \brief Load 3 consecutive doubles from base/offset, store into three vars.
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory area.
 * \param      offset  Offset to the start of data; only offset[0] is read by
 *                     this scalar version.
 * \param[out] v0      1st double, base[align*offset[0]].
 * \param[out] v1      2nd double, base[align*offset[0] + 1].
 * \param[out] v2      3rd double, base[align*offset[0] + 2].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void
gatherLoadUTranspose(const double* base, const std::int32_t offset[], double* v0, double* v1, double* v2)
{
    // Address of the first element of the triplet.
    const double* const src = base + align * offset[0];

    *v0 = src[0];
    *v1 = src[1];
    *v2 = src[2];
}
/*! \brief Store 3 doubles to 3 consecutive locations at base/offset.
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param[out] base    Pointer to the start of the memory area.
 * \param      offset  Offset to the start of triplet; only offset[0] is read
 *                     by this scalar version.
 * \param      v0      1st value, written to base[align*offset[0]].
 * \param      v1      2nd value, written to base[align*offset[0] + 1].
 * \param      v2      3rd value, written to base[align*offset[0] + 2].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void transposeScatterStoreU(double* base, const std::int32_t offset[], double v0, double v1, double v2)
{
    // Address of the first element of the destination triplet.
    double* const dst = base + align * offset[0];

    dst[0] = v0;
    dst[1] = v1;
    dst[2] = v2;
}
/*! \brief Add 3 doubles to 3 consecutive locations at base/offset.
 *
 * \tparam        align   Multiplier applied to the offset, i.e. the distance
 *                        (measured in elements, not bytes) between index points.
 * \param[in,out] base    Pointer to the start of the memory area.
 * \param         offset  Offset to the start of triplet; only offset[0] is
 *                        read by this scalar version.
 * \param         v0      1st value, added to base[align*offset[0]].
 * \param         v1      2nd value, added to base[align*offset[0] + 1].
 * \param         v2      3rd value, added to base[align*offset[0] + 2].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void transposeScatterIncrU(double* base, const std::int32_t offset[], double v0, double v1, double v2)
{
    // Address of the first element of the triplet being incremented.
    double* const dst = base + align * offset[0];

    dst[0] += v0;
    dst[1] += v1;
    dst[2] += v2;
}
/*! \brief Subtract 3 doubles from 3 consecutive locations at base/offset.
 *
 * \tparam        align   Multiplier applied to the offset, i.e. the distance
 *                        (measured in elements, not bytes) between index points.
 * \param[in,out] base    Pointer to the start of the memory area.
 * \param         offset  Offset to the start of triplet; only offset[0] is
 *                        read by this scalar version.
 * \param         v0      1st value, subtracted from base[align*offset[0]].
 * \param         v1      2nd value, subtracted from base[align*offset[0] + 1].
 * \param         v2      3rd value, subtracted from base[align*offset[0] + 2].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void transposeScatterDecrU(double* base, const std::int32_t offset[], double v0, double v1, double v2)
{
    // Address of the first element of the triplet being decremented.
    double* const dst = base + align * offset[0];

    dst[0] -= v0;
    dst[1] -= v1;
    dst[2] -= v2;
}
/*! \brief Copy single double to three variables.
 *
 * \param      scalar     Floating-point input.
 * \param[out] triplets0  Copy 1.
 * \param[out] triplets1  Copy 2.
 * \param[out] triplets2  Copy 3.
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
static inline void expandScalarsToTriplets(double scalar, double* triplets0, double* triplets1, double* triplets2)
{
    // Write the same value through each output pointer, in argument order.
    double* const destinations[3] = { triplets0, triplets1, triplets2 };
    for (double* const dest : destinations)
    {
        *dest = scalar;
    }
}
/*! \brief Load 4 doubles from base/offset and store into variables.
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory.
 * \param      offset  Integer offset to the start of the data (a single
 *                     value in this scalar version).
 * \param[out] v0      First double, base[align*offset].
 * \param[out] v1      Second double, base[align*offset + 1].
 * \param[out] v2      Third double, base[align*offset + 2].
 * \param[out] v3      Fourth double, base[align*offset + 3].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void gatherLoadBySimdIntTranspose(const double* base,
                                                std::int32_t  offset,
                                                double*       v0,
                                                double*       v1,
                                                double*       v2,
                                                double*       v3)
{
    // Address of the first element to read; the rest follow contiguously.
    const double* const src = base + align * offset;

    *v0 = src[0];
    *v1 = src[1];
    *v2 = src[2];
    *v3 = src[3];
}
/*! \brief Load 2 doubles from base/offset and store into variables (unaligned).
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory.
 * \param      offset  Integer offset to the start of the data (a single
 *                     value in this scalar version).
 * \param[out] v0      First double, base[align*offset].
 * \param[out] v1      Second double, base[align*offset + 1].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void gatherLoadUBySimdIntTranspose(const double* base, std::int32_t offset, double* v0, double* v1)
{
    // Address of the first element to read; the second follows directly.
    const double* const src = base + align * offset;

    *v0 = src[0];
    *v1 = src[1];
}
/*! \brief Load 2 doubles from base/offset and store into variables (aligned).
 *
 * \tparam     align   Multiplier applied to the offset, i.e. the distance
 *                     (measured in elements, not bytes) between index points.
 * \param      base    Pointer to the start of the memory.
 * \param      offset  Integer offset to the start of the data (a single
 *                     value in this scalar version).
 * \param[out] v0      First double, base[align*offset].
 * \param[out] v1      Second double, base[align*offset + 1].
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
template<int align>
static inline void gatherLoadBySimdIntTranspose(const double* base, std::int32_t offset, double* v0, double* v1)
{
    // Address of the first element to read; the second follows directly.
    const double* const src = base + align * offset;

    *v0 = src[0];
    *v1 = src[1];
}
/*! \brief Add each double to four consecutive memory locations, return sum.
 *
 * \param[in,out] m   Pointer to memory where four doubles should be incremented.
 * \param         v0  double to be added to m[0].
 * \param         v1  double to be added to m[1].
 * \param         v2  double to be added to m[2].
 * \param         v3  double to be added to m[3].
 *
 * \return v0+v1+v2+v3.
 *
 * \note This function might be superficially meaningless, but it helps us to
 *       write templated SIMD/non-SIMD code. For clarity it should not be used
 *       outside such code.
 */
static inline double reduceIncr4ReturnSum(double* m, double v0, double v1, double v2, double v3)
{
    const double increments[4] = { v0, v1, v2, v3 };
    for (int i = 0; i < 4; i++)
    {
        m[i] += increments[i];
    }

    // The sum is formed from the arguments themselves, not from the updated memory.
    return v0 + v1 + v2 + v3;
}
555 } // namespace gmx
558 #endif // GMX_SIMD_SCALAR_UTIL_H