src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_util_double.h
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H
#define GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H

#include "config.h"

#include <cstdint>

#include "gromacs/utility/basedefinitions.h"

#include "impl_ibm_vsx_definitions.h"
#include "impl_ibm_vsx_simd_double.h"
namespace gmx
{
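
// Transposed gather of four consecutive doubles from each of the two rows at
// base + align * offset[i]. With the VSX SIMD width of two doubles there is
// one row per SIMD lane; after the merge operations v0..v3 hold element 0..3
// of both rows. The rows are read with aligned full-vector loads.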
template<int align>
static inline void gmx_simdcall gatherLoadTranspose(const double*      base,
                                                    const std::int32_t offset[],
                                                    SimdDouble*        v0,
                                                    SimdDouble*        v1,
                                                    SimdDouble*        v2,
                                                    SimdDouble*        v3)
{
    __vector double t1, t2, t3, t4;

    t1 = *reinterpret_cast<const __vector double*>(base + align * offset[0]);
    t2 = *reinterpret_cast<const __vector double*>(base + align * offset[1]);
    t3 = *reinterpret_cast<const __vector double*>(base + align * offset[0] + 2);
    t4 = *reinterpret_cast<const __vector double*>(base + align * offset[1] + 2);

    v0->simdInternal_ = vec_mergeh(t1, t2);
    v1->simdInternal_ = vec_mergel(t1, t2);
    v2->simdInternal_ = vec_mergeh(t3, t4);
    v3->simdInternal_ = vec_mergel(t3, t4);
}
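
// Two-output overload of the transposed gather above: loads one pair of
// doubles from each of the two rows and transposes them into v0 and v1.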
template<int align>
static inline void gmx_simdcall
gatherLoadTranspose(const double* base, const std::int32_t offset[], SimdDouble* v0, SimdDouble* v1)
{
    __vector double t1, t2;

    t1 = *reinterpret_cast<const __vector double*>(base + align * offset[0]);
    t2 = *reinterpret_cast<const __vector double*>(base + align * offset[1]);

    v0->simdInternal_ = vec_mergeh(t1, t2);
    v1->simdInternal_ = vec_mergel(t1, t2);
}
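
// Preferred alignment for pairwise loads: two doubles exactly fill one
// 128-bit VSX register, so an alignment of 2 keeps each pair load full-width.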
static const int c_simdBestPairAlignmentDouble = 2;
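
// Transposed gather of three doubles per row from two (possibly unaligned)
// rows. The first two elements of each row come from vector loads; the two
// third elements are picked up scalar-wise with vec_splats() and merged
// into v2.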
template<int align>
static inline void gmx_simdcall gatherLoadUTranspose(const double*      base,
                                                     const std::int32_t offset[],
                                                     SimdDouble*        v0,
                                                     SimdDouble*        v1,
                                                     SimdDouble*        v2)
{
    SimdDouble t1, t2;

    t1 = simdLoad(base + align * offset[0]);
    t2 = simdLoad(base + align * offset[1]);

    v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
    v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
    v2->simdInternal_ = vec_mergeh(vec_splats(*(base + align * offset[0] + 2)),
                                   vec_splats(*(base + align * offset[1] + 2)));
}
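
// Transposed scatter: writes the triplet {v0,v1,v2}[lane] to each of the two
// rows. The first two elements go out as one vector store per row; the third
// is extracted and stored scalar-wise. v2 is tagged gmx_unused only to work
// around the gcc-4.9 warning noted below.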
// gcc-4.9 fails to recognize that the argument to vec_extract() is used
template<int align>
static inline void gmx_simdcall transposeScatterStoreU(double*               base,
                                                       const std::int32_t    offset[],
                                                       SimdDouble            v0,
                                                       SimdDouble            v1,
                                                       SimdDouble gmx_unused v2)
{
    SimdDouble t1, t2;

    t1.simdInternal_ = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
    t2.simdInternal_ = vec_mergel(v0.simdInternal_, v1.simdInternal_);

    store(base + align * offset[0], t1);
    base[align * offset[0] + 2] = vec_extract(v2.simdInternal_, 0);
    store(base + align * offset[1], t2);
    base[align * offset[1] + 2] = vec_extract(v2.simdInternal_, 1);
}
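
// Adds the triplet {v0,v1,v2}[lane] to each of the two rows. When align is a
// multiple of 4 every row owns at least four doubles, so both halves can be
// updated with full vector read-add-write cycles (the fourth slot receives
// +0.0); otherwise the third element falls back to a scalar update.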
template<int align>
static inline void gmx_simdcall
transposeScatterIncrU(double* base, const std::int32_t offset[], SimdDouble v0, SimdDouble v1, SimdDouble v2)
{
    if (align % 4 == 0)
    {
        __vector double t1, t2, t3, t4;
        SimdDouble      t5, t6;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
        // Pad the third element with 0.0 so the fourth memory slot is unchanged
        t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
        t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));

        t5               = simdLoad(base + align * offset[0]);
        t6               = simdLoad(base + align * offset[0] + 2);
        t5.simdInternal_ = vec_add(t5.simdInternal_, t1);
        t6.simdInternal_ = vec_add(t6.simdInternal_, t3);
        store(base + align * offset[0], t5);
        store(base + align * offset[0] + 2, t6);

        t5               = simdLoad(base + align * offset[1]);
        t6               = simdLoad(base + align * offset[1] + 2);
        t5.simdInternal_ = vec_add(t5.simdInternal_, t2);
        t6.simdInternal_ = vec_add(t6.simdInternal_, t4);
        store(base + align * offset[1], t5);
        store(base + align * offset[1] + 2, t6);
    }
    else
    {
        __vector double t1, t2;
        SimdDouble      t3, t4;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);

        t3               = simdLoad(base + align * offset[0]);
        t3.simdInternal_ = vec_add(t3.simdInternal_, t1);
        store(base + align * offset[0], t3);
        base[align * offset[0] + 2] += vec_extract(v2.simdInternal_, 0);

        t4               = simdLoad(base + align * offset[1]);
        t4.simdInternal_ = vec_add(t4.simdInternal_, t2);
        store(base + align * offset[1], t4);
        base[align * offset[1] + 2] += vec_extract(v2.simdInternal_, 1);
    }
}
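
// Mirror image of transposeScatterIncrU: subtracts the triplets instead,
// using the same full-vector path for align % 4 == 0 and the same scalar
// fallback for the third element otherwise.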
template<int align>
static inline void gmx_simdcall
transposeScatterDecrU(double* base, const std::int32_t offset[], SimdDouble v0, SimdDouble v1, SimdDouble v2)
{
    if (align % 4 == 0)
    {
        __vector double t1, t2, t3, t4;
        SimdDouble      t5, t6;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
        // Pad the third element with 0.0 so the fourth memory slot is unchanged
        t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
        t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));

        t5               = simdLoad(base + align * offset[0]);
        t6               = simdLoad(base + align * offset[0] + 2);
        t5.simdInternal_ = vec_sub(t5.simdInternal_, t1);
        t6.simdInternal_ = vec_sub(t6.simdInternal_, t3);
        store(base + align * offset[0], t5);
        store(base + align * offset[0] + 2, t6);

        t5               = simdLoad(base + align * offset[1]);
        t6               = simdLoad(base + align * offset[1] + 2);
        t5.simdInternal_ = vec_sub(t5.simdInternal_, t2);
        t6.simdInternal_ = vec_sub(t6.simdInternal_, t4);
        store(base + align * offset[1], t5);
        store(base + align * offset[1] + 2, t6);
    }
    else
    {
        __vector double t1, t2;
        SimdDouble      t3, t4;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);

        t3               = simdLoad(base + align * offset[0]);
        t3.simdInternal_ = vec_sub(t3.simdInternal_, t1);
        store(base + align * offset[0], t3);
        base[align * offset[0] + 2] -= vec_extract(v2.simdInternal_, 0);

        t4               = simdLoad(base + align * offset[1]);
        t4.simdInternal_ = vec_sub(t4.simdInternal_, t2);
        store(base + align * offset[1], t4);
        base[align * offset[1] + 2] -= vec_extract(v2.simdInternal_, 1);
    }
}
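
// Expands the two scalars {s0, s1} into three registers laid out as
// {s0, s0}, {s0, s1}, {s1, s1}, i.e. each scalar repeated three times
// across consecutive lanes.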
static inline void gmx_simdcall expandScalarsToTriplets(SimdDouble  scalar,
                                                        SimdDouble* triplets0,
                                                        SimdDouble* triplets1,
                                                        SimdDouble* triplets2)
{
    triplets0->simdInternal_ = vec_mergeh(scalar.simdInternal_, scalar.simdInternal_);
    triplets1->simdInternal_ = scalar.simdInternal_;
    triplets2->simdInternal_ = vec_mergel(scalar.simdInternal_, scalar.simdInternal_);
}
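
// SimdDInt32-indexed variants: the vector of offsets is spilled to an
// aligned scalar array and the work is forwarded to the array-offset
// gathers above.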
template<int align>
static inline void gmx_simdcall gatherLoadBySimdIntTranspose(const double* base,
                                                             SimdDInt32    offset,
                                                             SimdDouble*   v0,
                                                             SimdDouble*   v1,
                                                             SimdDouble*   v2,
                                                             SimdDouble*   v3)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);
    gatherLoadTranspose<align>(base, ioffset, v0, v1, v2, v3);
}
template<int align>
static inline void gmx_simdcall
gatherLoadBySimdIntTranspose(const double* base, SimdDInt32 offset, SimdDouble* v0, SimdDouble* v1)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);
    gatherLoadTranspose<align>(base, ioffset, v0, v1);
}
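
// Unaligned counterpart: performs the pair loads with simdLoadU directly
// instead of forwarding, so rows need not be vector-aligned.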
template<int align>
static inline void gmx_simdcall
gatherLoadUBySimdIntTranspose(const double* base, SimdDInt32 offset, SimdDouble* v0, SimdDouble* v1)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);

    SimdDouble t1 = simdLoadU(base + align * ioffset[0]);
    SimdDouble t2 = simdLoadU(base + align * ioffset[1]);

    v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
    v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
}
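
// Transposes and horizontally sums the four inputs, adds the four per-input
// sums to m[0..3] (m is assumed suitably aligned for the vector
// read-modify-write), and returns the grand total of all four sums.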
static inline double gmx_simdcall
reduceIncr4ReturnSum(double* m, SimdDouble v0, SimdDouble v1, SimdDouble v2, SimdDouble v3)
{
    __vector double t1, t2, t3, t4;

    t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
    t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
    t3 = vec_mergeh(v2.simdInternal_, v3.simdInternal_);
    t4 = vec_mergel(v2.simdInternal_, v3.simdInternal_);

    t1 = vec_add(t1, t2);
    t3 = vec_add(t3, t4);

    *reinterpret_cast<__vector double*>(m) += t1;
    *reinterpret_cast<__vector double*>(m + 2) += t3;

    t1 = vec_add(t1, t3);
    return reduce(SimdDouble(t1));
}
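
/* Illustrative usage sketch (hypothetical names, not part of this header):
 * a kernel would gather coordinate triplets for two particles, compute on
 * them, and scatter force increments back with the matching stride:
 *
 *     SimdDouble x, y, z, fx, fy, fz;
 *     gatherLoadUTranspose<3>(xyz, idx, &x, &y, &z);
 *     // ... compute fx, fy, fz from x, y, z ...
 *     transposeScatterIncrU<3>(force, idx, fx, fy, fz);
 */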
} // namespace gmx

#endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H