src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_util_double.h
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H
#define GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H

#include "config.h"

#include "gromacs/utility/basedefinitions.h"

#include "impl_ibm_vsx_definitions.h"
#include "impl_ibm_vsx_simd_double.h"

namespace gmx
{
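
// Load 4 consecutive doubles from base + align*offset[0] and from
// base + align*offset[1], and transpose them so that v0..v3 hold
// element 0..3 of both rows (the VSX double SIMD width is 2). Each
// row address is dereferenced as a __vector double pointer, so it
// must be 16-byte aligned.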
template <int align>
static inline void gmx_simdcall
gatherLoadTranspose(const double *     base,
                    const std::int32_t offset[],
                    SimdDouble *       v0,
                    SimdDouble *       v1,
                    SimdDouble *       v2,
                    SimdDouble *       v3)
{
    __vector double t1, t2, t3, t4;

    t1 = *reinterpret_cast<const __vector double *>(base + align * offset[0]);
    t2 = *reinterpret_cast<const __vector double *>(base + align * offset[1]);
    t3 = *reinterpret_cast<const __vector double *>(base + align * offset[0] + 2);
    t4 = *reinterpret_cast<const __vector double *>(base + align * offset[1] + 2);

    v0->simdInternal_ = vec_mergeh(t1, t2);
    v1->simdInternal_ = vec_mergel(t1, t2);
    v2->simdInternal_ = vec_mergeh(t3, t4);
    v3->simdInternal_ = vec_mergel(t3, t4);
}
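
// Two-output variant: load 2 consecutive doubles from each of the two
// rows and transpose them, so v0 holds the first elements and v1 the
// second elements of both rows.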
template <int align>
static inline void gmx_simdcall
gatherLoadTranspose(const double *     base,
                    const std::int32_t offset[],
                    SimdDouble *       v0,
                    SimdDouble *       v1)
{
    __vector double t1, t2;

    t1 = *reinterpret_cast<const __vector double *>(base + align * offset[0]);
    t2 = *reinterpret_cast<const __vector double *>(base + align * offset[1]);

    v0->simdInternal_ = vec_mergeh(t1, t2);
    v1->simdInternal_ = vec_mergel(t1, t2);
}
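
// Preferred alignment, in units of double, for data accessed as pairs through
// the two-output transpose routines above; a stride of 2 keeps each pair on a
// 16-byte boundary when the base pointer is SIMD-aligned.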
static const int c_simdBestPairAlignmentDouble = 2;
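
// Gather three consecutive doubles (e.g. an x/y/z triplet) from each of the
// two rows and transpose them: v0 and v1 get the first and second elements,
// while v2 is assembled from the two third elements via scalar broadcasts.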
template <int align>
static inline void gmx_simdcall
gatherLoadUTranspose(const double *     base,
                     const std::int32_t offset[],
                     SimdDouble *       v0,
                     SimdDouble *       v1,
                     SimdDouble *       v2)
{
    SimdDouble t1, t2;

    t1 = simdLoad(base + align * offset[0]);
    t2 = simdLoad(base + align * offset[1]);

    v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
    v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
    v2->simdInternal_ = vec_mergeh(vec_splats(*(base + align * offset[0] + 2)),
                                   vec_splats(*(base + align * offset[1] + 2)));
}
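
// Inverse of the gather above: transpose v0/v1 back into per-row pairs, store
// them at base + align*offset[i], and write the two lanes of v2 as scalars
// into the third position of each row.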
// gcc-4.9 fails to recognize that the argument to vec_extract() is used
template <int align>
static inline void gmx_simdcall
transposeScatterStoreU(double *              base,
                       const std::int32_t    offset[],
                       SimdDouble            v0,
                       SimdDouble            v1,
                       SimdDouble gmx_unused v2)
{
    SimdDouble t1, t2;

    t1.simdInternal_ = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
    t2.simdInternal_ = vec_mergel(v0.simdInternal_, v1.simdInternal_);

    store(base + align * offset[0], t1);
    base[align * offset[0] + 2] = vec_extract(v2.simdInternal_, 0);
    store(base + align * offset[1], t2);
    base[align * offset[1] + 2] = vec_extract(v2.simdInternal_, 1);
}
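
// Transpose v0/v1/v2 and add (increment) the result to the three doubles
// stored at base + align*offset[i]. When align is a multiple of 4 the update
// is done with full-vector read-modify-write (the fourth element is rewritten
// unchanged by adding 0.0); otherwise the third element of each row is
// updated as a scalar.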
template <int align>
static inline void gmx_simdcall
transposeScatterIncrU(double *           base,
                      const std::int32_t offset[],
                      SimdDouble         v0,
                      SimdDouble         v1,
                      SimdDouble         v2)
{
    if (align % 4 == 0)
    {
        __vector double t1, t2, t3, t4;
        SimdDouble      t5, t6, t7, t8;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
        t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
        t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));

        t5               = simdLoad(base + align * offset[0]);
        t6               = simdLoad(base + align * offset[0] + 2);
        t5.simdInternal_ = vec_add(t5.simdInternal_, t1);
        t6.simdInternal_ = vec_add(t6.simdInternal_, t3);
        store(base + align * offset[0], t5);
        store(base + align * offset[0] + 2, t6);

        t5               = simdLoad(base + align * offset[1]);
        t6               = simdLoad(base + align * offset[1] + 2);
        t5.simdInternal_ = vec_add(t5.simdInternal_, t2);
        t6.simdInternal_ = vec_add(t6.simdInternal_, t4);
        store(base + align * offset[1], t5);
        store(base + align * offset[1] + 2, t6);
    }
    else
    {
        __vector double t1, t2;
        SimdDouble      t3, t4;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);

        t3               = simdLoad(base + align * offset[0]);
        t3.simdInternal_ = vec_add(t3.simdInternal_, t1);
        store(base + align * offset[0], t3);
        base[align * offset[0] + 2] += vec_extract(v2.simdInternal_, 0);

        t4               = simdLoad(base + align * offset[1]);
        t4.simdInternal_ = vec_add(t4.simdInternal_, t2);
        store(base + align * offset[1], t4);
        base[align * offset[1] + 2] += vec_extract(v2.simdInternal_, 1);
    }
}
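
// Same as transposeScatterIncrU, but subtracting the transposed values from
// memory instead of adding them.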
template <int align>
static inline void gmx_simdcall
transposeScatterDecrU(double *           base,
                      const std::int32_t offset[],
                      SimdDouble         v0,
                      SimdDouble         v1,
                      SimdDouble         v2)
{
    if (align % 4 == 0)
    {
        __vector double t1, t2, t3, t4;
        SimdDouble      t5, t6, t7, t8;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
        t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
        t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));

        t5               = simdLoad(base + align * offset[0]);
        t6               = simdLoad(base + align * offset[0] + 2);
        t5.simdInternal_ = vec_sub(t5.simdInternal_, t1);
        t6.simdInternal_ = vec_sub(t6.simdInternal_, t3);
        store(base + align * offset[0], t5);
        store(base + align * offset[0] + 2, t6);

        t5               = simdLoad(base + align * offset[1]);
        t6               = simdLoad(base + align * offset[1] + 2);
        t5.simdInternal_ = vec_sub(t5.simdInternal_, t2);
        t6.simdInternal_ = vec_sub(t6.simdInternal_, t4);
        store(base + align * offset[1], t5);
        store(base + align * offset[1] + 2, t6);
    }
    else
    {
        __vector double t1, t2;
        SimdDouble      t3, t4;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);

        t3               = simdLoad(base + align * offset[0]);
        t3.simdInternal_ = vec_sub(t3.simdInternal_, t1);
        store(base + align * offset[0], t3);
        base[align * offset[0] + 2] -= vec_extract(v2.simdInternal_, 0);

        t4               = simdLoad(base + align * offset[1]);
        t4.simdInternal_ = vec_sub(t4.simdInternal_, t2);
        store(base + align * offset[1], t4);
        base[align * offset[1] + 2] -= vec_extract(v2.simdInternal_, 1);
    }
}
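
// Expand the two scalars {s0, s1} held in 'scalar' into three registers that
// together spell {s0, s0}, {s0, s1}, {s1, s1}, i.e. each scalar repeated three
// times across the concatenated outputs, typically used when one scalar factor
// multiplies an x/y/z triplet.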
static inline void gmx_simdcall
expandScalarsToTriplets(SimdDouble   scalar,
                        SimdDouble * triplets0,
                        SimdDouble * triplets1,
                        SimdDouble * triplets2)
{
    triplets0->simdInternal_ = vec_mergeh(scalar.simdInternal_, scalar.simdInternal_);
    triplets1->simdInternal_ = scalar.simdInternal_;
    triplets2->simdInternal_ = vec_mergel(scalar.simdInternal_, scalar.simdInternal_);
}
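
// Same as gatherLoadTranspose with four outputs, but with the offsets supplied
// in a SIMD integer register: the offsets are first spilled to an aligned
// scratch array and the aligned gather above is reused.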
template <int align>
static inline void gmx_simdcall
gatherLoadBySimdIntTranspose(const double * base,
                             SimdDInt32     offset,
                             SimdDouble *   v0,
                             SimdDouble *   v1,
                             SimdDouble *   v2,
                             SimdDouble *   v3)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);
    gatherLoadTranspose<align>(base, ioffset, v0, v1, v2, v3);
}
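
// Two-output variant of the SIMD-integer-offset gather above.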
template <int align>
static inline void gmx_simdcall
gatherLoadBySimdIntTranspose(const double * base,
                             SimdDInt32     offset,
                             SimdDouble *   v0,
                             SimdDouble *   v1)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);
    gatherLoadTranspose<align>(base, ioffset, v0, v1);
}
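
// As the two-output gather above, but for rows that are not guaranteed to be
// SIMD-aligned: the pairs are loaded with simdLoadU before being transposed.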
template <int align>
static inline void gmx_simdcall
gatherLoadUBySimdIntTranspose(const double * base,
                              SimdDInt32     offset,
                              SimdDouble *   v0,
                              SimdDouble *   v1)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);

    SimdDouble t1 = simdLoadU(base + align * ioffset[0]);
    SimdDouble t2 = simdLoadU(base + align * ioffset[1]);

    v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
    v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
}
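
// Transpose-and-reduce: add the horizontal sum of v0..v3 to m[0]..m[3]
// respectively, and return the total sum over all four registers. m is
// accessed as two __vector double values, so it must be 16-byte aligned.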
static inline double gmx_simdcall
reduceIncr4ReturnSum(double *   m,
                     SimdDouble v0,
                     SimdDouble v1,
                     SimdDouble v2,
                     SimdDouble v3)
{
    __vector double t1, t2, t3, t4;

    t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
    t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
    t3 = vec_mergeh(v2.simdInternal_, v3.simdInternal_);
    t4 = vec_mergel(v2.simdInternal_, v3.simdInternal_);

    t1 = vec_add(t1, t2);
    t3 = vec_add(t3, t4);

    *reinterpret_cast<__vector double *>(m)     += t1;
    *reinterpret_cast<__vector double *>(m + 2) += t3;

    t1 = vec_add(t1, t3);
    return reduce(t1);
}
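
/* Illustrative usage sketch (hypothetical caller, not part of this header):
 * accumulate four per-slot partial sums into a 4-element, 16-byte-aligned
 * output buffer and obtain the grand total in one call.
 *
 *     alignas(GMX_SIMD_ALIGNMENT) double buf[4] = { 0, 0, 0, 0 };
 *     SimdDouble e0(1.0), e1(2.0), e2(3.0), e3(4.0);   // each lane broadcast
 *     double total = reduceIncr4ReturnSum(buf, e0, e1, e2, e3);
 *     // buf[i] += sum of lanes of ei, so buf becomes {2, 4, 6, 8};
 *     // total == 20 here.
 */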

}      // namespace gmx

#endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H