/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H
#define GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H

#include <cstdint>

#include "gromacs/utility/basedefinitions.h"

#include "impl_ibm_vsx_definitions.h"
#include "impl_ibm_vsx_simd_double.h"

namespace gmx
{
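
// Gather four consecutive doubles from each of the two rows starting at
// base + align * offset[0] and base + align * offset[1], and transpose them so
// that v0..v3 each hold the corresponding element from both rows (the VSX
// double SIMD width is 2).
//
// Hypothetical usage sketch (the parameter table and variable names are
// illustrative only, not part of this header):
//
//     alignas(GMX_SIMD_ALIGNMENT) double       params[8 * 4];
//     alignas(GMX_SIMD_ALIGNMENT) std::int32_t idx[GMX_SIMD_DOUBLE_WIDTH] = { 0, 3 };
//     SimdDouble c6, c12, sigma, epsilon;
//     gatherLoadTranspose<4>(params, idx, &c6, &c12, &sigma, &epsilon);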
template<int align>
static inline void gmx_simdcall gatherLoadTranspose(const double*      base,
                                                    const std::int32_t offset[],
                                                    SimdDouble*        v0,
                                                    SimdDouble*        v1,
                                                    SimdDouble*        v2,
                                                    SimdDouble*        v3)
{
    __vector double t1, t2, t3, t4;

    t1 = *reinterpret_cast<const __vector double*>(base + align * offset[0]);
    t2 = *reinterpret_cast<const __vector double*>(base + align * offset[1]);
    t3 = *reinterpret_cast<const __vector double*>(base + align * offset[0] + 2);
    t4 = *reinterpret_cast<const __vector double*>(base + align * offset[1] + 2);

    v0->simdInternal_ = vec_mergeh(t1, t2);
    v1->simdInternal_ = vec_mergel(t1, t2);
    v2->simdInternal_ = vec_mergeh(t3, t4);
    v3->simdInternal_ = vec_mergel(t3, t4);
}
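
// Two-output variant: gather and transpose only the first two doubles from
// each of the two rows.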
template<int align>
static inline void gmx_simdcall
gatherLoadTranspose(const double* base, const std::int32_t offset[], SimdDouble* v0, SimdDouble* v1)
{
    __vector double t1, t2;

    t1 = *reinterpret_cast<const __vector double*>(base + align * offset[0]);
    t2 = *reinterpret_cast<const __vector double*>(base + align * offset[1]);

    v0->simdInternal_ = vec_mergeh(t1, t2);
    v1->simdInternal_ = vec_mergel(t1, t2);
}
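
// Preferred value of the align parameter when gathering/scattering pairs of doubles.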
static const int c_simdBestPairAlignmentDouble = 2;
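
// Gather three doubles from each of two (not necessarily padded) rows; the
// first two elements of each row are transposed into v0 and v1, and the third
// elements are broadcast and merged into v2.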
template<int align>
static inline void gmx_simdcall gatherLoadUTranspose(const double*      base,
                                                     const std::int32_t offset[],
                                                     SimdDouble*        v0,
                                                     SimdDouble*        v1,
                                                     SimdDouble*        v2)
{
    SimdDouble t1, t2;

    t1 = simdLoad(base + align * offset[0]);
    t2 = simdLoad(base + align * offset[1]);

    v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
    v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
    v2->simdInternal_ = vec_mergeh(vec_splats(*(base + align * offset[0] + 2)),
                                   vec_splats(*(base + align * offset[1] + 2)));
}
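
// Inverse of gatherLoadUTranspose: transpose v0/v1 back to row order and store
// three doubles per row; the third element of each row is taken from v2.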
// gcc-4.9 fails to recognize that the argument to vec_extract() is used
template<int align>
static inline void gmx_simdcall transposeScatterStoreU(double*               base,
                                                        const std::int32_t    offset[],
                                                        SimdDouble            v0,
                                                        SimdDouble            v1,
                                                        SimdDouble gmx_unused v2)
{
    SimdDouble t1, t2;

    t1.simdInternal_ = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
    t2.simdInternal_ = vec_mergel(v0.simdInternal_, v1.simdInternal_);

    store(base + align * offset[0], t1);
    base[align * offset[0] + 2] = vec_extract(v2.simdInternal_, 0);
    store(base + align * offset[1], t2);
    base[align * offset[1] + 2] = vec_extract(v2.simdInternal_, 1);
}
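
// Add the transposed contents of v0/v1/v2 to three doubles in each of the two
// rows at base + align * offset[0] and base + align * offset[1]. When align is
// a multiple of 4 the rows are padded, so full-width SIMD loads and stores can
// be used; otherwise the third element is updated with a scalar read-modify-write.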
template<int align>
static inline void gmx_simdcall
transposeScatterIncrU(double* base, const std::int32_t offset[], SimdDouble v0, SimdDouble v1, SimdDouble v2)
{
    if (align % 4 == 0)
    {
        __vector double t1, t2, t3, t4;
        SimdDouble      t5, t6, t7, t8;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
        t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
        t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));

        t5               = simdLoad(base + align * offset[0]);
        t6               = simdLoad(base + align * offset[0] + 2);
        t5.simdInternal_ = vec_add(t5.simdInternal_, t1);
        t6.simdInternal_ = vec_add(t6.simdInternal_, t3);
        store(base + align * offset[0], t5);
        store(base + align * offset[0] + 2, t6);

        t5               = simdLoad(base + align * offset[1]);
        t6               = simdLoad(base + align * offset[1] + 2);
        t5.simdInternal_ = vec_add(t5.simdInternal_, t2);
        t6.simdInternal_ = vec_add(t6.simdInternal_, t4);
        store(base + align * offset[1], t5);
        store(base + align * offset[1] + 2, t6);
    }
    else
    {
        __vector double t1, t2;
        SimdDouble      t3, t4;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);

        t3               = simdLoad(base + align * offset[0]);
        t3.simdInternal_ = vec_add(t3.simdInternal_, t1);
        store(base + align * offset[0], t3);
        base[align * offset[0] + 2] += vec_extract(v2.simdInternal_, 0);

        t4               = simdLoad(base + align * offset[1]);
        t4.simdInternal_ = vec_add(t4.simdInternal_, t2);
        store(base + align * offset[1], t4);
        base[align * offset[1] + 2] += vec_extract(v2.simdInternal_, 1);
    }
}
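
// Same as transposeScatterIncrU, but subtracting the transposed values from
// memory instead of adding them.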
template<int align>
static inline void gmx_simdcall
transposeScatterDecrU(double* base, const std::int32_t offset[], SimdDouble v0, SimdDouble v1, SimdDouble v2)
{
    if (align % 4 == 0)
    {
        __vector double t1, t2, t3, t4;
        SimdDouble      t5, t6, t7, t8;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
        t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
        t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));

        t5               = simdLoad(base + align * offset[0]);
        t6               = simdLoad(base + align * offset[0] + 2);
        t5.simdInternal_ = vec_sub(t5.simdInternal_, t1);
        t6.simdInternal_ = vec_sub(t6.simdInternal_, t3);
        store(base + align * offset[0], t5);
        store(base + align * offset[0] + 2, t6);

        t5               = simdLoad(base + align * offset[1]);
        t6               = simdLoad(base + align * offset[1] + 2);
        t5.simdInternal_ = vec_sub(t5.simdInternal_, t2);
        t6.simdInternal_ = vec_sub(t6.simdInternal_, t4);
        store(base + align * offset[1], t5);
        store(base + align * offset[1] + 2, t6);
    }
    else
    {
        __vector double t1, t2;
        SimdDouble      t3, t4;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);

        t3               = simdLoad(base + align * offset[0]);
        t3.simdInternal_ = vec_sub(t3.simdInternal_, t1);
        store(base + align * offset[0], t3);
        base[align * offset[0] + 2] -= vec_extract(v2.simdInternal_, 0);

        t4               = simdLoad(base + align * offset[1]);
        t4.simdInternal_ = vec_sub(t4.simdInternal_, t2);
        store(base + align * offset[1], t4);
        base[align * offset[1] + 2] -= vec_extract(v2.simdInternal_, 1);
    }
}
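
// Expand the two input elements {s0, s1} into three outputs holding
// {s0, s0}, {s0, s1} and {s1, s1}, i.e. each input element repeated three
// times in sequence across the outputs.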
static inline void gmx_simdcall expandScalarsToTriplets(SimdDouble  scalar,
                                                        SimdDouble* triplets0,
                                                        SimdDouble* triplets1,
                                                        SimdDouble* triplets2)
{
    triplets0->simdInternal_ = vec_mergeh(scalar.simdInternal_, scalar.simdInternal_);
    triplets1->simdInternal_ = scalar.simdInternal_;
    triplets2->simdInternal_ = vec_mergel(scalar.simdInternal_, scalar.simdInternal_);
}
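
// Same as gatherLoadTranspose, but with the offsets supplied as a SIMD integer
// variable; the offsets are first written to an aligned scratch array.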
template<int align>
static inline void gmx_simdcall gatherLoadBySimdIntTranspose(const double* base,
                                                             SimdDInt32    offset,
                                                             SimdDouble*   v0,
                                                             SimdDouble*   v1,
                                                             SimdDouble*   v2,
                                                             SimdDouble*   v3)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);
    gatherLoadTranspose<align>(base, ioffset, v0, v1, v2, v3);
}
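
// Two-output variant of gatherLoadBySimdIntTranspose.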
template<int align>
static inline void gmx_simdcall
gatherLoadBySimdIntTranspose(const double* base, SimdDInt32 offset, SimdDouble* v0, SimdDouble* v1)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);
    gatherLoadTranspose<align>(base, ioffset, v0, v1);
}
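
// Unaligned two-output gather by SIMD integer offsets: load two doubles from
// each row with unaligned loads and transpose them into v0 and v1.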
template<int align>
static inline void gmx_simdcall
gatherLoadUBySimdIntTranspose(const double* base, SimdDInt32 offset, SimdDouble* v0, SimdDouble* v1)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);

    SimdDouble t1 = simdLoadU(base + align * ioffset[0]);
    SimdDouble t2 = simdLoadU(base + align * ioffset[1]);

    v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
    v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
}
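
// Reduce each of v0..v3 to a scalar sum, add the four sums to the four
// consecutive doubles at m, and return the total of all four sums.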
static inline double gmx_simdcall
reduceIncr4ReturnSum(double* m, SimdDouble v0, SimdDouble v1, SimdDouble v2, SimdDouble v3)
{
    __vector double t1, t2, t3, t4;

    t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
    t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
    t3 = vec_mergeh(v2.simdInternal_, v3.simdInternal_);
    t4 = vec_mergel(v2.simdInternal_, v3.simdInternal_);

    t1 = vec_add(t1, t2);
    t3 = vec_add(t3, t4);

    *reinterpret_cast<__vector double*>(m) += t1;
    *reinterpret_cast<__vector double*>(m + 2) += t3;

    t1 = vec_add(t1, t3);
    return reduce(SimdDouble(t1));
}
} // namespace gmx

#endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H