/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H
#define GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H

#include "config.h"

#include <cstdint>

#include "gromacs/utility/basedefinitions.h"

#include "impl_ibm_vsx_definitions.h"
#include "impl_ibm_vsx_simd_double.h"

namespace gmx
{
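/*! \brief Load 4 consecutive doubles from each of two offsets and
 * transpose, so that each output holds one element from both rows.
 *
 * The loads dereference __vector double pointers directly, so
 * base + align * offset[i] must be 16-byte aligned.
 */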
template <int align>
static inline void gmx_simdcall
gatherLoadTranspose(const double *        base,
                    const std::int32_t    offset[],
                    SimdDouble *          v0,
                    SimdDouble *          v1,
                    SimdDouble *          v2,
                    SimdDouble *          v3)
{
    __vector double t1, t2, t3, t4;

    t1                = *reinterpret_cast<const __vector double *>(base + align * offset[0]);
    t2                = *reinterpret_cast<const __vector double *>(base + align * offset[1]);
    t3                = *reinterpret_cast<const __vector double *>(base + align * offset[0] + 2);
    t4                = *reinterpret_cast<const __vector double *>(base + align * offset[1] + 2);
    v0->simdInternal_ = vec_mergeh(t1, t2);
    v1->simdInternal_ = vec_mergel(t1, t2);
    v2->simdInternal_ = vec_mergeh(t3, t4);
    v3->simdInternal_ = vec_mergel(t3, t4);
}
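/*! \brief Overload loading and transposing only 2 doubles per offset. */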
template <int align>
static inline void gmx_simdcall
gatherLoadTranspose(const double *        base,
                    const std::int32_t    offset[],
                    SimdDouble *          v0,
                    SimdDouble *          v1)
{
    __vector double t1, t2;

    t1                = *reinterpret_cast<const __vector double *>(base + align * offset[0]);
    t2                = *reinterpret_cast<const __vector double *>(base + align * offset[1]);
    v0->simdInternal_ = vec_mergeh(t1, t2);
    v1->simdInternal_ = vec_mergel(t1, t2);
}
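/*! \brief Best alignment to use for aligned pairs of double data. */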
static const int c_simdBestPairAlignmentDouble = 2;
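/*! \brief Load 3 consecutive doubles from each of two offsets and
 * transpose; the memory does not need to be padded to SIMD width.
 *
 * The leading pairs are read vector-wise; the third elements are
 * gathered with scalar loads and merged.
 */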
template <int align>
static inline void gmx_simdcall
gatherLoadUTranspose(const double *        base,
                     const std::int32_t    offset[],
                     SimdDouble *          v0,
                     SimdDouble *          v1,
                     SimdDouble *          v2)
{
    SimdDouble t1, t2;

    t1 = simdLoad(base + align * offset[0]);
    t2 = simdLoad(base + align * offset[1]);

    v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
    v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
    v2->simdInternal_ = vec_mergeh(vec_splats(*(base + align * offset[0] + 2)),
                                   vec_splats(*(base + align * offset[1] + 2)));
}
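/*! \brief Write three SIMD variables back to memory as triplets at two
 * offsets; the leading pair is stored vector-wise, the third element
 * as a scalar. */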
// gcc-4.9 fails to recognize that the argument to vec_extract() is used
template <int align>
static inline void gmx_simdcall
transposeScatterStoreU(double *              base,
                       const std::int32_t    offset[],
                       SimdDouble            v0,
                       SimdDouble            v1,
                       SimdDouble gmx_unused v2)
{
    SimdDouble t1, t2;

    t1.simdInternal_ = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
    t2.simdInternal_ = vec_mergel(v0.simdInternal_, v1.simdInternal_);

    store(base + align * offset[0], t1);
    base[align * offset[0] + 2] = vec_extract(v2.simdInternal_, 0);
    store(base + align * offset[1], t2);
    base[align * offset[1] + 2] = vec_extract(v2.simdInternal_, 1);
}
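/*! \brief Add three transposed SIMD variables to memory triplets at two
 * offsets.
 *
 * When align is a multiple of 4 the triplets are padded, so whole
 * vectors (with a zero merged in for the padding slot) can be
 * read-modify-written; otherwise the third element is updated as a
 * scalar to avoid writing past each triplet.
 */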
template <int align>
static inline void gmx_simdcall
transposeScatterIncrU(double *              base,
                      const std::int32_t    offset[],
                      SimdDouble            v0,
                      SimdDouble            v1,
                      SimdDouble            v2)
{
    if (align % 4 == 0)
    {
        __vector double t1, t2, t3, t4;
        SimdDouble      t5, t6;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
        t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
        t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));

        t5               = simdLoad(base + align * offset[0]);
        t6               = simdLoad(base + align * offset[0] + 2);
        t5.simdInternal_ = vec_add(t5.simdInternal_, t1);
        t6.simdInternal_ = vec_add(t6.simdInternal_, t3);
        store(base + align * offset[0], t5);
        store(base + align * offset[0] + 2, t6);

        t5               = simdLoad(base + align * offset[1]);
        t6               = simdLoad(base + align * offset[1] + 2);
        t5.simdInternal_ = vec_add(t5.simdInternal_, t2);
        t6.simdInternal_ = vec_add(t6.simdInternal_, t4);
        store(base + align * offset[1], t5);
        store(base + align * offset[1] + 2, t6);
    }
    else
    {
        __vector double t1, t2;
        SimdDouble      t3, t4;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);

        t3               = simdLoad(base + align * offset[0]);
        t3.simdInternal_ = vec_add(t3.simdInternal_, t1);
        store(base + align * offset[0], t3);
        base[align * offset[0] + 2] += vec_extract(v2.simdInternal_, 0);

        t4               = simdLoad(base + align * offset[1]);
        t4.simdInternal_ = vec_add(t4.simdInternal_, t2);
        store(base + align * offset[1], t4);
        base[align * offset[1] + 2] += vec_extract(v2.simdInternal_, 1);
    }
}
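/*! \brief As transposeScatterIncrU, but subtracting the SIMD variables
 * from memory (vec_sub in place of vec_add). */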
template <int align>
static inline void gmx_simdcall
transposeScatterDecrU(double *              base,
                      const std::int32_t    offset[],
                      SimdDouble            v0,
                      SimdDouble            v1,
                      SimdDouble            v2)
{
    if (align % 4 == 0)
    {
        __vector double t1, t2, t3, t4;
        SimdDouble      t5, t6;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
        t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
        t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));

        t5               = simdLoad(base + align * offset[0]);
        t6               = simdLoad(base + align * offset[0] + 2);
        t5.simdInternal_ = vec_sub(t5.simdInternal_, t1);
        t6.simdInternal_ = vec_sub(t6.simdInternal_, t3);
        store(base + align * offset[0], t5);
        store(base + align * offset[0] + 2, t6);

        t5               = simdLoad(base + align * offset[1]);
        t6               = simdLoad(base + align * offset[1] + 2);
        t5.simdInternal_ = vec_sub(t5.simdInternal_, t2);
        t6.simdInternal_ = vec_sub(t6.simdInternal_, t4);
        store(base + align * offset[1], t5);
        store(base + align * offset[1] + 2, t6);
    }
    else
    {
        __vector double t1, t2;
        SimdDouble      t3, t4;

        t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
        t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);

        t3               = simdLoad(base + align * offset[0]);
        t3.simdInternal_ = vec_sub(t3.simdInternal_, t1);
        store(base + align * offset[0], t3);
        base[align * offset[0] + 2] -= vec_extract(v2.simdInternal_, 0);

        t4               = simdLoad(base + align * offset[1]);
        t4.simdInternal_ = vec_sub(t4.simdInternal_, t2);
        store(base + align * offset[1], t4);
        base[align * offset[1] + 2] -= vec_extract(v2.simdInternal_, 1);
    }
}
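/*! \brief Expand each scalar to fill a triplet, e.g. for multiplying a
 * per-atom prefactor onto x/y/z components.
 *
 * For scalar = (a, b) the outputs are triplets0 = (a, a),
 * triplets1 = (a, b) and triplets2 = (b, b), i.e. the sequence
 * a a a b b b when read across the three variables.
 */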
static inline void gmx_simdcall
expandScalarsToTriplets(SimdDouble    scalar,
                        SimdDouble *  triplets0,
                        SimdDouble *  triplets1,
                        SimdDouble *  triplets2)
{
    triplets0->simdInternal_ = vec_mergeh(scalar.simdInternal_, scalar.simdInternal_);
    triplets1->simdInternal_ = scalar.simdInternal_;
    triplets2->simdInternal_ = vec_mergel(scalar.simdInternal_, scalar.simdInternal_);
}
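/*! \brief As gatherLoadTranspose, but taking the offsets in a SimdDInt32
 * variable, which is spilled to an aligned integer buffer first. */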
template <int align>
static inline void gmx_simdcall
gatherLoadBySimdIntTranspose(const double *  base,
                             SimdDInt32      offset,
                             SimdDouble *    v0,
                             SimdDouble *    v1,
                             SimdDouble *    v2,
                             SimdDouble *    v3)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);
    gatherLoadTranspose<align>(base, ioffset, v0, v1, v2, v3);
}
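/*! \brief Two-output overload of gatherLoadBySimdIntTranspose. */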
template <int align>
static inline void gmx_simdcall
gatherLoadBySimdIntTranspose(const double *  base,
                             SimdDInt32      offset,
                             SimdDouble *    v0,
                             SimdDouble *    v1)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);
    gatherLoadTranspose<align>(base, ioffset, v0, v1);
}
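/*! \brief As the two-output gatherLoadBySimdIntTranspose, but using
 * unaligned loads, so base + align * offset need not be SIMD-aligned. */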
template <int align>
static inline void gmx_simdcall
gatherLoadUBySimdIntTranspose(const double *  base,
                              SimdDInt32      offset,
                              SimdDouble *    v0,
                              SimdDouble *    v1)
{
    alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];

    store(ioffset, offset);

    SimdDouble t1     = simdLoadU(base + align * ioffset[0]);
    SimdDouble t2     = simdLoadU(base + align * ioffset[1]);
    v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
    v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
}
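/*! \brief Reduce each of four SIMD variables to a scalar, add the four
 * results to consecutive doubles in m, and return their total sum.
 *
 * With 2-wide vectors, merging high/low halves of the (v0, v1) and
 * (v2, v3) pairs and adding yields (sum(v0), sum(v1)) and
 * (sum(v2), sum(v3)), so only two vector additions to memory are needed.
 */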
static inline double gmx_simdcall
reduceIncr4ReturnSum(double *    m,
                     SimdDouble  v0,
                     SimdDouble  v1,
                     SimdDouble  v2,
                     SimdDouble  v3)
{
    __vector double t1, t2, t3, t4;

    t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
    t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
    t3 = vec_mergeh(v2.simdInternal_, v3.simdInternal_);
    t4 = vec_mergel(v2.simdInternal_, v3.simdInternal_);

    t1 = vec_add(t1, t2);
    t3 = vec_add(t3, t4);

    *reinterpret_cast<__vector double *>(m)     += t1;
    *reinterpret_cast<__vector double *>(m + 2) += t3;

    t1 = vec_add(t1, t3);
    return reduce(SimdDouble(t1));
}
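/* Illustrative usage sketch; coords, forces, and the force values are
 * hypothetical caller-managed data, not names defined by GROMACS:
 *
 *     alignas(GMX_SIMD_ALIGNMENT) std::int32_t offsets[2] = { 0, 1 };
 *     SimdDouble x, y, z, pad;
 *
 *     // coords stores x,y,z plus one padding double per particle
 *     // (align = 4), starting 16-byte aligned.
 *     gatherLoadTranspose<4>(coords, offsets, &x, &y, &z, &pad);
 *
 *     SimdDouble fx = ..., fy = ..., fz = ...;  // forces from x, y, z
 *
 *     // Accumulate the forces into an identically laid-out buffer.
 *     transposeScatterIncrU<4>(forces, offsets, fx, fy, fz);
 */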
}      // namespace gmx

#endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H