2 * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
4 * Copyright (c) 2005 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/*
 * Type descriptors for the register operand types named in the helper
 * declarations of this file.  Every register type is aliased to "ptr"
 * and its C type is a pointer to the corresponding register type.
 */
#define dh_alias_Reg     ptr
#define dh_ctype_Reg     Reg *

#define dh_alias_XMMReg  ptr
#define dh_ctype_XMMReg  XMMReg *

#define dh_alias_MMXReg  ptr
#define dh_ctype_MMXReg  MMXReg *
35 DEF_HELPER_2(glue(psrlw
, SUFFIX
), void, Reg
, Reg
)
36 DEF_HELPER_2(glue(psraw
, SUFFIX
), void, Reg
, Reg
)
37 DEF_HELPER_2(glue(psllw
, SUFFIX
), void, Reg
, Reg
)
38 DEF_HELPER_2(glue(psrld
, SUFFIX
), void, Reg
, Reg
)
39 DEF_HELPER_2(glue(psrad
, SUFFIX
), void, Reg
, Reg
)
40 DEF_HELPER_2(glue(pslld
, SUFFIX
), void, Reg
, Reg
)
41 DEF_HELPER_2(glue(psrlq
, SUFFIX
), void, Reg
, Reg
)
42 DEF_HELPER_2(glue(psllq
, SUFFIX
), void, Reg
, Reg
)
45 DEF_HELPER_2(glue(psrldq
, SUFFIX
), void, Reg
, Reg
)
46 DEF_HELPER_2(glue(pslldq
, SUFFIX
), void, Reg
, Reg
)
/*
 * Declaration macros, one per name suffix (B, W, L, Q).
 *
 * All four expand to the same declaration: a helper called
 * glue(name, SUFFIX) declared with "void, Reg, Reg".  The F argument is
 * accepted but does not appear in the expansion.
 */
#define SSE_HELPER_B(name, F) \
    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)

#define SSE_HELPER_W(name, F) \
    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)

#define SSE_HELPER_L(name, F) \
    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)

#define SSE_HELPER_Q(name, F) \
    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
61 SSE_HELPER_B(paddb
, FADD
)
62 SSE_HELPER_W(paddw
, FADD
)
63 SSE_HELPER_L(paddl
, FADD
)
64 SSE_HELPER_Q(paddq
, FADD
)
66 SSE_HELPER_B(psubb
, FSUB
)
67 SSE_HELPER_W(psubw
, FSUB
)
68 SSE_HELPER_L(psubl
, FSUB
)
69 SSE_HELPER_Q(psubq
, FSUB
)
71 SSE_HELPER_B(paddusb
, FADDUB
)
72 SSE_HELPER_B(paddsb
, FADDSB
)
73 SSE_HELPER_B(psubusb
, FSUBUB
)
74 SSE_HELPER_B(psubsb
, FSUBSB
)
76 SSE_HELPER_W(paddusw
, FADDUW
)
77 SSE_HELPER_W(paddsw
, FADDSW
)
78 SSE_HELPER_W(psubusw
, FSUBUW
)
79 SSE_HELPER_W(psubsw
, FSUBSW
)
81 SSE_HELPER_B(pminub
, FMINUB
)
82 SSE_HELPER_B(pmaxub
, FMAXUB
)
84 SSE_HELPER_W(pminsw
, FMINSW
)
85 SSE_HELPER_W(pmaxsw
, FMAXSW
)
87 SSE_HELPER_Q(pand
, FAND
)
88 SSE_HELPER_Q(pandn
, FANDN
)
89 SSE_HELPER_Q(por
, FOR
)
90 SSE_HELPER_Q(pxor
, FXOR
)
92 SSE_HELPER_B(pcmpgtb
, FCMPGTB
)
93 SSE_HELPER_W(pcmpgtw
, FCMPGTW
)
94 SSE_HELPER_L(pcmpgtl
, FCMPGTL
)
96 SSE_HELPER_B(pcmpeqb
, FCMPEQ
)
97 SSE_HELPER_W(pcmpeqw
, FCMPEQ
)
98 SSE_HELPER_L(pcmpeql
, FCMPEQ
)
100 SSE_HELPER_W(pmullw
, FMULLW
)
102 SSE_HELPER_W(pmulhrw
, FMULHRW
)
104 SSE_HELPER_W(pmulhuw
, FMULHUW
)
105 SSE_HELPER_W(pmulhw
, FMULHW
)
107 SSE_HELPER_B(pavgb
, FAVG
)
108 SSE_HELPER_W(pavgw
, FAVG
)
110 DEF_HELPER_2(glue(pmuludq
, SUFFIX
), void, Reg
, Reg
)
111 DEF_HELPER_2(glue(pmaddwd
, SUFFIX
), void, Reg
, Reg
)
113 DEF_HELPER_2(glue(psadbw
, SUFFIX
), void, Reg
, Reg
)
114 DEF_HELPER_3(glue(maskmov
, SUFFIX
), void, Reg
, Reg
, tl
)
115 DEF_HELPER_2(glue(movl_mm_T0
, SUFFIX
), void, Reg
, i32
)
117 DEF_HELPER_2(glue(movq_mm_T0
, SUFFIX
), void, Reg
, i64
)
121 DEF_HELPER_3(glue(pshufw
, SUFFIX
), void, Reg
, Reg
, int)
123 DEF_HELPER_3(shufps
, void, Reg
, Reg
, int)
124 DEF_HELPER_3(shufpd
, void, Reg
, Reg
, int)
125 DEF_HELPER_3(glue(pshufd
, SUFFIX
), void, Reg
, Reg
, int)
126 DEF_HELPER_3(glue(pshuflw
, SUFFIX
), void, Reg
, Reg
, int)
127 DEF_HELPER_3(glue(pshufhw
, SUFFIX
), void, Reg
, Reg
, int)
/* XXX: not accurate */

/*
 * Declare the four helpers name##ps, name##ss, name##pd and name##sd,
 * each with "void, Reg, Reg".  Unlike the SSE_HELPER_B/W/L/Q family, the
 * names are not combined with SUFFIX.  The F argument is not used by the
 * expansion.
 */
#define SSE_HELPER_S(name, F)                     \
    DEF_HELPER_2(name ## ps , void, Reg, Reg)     \
    DEF_HELPER_2(name ## ss , void, Reg, Reg)     \
    DEF_HELPER_2(name ## pd , void, Reg, Reg)     \
    DEF_HELPER_2(name ## sd , void, Reg, Reg)
140 SSE_HELPER_S(add
, FPU_ADD
)
141 SSE_HELPER_S(sub
, FPU_SUB
)
142 SSE_HELPER_S(mul
, FPU_MUL
)
143 SSE_HELPER_S(div
, FPU_DIV
)
144 SSE_HELPER_S(min
, FPU_MIN
)
145 SSE_HELPER_S(max
, FPU_MAX
)
146 SSE_HELPER_S(sqrt
, FPU_SQRT
)
149 DEF_HELPER_2(cvtps2pd
, void, Reg
, Reg
)
150 DEF_HELPER_2(cvtpd2ps
, void, Reg
, Reg
)
151 DEF_HELPER_2(cvtss2sd
, void, Reg
, Reg
)
152 DEF_HELPER_2(cvtsd2ss
, void, Reg
, Reg
)
153 DEF_HELPER_2(cvtdq2ps
, void, Reg
, Reg
)
154 DEF_HELPER_2(cvtdq2pd
, void, Reg
, Reg
)
155 DEF_HELPER_2(cvtpi2ps
, void, XMMReg
, MMXReg
)
156 DEF_HELPER_2(cvtpi2pd
, void, XMMReg
, MMXReg
)
157 DEF_HELPER_2(cvtsi2ss
, void, XMMReg
, i32
)
158 DEF_HELPER_2(cvtsi2sd
, void, XMMReg
, i32
)
161 DEF_HELPER_2(cvtsq2ss
, void, XMMReg
, i64
)
162 DEF_HELPER_2(cvtsq2sd
, void, XMMReg
, i64
)
165 DEF_HELPER_2(cvtps2dq
, void, XMMReg
, XMMReg
)
166 DEF_HELPER_2(cvtpd2dq
, void, XMMReg
, XMMReg
)
167 DEF_HELPER_2(cvtps2pi
, void, MMXReg
, XMMReg
)
168 DEF_HELPER_2(cvtpd2pi
, void, MMXReg
, XMMReg
)
169 DEF_HELPER_1(cvtss2si
, s32
, XMMReg
)
170 DEF_HELPER_1(cvtsd2si
, s32
, XMMReg
)
172 DEF_HELPER_1(cvtss2sq
, s64
, XMMReg
)
173 DEF_HELPER_1(cvtsd2sq
, s64
, XMMReg
)
176 DEF_HELPER_2(cvttps2dq
, void, XMMReg
, XMMReg
)
177 DEF_HELPER_2(cvttpd2dq
, void, XMMReg
, XMMReg
)
178 DEF_HELPER_2(cvttps2pi
, void, MMXReg
, XMMReg
)
179 DEF_HELPER_2(cvttpd2pi
, void, MMXReg
, XMMReg
)
180 DEF_HELPER_1(cvttss2si
, s32
, XMMReg
)
181 DEF_HELPER_1(cvttsd2si
, s32
, XMMReg
)
183 DEF_HELPER_1(cvttss2sq
, s64
, XMMReg
)
184 DEF_HELPER_1(cvttsd2sq
, s64
, XMMReg
)
187 DEF_HELPER_2(rsqrtps
, void, XMMReg
, XMMReg
)
188 DEF_HELPER_2(rsqrtss
, void, XMMReg
, XMMReg
)
189 DEF_HELPER_2(rcpps
, void, XMMReg
, XMMReg
)
190 DEF_HELPER_2(rcpss
, void, XMMReg
, XMMReg
)
191 DEF_HELPER_2(haddps
, void, XMMReg
, XMMReg
)
192 DEF_HELPER_2(haddpd
, void, XMMReg
, XMMReg
)
193 DEF_HELPER_2(hsubps
, void, XMMReg
, XMMReg
)
194 DEF_HELPER_2(hsubpd
, void, XMMReg
, XMMReg
)
195 DEF_HELPER_2(addsubps
, void, XMMReg
, XMMReg
)
196 DEF_HELPER_2(addsubpd
, void, XMMReg
, XMMReg
)
/*
 * Declare the four helpers name##ps, name##ss, name##pd and name##sd,
 * each with "void, Reg, Reg".  The F argument is not used by the
 * expansion.
 */
#define SSE_HELPER_CMP(name, F)                   \
    DEF_HELPER_2(name ## ps, void, Reg, Reg)      \
    DEF_HELPER_2(name ## ss, void, Reg, Reg)      \
    DEF_HELPER_2(name ## pd, void, Reg, Reg)      \
    DEF_HELPER_2(name ## sd, void, Reg, Reg)
204 SSE_HELPER_CMP(cmpeq
, FPU_CMPEQ
)
205 SSE_HELPER_CMP(cmplt
, FPU_CMPLT
)
206 SSE_HELPER_CMP(cmple
, FPU_CMPLE
)
207 SSE_HELPER_CMP(cmpunord
, FPU_CMPUNORD
)
208 SSE_HELPER_CMP(cmpneq
, FPU_CMPNEQ
)
209 SSE_HELPER_CMP(cmpnlt
, FPU_CMPNLT
)
210 SSE_HELPER_CMP(cmpnle
, FPU_CMPNLE
)
211 SSE_HELPER_CMP(cmpord
, FPU_CMPORD
)
213 DEF_HELPER_2(ucomiss
, void, Reg
, Reg
)
214 DEF_HELPER_2(comiss
, void, Reg
, Reg
)
215 DEF_HELPER_2(ucomisd
, void, Reg
, Reg
)
216 DEF_HELPER_2(comisd
, void, Reg
, Reg
)
217 DEF_HELPER_1(movmskps
, i32
, Reg
)
218 DEF_HELPER_1(movmskpd
, i32
, Reg
)
221 DEF_HELPER_1(glue(pmovmskb
, SUFFIX
), i32
, Reg
)
222 DEF_HELPER_2(glue(packsswb
, SUFFIX
), void, Reg
, Reg
)
223 DEF_HELPER_2(glue(packuswb
, SUFFIX
), void, Reg
, Reg
)
224 DEF_HELPER_2(glue(packssdw
, SUFFIX
), void, Reg
, Reg
)
/*
 * Declare the three helpers punpck<base_name>bw, punpck<base_name>wd and
 * punpck<base_name>dq (each combined with SUFFIX through glue()), all
 * with "void, Reg, Reg".  The second macro argument, base, is not used
 * by the expansion.
 *
 * NOTE(review): no UNPCK_OP(...) invocation is visible in this part of
 * the file; confirm the macro is actually instantiated somewhere.
 */
#define UNPCK_OP(base_name, base)                                           \
    DEF_HELPER_2(glue(punpck ## base_name ## bw, SUFFIX), void, Reg, Reg)   \
    DEF_HELPER_2(glue(punpck ## base_name ## wd, SUFFIX), void, Reg, Reg)   \
    DEF_HELPER_2(glue(punpck ## base_name ## dq, SUFFIX), void, Reg, Reg)
234 DEF_HELPER_2(glue(punpcklqdq
, SUFFIX
), void, Reg
, Reg
)
235 DEF_HELPER_2(glue(punpckhqdq
, SUFFIX
), void, Reg
, Reg
)
238 /* 3DNow! float ops */
240 DEF_HELPER_2(pi2fd
, void, MMXReg
, MMXReg
)
241 DEF_HELPER_2(pi2fw
, void, MMXReg
, MMXReg
)
242 DEF_HELPER_2(pf2id
, void, MMXReg
, MMXReg
)
243 DEF_HELPER_2(pf2iw
, void, MMXReg
, MMXReg
)
244 DEF_HELPER_2(pfacc
, void, MMXReg
, MMXReg
)
245 DEF_HELPER_2(pfadd
, void, MMXReg
, MMXReg
)
246 DEF_HELPER_2(pfcmpeq
, void, MMXReg
, MMXReg
)
247 DEF_HELPER_2(pfcmpge
, void, MMXReg
, MMXReg
)
248 DEF_HELPER_2(pfcmpgt
, void, MMXReg
, MMXReg
)
249 DEF_HELPER_2(pfmax
, void, MMXReg
, MMXReg
)
250 DEF_HELPER_2(pfmin
, void, MMXReg
, MMXReg
)
251 DEF_HELPER_2(pfmul
, void, MMXReg
, MMXReg
)
252 DEF_HELPER_2(pfnacc
, void, MMXReg
, MMXReg
)
253 DEF_HELPER_2(pfpnacc
, void, MMXReg
, MMXReg
)
254 DEF_HELPER_2(pfrcp
, void, MMXReg
, MMXReg
)
255 DEF_HELPER_2(pfrsqrt
, void, MMXReg
, MMXReg
)
256 DEF_HELPER_2(pfsub
, void, MMXReg
, MMXReg
)
257 DEF_HELPER_2(pfsubr
, void, MMXReg
, MMXReg
)
258 DEF_HELPER_2(pswapd
, void, MMXReg
, MMXReg
)
261 /* SSSE3 op helpers */
262 DEF_HELPER_2(glue(phaddw
, SUFFIX
), void, Reg
, Reg
)
263 DEF_HELPER_2(glue(phaddd
, SUFFIX
), void, Reg
, Reg
)
264 DEF_HELPER_2(glue(phaddsw
, SUFFIX
), void, Reg
, Reg
)
265 DEF_HELPER_2(glue(phsubw
, SUFFIX
), void, Reg
, Reg
)
266 DEF_HELPER_2(glue(phsubd
, SUFFIX
), void, Reg
, Reg
)
267 DEF_HELPER_2(glue(phsubsw
, SUFFIX
), void, Reg
, Reg
)
268 DEF_HELPER_2(glue(pabsb
, SUFFIX
), void, Reg
, Reg
)
269 DEF_HELPER_2(glue(pabsw
, SUFFIX
), void, Reg
, Reg
)
270 DEF_HELPER_2(glue(pabsd
, SUFFIX
), void, Reg
, Reg
)
271 DEF_HELPER_2(glue(pmaddubsw
, SUFFIX
), void, Reg
, Reg
)
272 DEF_HELPER_2(glue(pmulhrsw
, SUFFIX
), void, Reg
, Reg
)
273 DEF_HELPER_2(glue(pshufb
, SUFFIX
), void, Reg
, Reg
)
274 DEF_HELPER_2(glue(psignb
, SUFFIX
), void, Reg
, Reg
)
275 DEF_HELPER_2(glue(psignw
, SUFFIX
), void, Reg
, Reg
)
276 DEF_HELPER_2(glue(psignd
, SUFFIX
), void, Reg
, Reg
)
277 DEF_HELPER_3(glue(palignr
, SUFFIX
), void, Reg
, Reg
, s32
)
279 /* SSE4.1 op helpers */
281 DEF_HELPER_2(glue(pblendvb
, SUFFIX
), void, Reg
, Reg
)
282 DEF_HELPER_2(glue(blendvps
, SUFFIX
), void, Reg
, Reg
)
283 DEF_HELPER_2(glue(blendvpd
, SUFFIX
), void, Reg
, Reg
)
284 DEF_HELPER_2(glue(ptest
, SUFFIX
), void, Reg
, Reg
)
285 DEF_HELPER_2(glue(pmovsxbw
, SUFFIX
), void, Reg
, Reg
)
286 DEF_HELPER_2(glue(pmovsxbd
, SUFFIX
), void, Reg
, Reg
)
287 DEF_HELPER_2(glue(pmovsxbq
, SUFFIX
), void, Reg
, Reg
)
288 DEF_HELPER_2(glue(pmovsxwd
, SUFFIX
), void, Reg
, Reg
)
289 DEF_HELPER_2(glue(pmovsxwq
, SUFFIX
), void, Reg
, Reg
)
290 DEF_HELPER_2(glue(pmovsxdq
, SUFFIX
), void, Reg
, Reg
)
291 DEF_HELPER_2(glue(pmovzxbw
, SUFFIX
), void, Reg
, Reg
)
292 DEF_HELPER_2(glue(pmovzxbd
, SUFFIX
), void, Reg
, Reg
)
293 DEF_HELPER_2(glue(pmovzxbq
, SUFFIX
), void, Reg
, Reg
)
294 DEF_HELPER_2(glue(pmovzxwd
, SUFFIX
), void, Reg
, Reg
)
295 DEF_HELPER_2(glue(pmovzxwq
, SUFFIX
), void, Reg
, Reg
)
296 DEF_HELPER_2(glue(pmovzxdq
, SUFFIX
), void, Reg
, Reg
)
297 DEF_HELPER_2(glue(pmuldq
, SUFFIX
), void, Reg
, Reg
)
298 DEF_HELPER_2(glue(pcmpeqq
, SUFFIX
), void, Reg
, Reg
)
299 DEF_HELPER_2(glue(packusdw
, SUFFIX
), void, Reg
, Reg
)
300 DEF_HELPER_2(glue(pminsb
, SUFFIX
), void, Reg
, Reg
)
301 DEF_HELPER_2(glue(pminsd
, SUFFIX
), void, Reg
, Reg
)
302 DEF_HELPER_2(glue(pminuw
, SUFFIX
), void, Reg
, Reg
)
303 DEF_HELPER_2(glue(pminud
, SUFFIX
), void, Reg
, Reg
)
304 DEF_HELPER_2(glue(pmaxsb
, SUFFIX
), void, Reg
, Reg
)
305 DEF_HELPER_2(glue(pmaxsd
, SUFFIX
), void, Reg
, Reg
)
306 DEF_HELPER_2(glue(pmaxuw
, SUFFIX
), void, Reg
, Reg
)
307 DEF_HELPER_2(glue(pmaxud
, SUFFIX
), void, Reg
, Reg
)
308 DEF_HELPER_2(glue(pmulld
, SUFFIX
), void, Reg
, Reg
)
309 DEF_HELPER_2(glue(phminposuw
, SUFFIX
), void, Reg
, Reg
)
310 DEF_HELPER_3(glue(roundps
, SUFFIX
), void, Reg
, Reg
, i32
)
311 DEF_HELPER_3(glue(roundpd
, SUFFIX
), void, Reg
, Reg
, i32
)
312 DEF_HELPER_3(glue(roundss
, SUFFIX
), void, Reg
, Reg
, i32
)
313 DEF_HELPER_3(glue(roundsd
, SUFFIX
), void, Reg
, Reg
, i32
)
314 DEF_HELPER_3(glue(blendps
, SUFFIX
), void, Reg
, Reg
, i32
)
315 DEF_HELPER_3(glue(blendpd
, SUFFIX
), void, Reg
, Reg
, i32
)
316 DEF_HELPER_3(glue(pblendw
, SUFFIX
), void, Reg
, Reg
, i32
)
317 DEF_HELPER_3(glue(dpps
, SUFFIX
), void, Reg
, Reg
, i32
)
318 DEF_HELPER_3(glue(dppd
, SUFFIX
), void, Reg
, Reg
, i32
)
319 DEF_HELPER_3(glue(mpsadbw
, SUFFIX
), void, Reg
, Reg
, i32
)
322 /* SSE4.2 op helpers */
324 DEF_HELPER_2(glue(pcmpgtq
, SUFFIX
), void, Reg
, Reg
)
325 DEF_HELPER_3(glue(pcmpestri
, SUFFIX
), void, Reg
, Reg
, i32
)
326 DEF_HELPER_3(glue(pcmpestrm
, SUFFIX
), void, Reg
, Reg
, i32
)
327 DEF_HELPER_3(glue(pcmpistri
, SUFFIX
), void, Reg
, Reg
, i32
)
328 DEF_HELPER_3(glue(pcmpistrm
, SUFFIX
), void, Reg
, Reg
, i32
)
329 DEF_HELPER_3(crc32
, tl
, i32
, tl
, i32
)
330 DEF_HELPER_2(popcnt
, tl
, tl
, i32
)
/* Drop the SSE_HELPER_CMP declaration macro defined earlier in this file. */
#undef SSE_HELPER_CMP