1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse.cc,v 1.63 2008/09/19 19:18:57 sshwarts Exp $
3 /////////////////////////////////////////////////////////////////////////
5 // Copyright (c) 2003 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 /////////////////////////////////////////////////////////////////////////
24 #define NEED_CPU_REG_SHORTCUTS 1
27 #define LOG_THIS BX_CPU_THIS_PTR
29 /* ********************************************** */
30 /* SSE Integer Operations (128bit MMX extensions) */
31 /* ********************************************** */
34 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
37 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSHUFB_VdqWdq(bxInstruction_c
*i
)
39 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
40 BX_CPU_THIS_PTR
prepareSSE();
42 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
44 /* op2 is a register or memory reference */
46 op2
= BX_READ_XMM_REG(i
->rm());
49 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
50 /* pointer, segment address pair */
51 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
54 for(unsigned j
=0; j
<16; j
++)
56 unsigned mask
= op2
.xmmubyte(j
);
58 result
.xmmubyte(j
) = 0;
60 result
.xmmubyte(j
) = op1
.xmmubyte(mask
& 0xf);
63 BX_WRITE_XMM_REG(i
->nnn(), result
);
65 BX_INFO(("PSHUFB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
66 exception(BX_UD_EXCEPTION
, 0, 0);
71 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDW_VdqWdq(bxInstruction_c
*i
)
73 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
74 BX_CPU_THIS_PTR
prepareSSE();
76 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
78 /* op2 is a register or memory reference */
80 op2
= BX_READ_XMM_REG(i
->rm());
83 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
84 /* pointer, segment address pair */
85 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
88 result
.xmm16u(0) = op1
.xmm16u(0) + op1
.xmm16u(1);
89 result
.xmm16u(1) = op1
.xmm16u(2) + op1
.xmm16u(3);
90 result
.xmm16u(2) = op1
.xmm16u(4) + op1
.xmm16u(5);
91 result
.xmm16u(3) = op1
.xmm16u(6) + op1
.xmm16u(7);
93 result
.xmm16u(4) = op2
.xmm16u(0) + op2
.xmm16u(1);
94 result
.xmm16u(5) = op2
.xmm16u(2) + op2
.xmm16u(3);
95 result
.xmm16u(6) = op2
.xmm16u(4) + op2
.xmm16u(5);
96 result
.xmm16u(7) = op2
.xmm16u(6) + op2
.xmm16u(7);
98 BX_WRITE_XMM_REG(i
->nnn(), result
);
100 BX_INFO(("PHADDW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
101 exception(BX_UD_EXCEPTION
, 0, 0);
106 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDD_VdqWdq(bxInstruction_c
*i
)
108 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
109 BX_CPU_THIS_PTR
prepareSSE();
111 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
113 /* op2 is a register or memory reference */
115 op2
= BX_READ_XMM_REG(i
->rm());
118 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
119 /* pointer, segment address pair */
120 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
123 result
.xmm32u(0) = op1
.xmm32u(0) + op1
.xmm32u(1);
124 result
.xmm32u(1) = op1
.xmm32u(2) + op1
.xmm32u(3);
125 result
.xmm32u(2) = op2
.xmm32u(0) + op2
.xmm32u(1);
126 result
.xmm32u(3) = op2
.xmm32u(2) + op2
.xmm32u(3);
128 BX_WRITE_XMM_REG(i
->nnn(), result
);
130 BX_INFO(("PHADDD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
131 exception(BX_UD_EXCEPTION
, 0, 0);
136 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDSW_VdqWdq(bxInstruction_c
*i
)
138 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
139 BX_CPU_THIS_PTR
prepareSSE();
141 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
143 /* op2 is a register or memory reference */
145 op2
= BX_READ_XMM_REG(i
->rm());
148 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
149 /* pointer, segment address pair */
150 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
153 result
.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(0)) + Bit32s(op1
.xmm16s(1)));
154 result
.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(2)) + Bit32s(op1
.xmm16s(3)));
155 result
.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(4)) + Bit32s(op1
.xmm16s(5)));
156 result
.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(6)) + Bit32s(op1
.xmm16s(7)));
158 result
.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(0)) + Bit32s(op2
.xmm16s(1)));
159 result
.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(2)) + Bit32s(op2
.xmm16s(3)));
160 result
.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(4)) + Bit32s(op2
.xmm16s(5)));
161 result
.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(6)) + Bit32s(op2
.xmm16s(7)));
163 /* now write result back to destination */
164 BX_WRITE_XMM_REG(i
->nnn(), result
);
166 BX_INFO(("PHADDSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
167 exception(BX_UD_EXCEPTION
, 0, 0);
172 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMADDUBSW_VdqWdq(bxInstruction_c
*i
)
174 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
175 BX_CPU_THIS_PTR
prepareSSE();
177 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
179 /* op2 is a register or memory reference */
181 op2
= BX_READ_XMM_REG(i
->rm());
184 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
185 /* pointer, segment address pair */
186 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
189 for(unsigned j
=0; j
<8; j
++)
191 Bit32s temp
= Bit32s(op1
.xmmubyte(j
*2+0))*Bit32s(op2
.xmmsbyte(j
*2+0)) +
192 Bit32s(op1
.xmmubyte(j
*2+1))*Bit32s(op2
.xmmsbyte(j
*2+1));
194 result
.xmm16s(j
) = SaturateDwordSToWordS(temp
);
197 /* now write result back to destination */
198 BX_WRITE_XMM_REG(i
->nnn(), result
);
200 BX_INFO(("PMADDUBSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
201 exception(BX_UD_EXCEPTION
, 0, 0);
206 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBSW_VdqWdq(bxInstruction_c
*i
)
208 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
209 BX_CPU_THIS_PTR
prepareSSE();
211 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
213 /* op2 is a register or memory reference */
215 op2
= BX_READ_XMM_REG(i
->rm());
218 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
219 /* pointer, segment address pair */
220 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
223 result
.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(0)) - Bit32s(op1
.xmm16s(1)));
224 result
.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(2)) - Bit32s(op1
.xmm16s(3)));
225 result
.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(4)) - Bit32s(op1
.xmm16s(5)));
226 result
.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(6)) - Bit32s(op1
.xmm16s(7)));
228 result
.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(0)) - Bit32s(op2
.xmm16s(1)));
229 result
.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(2)) - Bit32s(op2
.xmm16s(3)));
230 result
.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(4)) - Bit32s(op2
.xmm16s(5)));
231 result
.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(6)) - Bit32s(op2
.xmm16s(7)));
233 /* now write result back to destination */
234 BX_WRITE_XMM_REG(i
->nnn(), result
);
236 BX_INFO(("PHSUBSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
237 exception(BX_UD_EXCEPTION
, 0, 0);
242 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBW_VdqWdq(bxInstruction_c
*i
)
244 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
245 BX_CPU_THIS_PTR
prepareSSE();
247 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
249 /* op2 is a register or memory reference */
251 op2
= BX_READ_XMM_REG(i
->rm());
254 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
255 /* pointer, segment address pair */
256 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
259 result
.xmm16u(0) = op1
.xmm16u(0) - op1
.xmm16u(1);
260 result
.xmm16u(1) = op1
.xmm16u(2) - op1
.xmm16u(3);
261 result
.xmm16u(2) = op1
.xmm16u(4) - op1
.xmm16u(5);
262 result
.xmm16u(3) = op1
.xmm16u(6) - op1
.xmm16u(7);
264 result
.xmm16u(4) = op2
.xmm16u(0) - op2
.xmm16u(1);
265 result
.xmm16u(5) = op2
.xmm16u(2) - op2
.xmm16u(3);
266 result
.xmm16u(6) = op2
.xmm16u(4) - op2
.xmm16u(5);
267 result
.xmm16u(7) = op2
.xmm16u(6) - op2
.xmm16u(7);
269 BX_WRITE_XMM_REG(i
->nnn(), result
);
271 BX_INFO(("PHSUBW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
272 exception(BX_UD_EXCEPTION
, 0, 0);
277 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBD_VdqWdq(bxInstruction_c
*i
)
279 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
280 BX_CPU_THIS_PTR
prepareSSE();
282 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
284 /* op2 is a register or memory reference */
286 op2
= BX_READ_XMM_REG(i
->rm());
289 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
290 /* pointer, segment address pair */
291 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
294 result
.xmm32u(0) = op1
.xmm32u(0) - op1
.xmm32u(1);
295 result
.xmm32u(1) = op1
.xmm32u(2) - op1
.xmm32u(3);
296 result
.xmm32u(2) = op2
.xmm32u(0) - op2
.xmm32u(1);
297 result
.xmm32u(3) = op2
.xmm32u(2) - op2
.xmm32u(3);
299 BX_WRITE_XMM_REG(i
->nnn(), result
);
301 BX_INFO(("PHSUBD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
302 exception(BX_UD_EXCEPTION
, 0, 0);
307 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSIGNB_VdqWdq(bxInstruction_c
*i
)
309 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
310 BX_CPU_THIS_PTR
prepareSSE();
312 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
314 /* op2 is a register or memory reference */
316 op2
= BX_READ_XMM_REG(i
->rm());
319 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
320 /* pointer, segment address pair */
321 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
324 for(unsigned j
=0; j
<16; j
++) {
325 int sign
= (op2
.xmmsbyte(j
) > 0) - (op2
.xmmsbyte(j
) < 0);
326 op1
.xmmsbyte(j
) *= sign
;
329 BX_WRITE_XMM_REG(i
->nnn(), op1
);
331 BX_INFO(("PSIGNB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
332 exception(BX_UD_EXCEPTION
, 0, 0);
337 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSIGNW_VdqWdq(bxInstruction_c
*i
)
339 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
340 BX_CPU_THIS_PTR
prepareSSE();
342 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
344 /* op2 is a register or memory reference */
346 op2
= BX_READ_XMM_REG(i
->rm());
349 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
350 /* pointer, segment address pair */
351 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
354 for(unsigned j
=0; j
<8; j
++) {
355 int sign
= (op2
.xmm16s(j
) > 0) - (op2
.xmm16s(j
) < 0);
356 op1
.xmm16s(j
) *= sign
;
359 BX_WRITE_XMM_REG(i
->nnn(), op1
);
361 BX_INFO(("PSIGNW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
362 exception(BX_UD_EXCEPTION
, 0, 0);
367 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSIGND_VdqWdq(bxInstruction_c
*i
)
369 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
370 BX_CPU_THIS_PTR
prepareSSE();
372 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
374 /* op2 is a register or memory reference */
376 op2
= BX_READ_XMM_REG(i
->rm());
379 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
380 /* pointer, segment address pair */
381 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
384 for(unsigned j
=0; j
<4; j
++) {
385 int sign
= (op2
.xmm32s(j
) > 0) - (op2
.xmm32s(j
) < 0);
386 op1
.xmm32s(j
) *= sign
;
389 BX_WRITE_XMM_REG(i
->nnn(), op1
);
391 BX_INFO(("PSIGND_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
392 exception(BX_UD_EXCEPTION
, 0, 0);
397 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULHRSW_VdqWdq(bxInstruction_c
*i
)
399 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
400 BX_CPU_THIS_PTR
prepareSSE();
402 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
404 /* op2 is a register or memory reference */
406 op2
= BX_READ_XMM_REG(i
->rm());
409 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
410 /* pointer, segment address pair */
411 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
414 op1
.xmm16u(0) = (((op1
.xmm16s(0) * op2
.xmm16s(0)) >> 14) + 1) >> 1;
415 op1
.xmm16u(1) = (((op1
.xmm16s(1) * op2
.xmm16s(1)) >> 14) + 1) >> 1;
416 op1
.xmm16u(2) = (((op1
.xmm16s(2) * op2
.xmm16s(2)) >> 14) + 1) >> 1;
417 op1
.xmm16u(3) = (((op1
.xmm16s(3) * op2
.xmm16s(3)) >> 14) + 1) >> 1;
418 op1
.xmm16u(4) = (((op1
.xmm16s(4) * op2
.xmm16s(4)) >> 14) + 1) >> 1;
419 op1
.xmm16u(5) = (((op1
.xmm16s(5) * op2
.xmm16s(5)) >> 14) + 1) >> 1;
420 op1
.xmm16u(6) = (((op1
.xmm16s(6) * op2
.xmm16s(6)) >> 14) + 1) >> 1;
421 op1
.xmm16u(7) = (((op1
.xmm16s(7) * op2
.xmm16s(7)) >> 14) + 1) >> 1;
423 /* now write result back to destination */
424 BX_WRITE_XMM_REG(i
->nnn(), op1
);
426 BX_INFO(("PMULHRSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
427 exception(BX_UD_EXCEPTION
, 0, 0);
432 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PABSB_VdqWdq(bxInstruction_c
*i
)
434 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
435 BX_CPU_THIS_PTR
prepareSSE();
437 BxPackedXmmRegister op
;
440 op
= BX_READ_XMM_REG(i
->rm());
443 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
444 /* pointer, segment address pair */
445 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
448 if(op
.xmmsbyte(0x0) < 0) op
.xmmubyte(0x0) = -op
.xmmsbyte(0x0);
449 if(op
.xmmsbyte(0x1) < 0) op
.xmmubyte(0x1) = -op
.xmmsbyte(0x1);
450 if(op
.xmmsbyte(0x2) < 0) op
.xmmubyte(0x2) = -op
.xmmsbyte(0x2);
451 if(op
.xmmsbyte(0x3) < 0) op
.xmmubyte(0x3) = -op
.xmmsbyte(0x3);
452 if(op
.xmmsbyte(0x4) < 0) op
.xmmubyte(0x4) = -op
.xmmsbyte(0x4);
453 if(op
.xmmsbyte(0x5) < 0) op
.xmmubyte(0x5) = -op
.xmmsbyte(0x5);
454 if(op
.xmmsbyte(0x6) < 0) op
.xmmubyte(0x6) = -op
.xmmsbyte(0x6);
455 if(op
.xmmsbyte(0x7) < 0) op
.xmmubyte(0x7) = -op
.xmmsbyte(0x7);
456 if(op
.xmmsbyte(0x8) < 0) op
.xmmubyte(0x8) = -op
.xmmsbyte(0x8);
457 if(op
.xmmsbyte(0x9) < 0) op
.xmmubyte(0x9) = -op
.xmmsbyte(0x9);
458 if(op
.xmmsbyte(0xa) < 0) op
.xmmubyte(0xa) = -op
.xmmsbyte(0xa);
459 if(op
.xmmsbyte(0xb) < 0) op
.xmmubyte(0xb) = -op
.xmmsbyte(0xb);
460 if(op
.xmmsbyte(0xc) < 0) op
.xmmubyte(0xc) = -op
.xmmsbyte(0xc);
461 if(op
.xmmsbyte(0xd) < 0) op
.xmmubyte(0xd) = -op
.xmmsbyte(0xd);
462 if(op
.xmmsbyte(0xe) < 0) op
.xmmubyte(0xe) = -op
.xmmsbyte(0xe);
463 if(op
.xmmsbyte(0xf) < 0) op
.xmmubyte(0xf) = -op
.xmmsbyte(0xf);
465 /* now write result back to destination */
466 BX_WRITE_XMM_REG(i
->nnn(), op
);
468 BX_INFO(("PABSB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
469 exception(BX_UD_EXCEPTION
, 0, 0);
474 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PABSW_VdqWdq(bxInstruction_c
*i
)
476 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
477 BX_CPU_THIS_PTR
prepareSSE();
479 BxPackedXmmRegister op
;
482 op
= BX_READ_XMM_REG(i
->rm());
485 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
486 /* pointer, segment address pair */
487 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
490 if(op
.xmm16s(0) < 0) op
.xmm16u(0) = -op
.xmm16s(0);
491 if(op
.xmm16s(1) < 0) op
.xmm16u(1) = -op
.xmm16s(1);
492 if(op
.xmm16s(2) < 0) op
.xmm16u(2) = -op
.xmm16s(2);
493 if(op
.xmm16s(3) < 0) op
.xmm16u(3) = -op
.xmm16s(3);
494 if(op
.xmm16s(4) < 0) op
.xmm16u(4) = -op
.xmm16s(4);
495 if(op
.xmm16s(5) < 0) op
.xmm16u(5) = -op
.xmm16s(5);
496 if(op
.xmm16s(6) < 0) op
.xmm16u(6) = -op
.xmm16s(6);
497 if(op
.xmm16s(7) < 0) op
.xmm16u(7) = -op
.xmm16s(7);
499 /* now write result back to destination */
500 BX_WRITE_XMM_REG(i
->nnn(), op
);
502 BX_INFO(("PABSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
503 exception(BX_UD_EXCEPTION
, 0, 0);
508 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PABSD_VdqWdq(bxInstruction_c
*i
)
510 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
511 BX_CPU_THIS_PTR
prepareSSE();
513 BxPackedXmmRegister op
;
516 op
= BX_READ_XMM_REG(i
->rm());
519 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
520 /* pointer, segment address pair */
521 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
524 if(op
.xmm32s(0) < 0) op
.xmm32u(0) = -op
.xmm32s(0);
525 if(op
.xmm32s(1) < 0) op
.xmm32u(1) = -op
.xmm32s(1);
526 if(op
.xmm32s(2) < 0) op
.xmm32u(2) = -op
.xmm32s(2);
527 if(op
.xmm32s(3) < 0) op
.xmm32u(3) = -op
.xmm32s(3);
529 /* now write result back to destination */
530 BX_WRITE_XMM_REG(i
->nnn(), op
);
532 BX_INFO(("PABSD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
533 exception(BX_UD_EXCEPTION
, 0, 0);
538 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PBLENDVB_VdqWdq(bxInstruction_c
*i
)
540 #if BX_SUPPORT_SSE >= 4
541 BX_CPU_THIS_PTR
prepareSSE();
543 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
,
544 mask
= BX_READ_XMM_REG(0); // XMM0
546 /* op2 is a register or memory reference */
548 op2
= BX_READ_XMM_REG(i
->rm());
551 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
552 /* pointer, segment address pair */
553 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
556 for(unsigned j
=0; j
<16; j
++)
557 if (mask
.xmmubyte(j
) & 0x80) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
559 /* now write result back to destination */
560 BX_WRITE_XMM_REG(i
->nnn(), op1
);
562 BX_INFO(("PBLENDVB_VdqWdq: required SSE4, use --enable-sse option"));
563 exception(BX_UD_EXCEPTION
, 0, 0);
568 void BX_CPP_AttrRegparmN(1) BX_CPU_C::BLENDVPS_VpsWps(bxInstruction_c
*i
)
570 #if BX_SUPPORT_SSE >= 4
571 BX_CPU_THIS_PTR
prepareSSE();
573 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
,
574 mask
= BX_READ_XMM_REG(0); // XMM0
576 /* op2 is a register or memory reference */
578 op2
= BX_READ_XMM_REG(i
->rm());
581 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
582 /* pointer, segment address pair */
583 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
586 if (mask
.xmm32u(0) & 0x80000000) op1
.xmm32u(0) = op2
.xmm32u(0);
587 if (mask
.xmm32u(1) & 0x80000000) op1
.xmm32u(1) = op2
.xmm32u(1);
588 if (mask
.xmm32u(2) & 0x80000000) op1
.xmm32u(2) = op2
.xmm32u(2);
589 if (mask
.xmm32u(3) & 0x80000000) op1
.xmm32u(3) = op2
.xmm32u(3);
591 /* now write result back to destination */
592 BX_WRITE_XMM_REG(i
->nnn(), op1
);
594 BX_INFO(("BLENDVPS_VpsWps: required SSE4, use --enable-sse option"));
595 exception(BX_UD_EXCEPTION
, 0, 0);
600 void BX_CPP_AttrRegparmN(1) BX_CPU_C::BLENDVPD_VpdWpd(bxInstruction_c
*i
)
602 #if BX_SUPPORT_SSE >= 4
603 BX_CPU_THIS_PTR
prepareSSE();
605 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
,
606 mask
= BX_READ_XMM_REG(0); // XMM0
608 /* op2 is a register or memory reference */
610 op2
= BX_READ_XMM_REG(i
->rm());
613 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
614 /* pointer, segment address pair */
615 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
618 if (mask
.xmm32u(1) & 0x80000000) op1
.xmm64u(0) = op2
.xmm64u(0);
619 if (mask
.xmm32u(3) & 0x80000000) op1
.xmm64u(1) = op2
.xmm64u(1);
621 /* now write result back to destination */
622 BX_WRITE_XMM_REG(i
->nnn(), op1
);
624 BX_INFO(("BLENDVPD_VpdWpd: required SSE4, use --enable-sse option"));
625 exception(BX_UD_EXCEPTION
, 0, 0);
630 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PTEST_VdqWdq(bxInstruction_c
*i
)
632 #if BX_SUPPORT_SSE >= 4
633 BX_CPU_THIS_PTR
prepareSSE();
635 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
638 /* op2 is a register or memory reference */
640 op2
= BX_READ_XMM_REG(i
->rm());
643 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
644 /* pointer, segment address pair */
645 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
648 if ((op2
.xmm64u(0) & op1
.xmm64u(0)) == 0 &&
649 (op2
.xmm64u(1) & op1
.xmm64u(1)) == 0) result
|= EFlagsZFMask
;
651 if ((op2
.xmm64u(0) & ~op1
.xmm64u(0)) == 0 &&
652 (op2
.xmm64u(1) & ~op1
.xmm64u(1)) == 0) result
|= EFlagsCFMask
;
654 setEFlagsOSZAPC(result
);
657 BX_INFO(("PTEST_VdqWdq: required SSE4, use --enable-sse option"));
658 exception(BX_UD_EXCEPTION
, 0, 0);
663 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULDQ_VdqWdq(bxInstruction_c
*i
)
665 #if BX_SUPPORT_SSE >= 4
666 BX_CPU_THIS_PTR
prepareSSE();
668 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
670 /* op2 is a register or memory reference */
672 op2
= BX_READ_XMM_REG(i
->rm());
675 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
676 /* pointer, segment address pair */
677 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
680 result
.xmm64s(0) = Bit64s(op1
.xmm32s(0)) * Bit64s(op2
.xmm32s(0));
681 result
.xmm64s(1) = Bit64s(op1
.xmm32s(2)) * Bit64s(op2
.xmm32s(2));
683 /* now write result back to destination */
684 BX_WRITE_XMM_REG(i
->nnn(), result
);
686 BX_INFO(("PMULDQ_VdqWdq: required SSE4, use --enable-sse option"));
687 exception(BX_UD_EXCEPTION
, 0, 0);
692 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQQ_VdqWdq(bxInstruction_c
*i
)
694 #if BX_SUPPORT_SSE >= 4
695 BX_CPU_THIS_PTR
prepareSSE();
697 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
699 /* op2 is a register or memory reference */
701 op2
= BX_READ_XMM_REG(i
->rm());
704 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
705 /* pointer, segment address pair */
706 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
709 op1
.xmm64u(0) = (op1
.xmm64u(0) == op2
.xmm64u(0)) ?
710 BX_CONST64(0xffffffffffffffff) : 0;
712 op1
.xmm64u(1) = (op1
.xmm64u(1) == op2
.xmm64u(1)) ?
713 BX_CONST64(0xffffffffffffffff) : 0;
715 /* now write result back to destination */
716 BX_WRITE_XMM_REG(i
->nnn(), op1
);
718 BX_INFO(("PCMPEQQ_VdqWdq: required SSE4, use --enable-sse option"));
719 exception(BX_UD_EXCEPTION
, 0, 0);
724 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSDW_VdqWdq(bxInstruction_c
*i
)
726 #if BX_SUPPORT_SSE >= 4
727 BX_CPU_THIS_PTR
prepareSSE();
729 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
731 /* op2 is a register or memory reference */
733 op2
= BX_READ_XMM_REG(i
->rm());
736 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
737 /* pointer, segment address pair */
738 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
741 result
.xmm16u(0) = SaturateDwordSToWordU(op1
.xmm32s(0));
742 result
.xmm16u(1) = SaturateDwordSToWordU(op1
.xmm32s(1));
743 result
.xmm16u(2) = SaturateDwordSToWordU(op1
.xmm32s(2));
744 result
.xmm16u(3) = SaturateDwordSToWordU(op1
.xmm32s(3));
745 result
.xmm16u(4) = SaturateDwordSToWordU(op2
.xmm32s(0));
746 result
.xmm16u(5) = SaturateDwordSToWordU(op2
.xmm32s(1));
747 result
.xmm16u(6) = SaturateDwordSToWordU(op2
.xmm32s(2));
748 result
.xmm16u(7) = SaturateDwordSToWordU(op2
.xmm32s(3));
750 /* now write result back to destination */
751 BX_WRITE_XMM_REG(i
->nnn(), result
);
753 BX_INFO(("PACKUSDW_VdqWdq: required SSE4, use --enable-sse option"));
754 exception(BX_UD_EXCEPTION
, 0, 0);
759 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTQ_VdqWdq(bxInstruction_c
*i
)
761 #if (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
762 BX_CPU_THIS_PTR
prepareSSE();
764 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
766 /* op2 is a register or memory reference */
768 op2
= BX_READ_XMM_REG(i
->rm());
771 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
772 /* pointer, segment address pair */
773 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
776 op1
.xmm64u(0) = (op1
.xmm64u(0) > op2
.xmm64u(0)) ?
777 BX_CONST64(0xffffffffffffffff) : 0;
779 op1
.xmm64u(1) = (op1
.xmm64u(1) > op2
.xmm64u(1)) ?
780 BX_CONST64(0xffffffffffffffff) : 0;
782 /* now write result back to destination */
783 BX_WRITE_XMM_REG(i
->nnn(), op1
);
785 BX_INFO(("PCMPGTQ_VdqWdq: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
786 exception(BX_UD_EXCEPTION
, 0, 0);
791 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSB_VdqWdq(bxInstruction_c
*i
)
793 #if BX_SUPPORT_SSE >= 4
794 BX_CPU_THIS_PTR
prepareSSE();
796 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
798 /* op2 is a register or memory reference */
800 op2
= BX_READ_XMM_REG(i
->rm());
803 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
804 /* pointer, segment address pair */
805 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
808 for(unsigned j
=0; j
<16; j
++) {
809 if(op2
.xmmsbyte(j
) < op1
.xmmsbyte(j
)) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
812 /* now write result back to destination */
813 BX_WRITE_XMM_REG(i
->nnn(), op1
);
815 BX_INFO(("PMINSB_VdqWdq: required SSE4, use --enable-sse option"));
816 exception(BX_UD_EXCEPTION
, 0, 0);
821 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSD_VdqWdq(bxInstruction_c
*i
)
823 #if BX_SUPPORT_SSE >= 4
824 BX_CPU_THIS_PTR
prepareSSE();
826 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
828 /* op2 is a register or memory reference */
830 op2
= BX_READ_XMM_REG(i
->rm());
833 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
834 /* pointer, segment address pair */
835 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
838 if(op2
.xmm32s(0) < op1
.xmm32s(0)) op1
.xmm32u(0) = op2
.xmm32u(0);
839 if(op2
.xmm32s(1) < op1
.xmm32s(1)) op1
.xmm32u(1) = op2
.xmm32u(1);
840 if(op2
.xmm32s(2) < op1
.xmm32s(2)) op1
.xmm32u(2) = op2
.xmm32u(2);
841 if(op2
.xmm32s(3) < op1
.xmm32s(3)) op1
.xmm32u(3) = op2
.xmm32u(3);
843 /* now write result back to destination */
844 BX_WRITE_XMM_REG(i
->nnn(), op1
);
846 BX_INFO(("PMINSD_VdqWdq: required SSE4, use --enable-sse option"));
847 exception(BX_UD_EXCEPTION
, 0, 0);
852 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUW_VdqWdq(bxInstruction_c
*i
)
854 #if BX_SUPPORT_SSE >= 4
855 BX_CPU_THIS_PTR
prepareSSE();
857 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
859 /* op2 is a register or memory reference */
861 op2
= BX_READ_XMM_REG(i
->rm());
864 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
865 /* pointer, segment address pair */
866 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
869 if(op2
.xmm16u(0) < op1
.xmm16u(0)) op1
.xmm16u(0) = op2
.xmm16u(0);
870 if(op2
.xmm16u(1) < op1
.xmm16u(1)) op1
.xmm16u(1) = op2
.xmm16u(1);
871 if(op2
.xmm16u(2) < op1
.xmm16u(2)) op1
.xmm16u(2) = op2
.xmm16u(2);
872 if(op2
.xmm16u(3) < op1
.xmm16u(3)) op1
.xmm16u(3) = op2
.xmm16u(3);
873 if(op2
.xmm16u(4) < op1
.xmm16u(4)) op1
.xmm16u(4) = op2
.xmm16u(4);
874 if(op2
.xmm16u(5) < op1
.xmm16u(5)) op1
.xmm16u(5) = op2
.xmm16u(5);
875 if(op2
.xmm16u(6) < op1
.xmm16u(6)) op1
.xmm16u(6) = op2
.xmm16u(6);
876 if(op2
.xmm16u(7) < op1
.xmm16u(7)) op1
.xmm16u(7) = op2
.xmm16u(7);
878 /* now write result back to destination */
879 BX_WRITE_XMM_REG(i
->nnn(), op1
);
881 BX_INFO(("PMINUW_VdqWdq: required SSE4, use --enable-sse option"));
882 exception(BX_UD_EXCEPTION
, 0, 0);
887 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUD_VdqWdq(bxInstruction_c
*i
)
889 #if BX_SUPPORT_SSE >= 4
890 BX_CPU_THIS_PTR
prepareSSE();
892 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
894 /* op2 is a register or memory reference */
896 op2
= BX_READ_XMM_REG(i
->rm());
899 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
900 /* pointer, segment address pair */
901 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
904 if(op2
.xmm32u(0) < op1
.xmm32u(0)) op1
.xmm32u(0) = op2
.xmm32u(0);
905 if(op2
.xmm32u(1) < op1
.xmm32u(1)) op1
.xmm32u(1) = op2
.xmm32u(1);
906 if(op2
.xmm32u(2) < op1
.xmm32u(2)) op1
.xmm32u(2) = op2
.xmm32u(2);
907 if(op2
.xmm32u(3) < op1
.xmm32u(3)) op1
.xmm32u(3) = op2
.xmm32u(3);
909 /* now write result back to destination */
910 BX_WRITE_XMM_REG(i
->nnn(), op1
);
912 BX_INFO(("PMINUD_VdqWdq: required SSE4, use --enable-sse option"));
913 exception(BX_UD_EXCEPTION
, 0, 0);
918 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSB_VdqWdq(bxInstruction_c
*i
)
920 #if BX_SUPPORT_SSE >= 4
921 BX_CPU_THIS_PTR
prepareSSE();
923 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
925 /* op2 is a register or memory reference */
927 op2
= BX_READ_XMM_REG(i
->rm());
930 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
931 /* pointer, segment address pair */
932 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
935 for(unsigned j
=0; j
<16; j
++) {
936 if(op2
.xmmsbyte(j
) > op1
.xmmsbyte(j
)) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
939 /* now write result back to destination */
940 BX_WRITE_XMM_REG(i
->nnn(), op1
);
942 BX_INFO(("PMAXSB_VdqWdq: required SSE4, use --enable-sse option"));
943 exception(BX_UD_EXCEPTION
, 0, 0);
948 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSD_VdqWdq(bxInstruction_c
*i
)
950 #if BX_SUPPORT_SSE >= 4
951 BX_CPU_THIS_PTR
prepareSSE();
953 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
955 /* op2 is a register or memory reference */
957 op2
= BX_READ_XMM_REG(i
->rm());
960 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
961 /* pointer, segment address pair */
962 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
965 if(op2
.xmm32s(0) > op1
.xmm32s(0)) op1
.xmm32u(0) = op2
.xmm32u(0);
966 if(op2
.xmm32s(1) > op1
.xmm32s(1)) op1
.xmm32u(1) = op2
.xmm32u(1);
967 if(op2
.xmm32s(2) > op1
.xmm32s(2)) op1
.xmm32u(2) = op2
.xmm32u(2);
968 if(op2
.xmm32s(3) > op1
.xmm32s(3)) op1
.xmm32u(3) = op2
.xmm32u(3);
970 /* now write result back to destination */
971 BX_WRITE_XMM_REG(i
->nnn(), op1
);
973 BX_INFO(("PMAXSD_VdqWdq: required SSE4, use --enable-sse option"));
974 exception(BX_UD_EXCEPTION
, 0, 0);
979 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUW_VdqWdq(bxInstruction_c
*i
)
981 #if BX_SUPPORT_SSE >= 4
982 BX_CPU_THIS_PTR
prepareSSE();
984 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
986 /* op2 is a register or memory reference */
988 op2
= BX_READ_XMM_REG(i
->rm());
991 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
992 /* pointer, segment address pair */
993 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
996 if(op2
.xmm16u(0) > op1
.xmm16u(0)) op1
.xmm16u(0) = op2
.xmm16u(0);
997 if(op2
.xmm16u(1) > op1
.xmm16u(1)) op1
.xmm16u(1) = op2
.xmm16u(1);
998 if(op2
.xmm16u(2) > op1
.xmm16u(2)) op1
.xmm16u(2) = op2
.xmm16u(2);
999 if(op2
.xmm16u(3) > op1
.xmm16u(3)) op1
.xmm16u(3) = op2
.xmm16u(3);
1000 if(op2
.xmm16u(4) > op1
.xmm16u(4)) op1
.xmm16u(4) = op2
.xmm16u(4);
1001 if(op2
.xmm16u(5) > op1
.xmm16u(5)) op1
.xmm16u(5) = op2
.xmm16u(5);
1002 if(op2
.xmm16u(6) > op1
.xmm16u(6)) op1
.xmm16u(6) = op2
.xmm16u(6);
1003 if(op2
.xmm16u(7) > op1
.xmm16u(7)) op1
.xmm16u(7) = op2
.xmm16u(7);
1005 /* now write result back to destination */
1006 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1008 BX_INFO(("PMAXUW_VdqWdq: required SSE4, use --enable-sse option"));
1009 exception(BX_UD_EXCEPTION
, 0, 0);
1014 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUD_VdqWdq(bxInstruction_c
*i
)
1016 #if BX_SUPPORT_SSE >= 4
1017 BX_CPU_THIS_PTR
prepareSSE();
1019 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1021 /* op2 is a register or memory reference */
1023 op2
= BX_READ_XMM_REG(i
->rm());
1026 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1027 /* pointer, segment address pair */
1028 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1031 if(op2
.xmm32u(0) > op1
.xmm32u(0)) op1
.xmm32u(0) = op2
.xmm32u(0);
1032 if(op2
.xmm32u(1) > op1
.xmm32u(1)) op1
.xmm32u(1) = op2
.xmm32u(1);
1033 if(op2
.xmm32u(2) > op1
.xmm32u(2)) op1
.xmm32u(2) = op2
.xmm32u(2);
1034 if(op2
.xmm32u(3) > op1
.xmm32u(3)) op1
.xmm32u(3) = op2
.xmm32u(3);
1036 /* now write result back to destination */
1037 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1039 BX_INFO(("PMAXUD_VdqWdq: required SSE4, use --enable-sse option"));
1040 exception(BX_UD_EXCEPTION
, 0, 0);
1045 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULLD_VdqWdq(bxInstruction_c
*i
)
1047 #if BX_SUPPORT_SSE >= 4
1048 BX_CPU_THIS_PTR
prepareSSE();
1050 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1052 /* op2 is a register or memory reference */
1054 op2
= BX_READ_XMM_REG(i
->rm());
1057 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1058 /* pointer, segment address pair */
1059 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1062 Bit64s product1
= Bit64s(op1
.xmm32s(0)) * Bit64s(op2
.xmm32s(0));
1063 Bit64s product2
= Bit64s(op1
.xmm32s(1)) * Bit64s(op2
.xmm32s(1));
1064 Bit64s product3
= Bit64s(op1
.xmm32s(2)) * Bit64s(op2
.xmm32s(2));
1065 Bit64s product4
= Bit64s(op1
.xmm32s(3)) * Bit64s(op2
.xmm32s(3));
1067 op1
.xmm32u(0) = (Bit32u
)(product1
& 0xFFFFFFFF);
1068 op1
.xmm32u(1) = (Bit32u
)(product2
& 0xFFFFFFFF);
1069 op1
.xmm32u(2) = (Bit32u
)(product3
& 0xFFFFFFFF);
1070 op1
.xmm32u(3) = (Bit32u
)(product4
& 0xFFFFFFFF);
1072 /* now write result back to destination */
1073 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1075 BX_INFO(("PMULLD_VdqWdq: required SSE4, use --enable-sse option"));
1076 exception(BX_UD_EXCEPTION
, 0, 0);
1081 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHMINPOSUW_VdqWdq(bxInstruction_c
*i
)
1083 #if BX_SUPPORT_SSE >= 4
1084 BX_CPU_THIS_PTR
prepareSSE();
1086 BxPackedXmmRegister op
, result
;
1088 /* op2 is a register or memory reference */
1090 op
= BX_READ_XMM_REG(i
->rm());
1093 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1094 /* pointer, segment address pair */
1095 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
1100 for (unsigned j
=1; j
< 8; j
++) {
1101 if (op
.xmm16u(j
) < op
.xmm16u(min
)) min
= j
;
1104 result
.xmm16u(0) = op
.xmm16u(min
);
1105 result
.xmm16u(1) = min
;
1106 result
.xmm32u(1) = 0;
1107 result
.xmm64u(1) = 0;
1109 /* now write result back to destination */
1110 BX_WRITE_XMM_REG(i
->nnn(), result
);
1112 BX_INFO(("PHMINPOSUW_VdqWdq: required SSE4, use --enable-sse option"));
1113 exception(BX_UD_EXCEPTION
, 0, 0);
1118 void BX_CPP_AttrRegparmN(1) BX_CPU_C::BLENDPS_VpsWpsIb(bxInstruction_c
*i
)
1120 #if BX_SUPPORT_SSE >= 4
1121 BX_CPU_THIS_PTR
prepareSSE();
1123 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1124 Bit8u mask
= i
->Ib();
1126 /* op2 is a register or memory reference */
1128 op2
= BX_READ_XMM_REG(i
->rm());
1131 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1132 /* pointer, segment address pair */
1133 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1136 if (mask
& 0x1) op1
.xmm32u(0) = op2
.xmm32u(0);
1137 if (mask
& 0x2) op1
.xmm32u(1) = op2
.xmm32u(1);
1138 if (mask
& 0x4) op1
.xmm32u(2) = op2
.xmm32u(2);
1139 if (mask
& 0x8) op1
.xmm32u(3) = op2
.xmm32u(3);
1141 /* now write result back to destination */
1142 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1144 BX_INFO(("BLENDPS_VpsWpsIb: required SSE4, use --enable-sse option"));
1145 exception(BX_UD_EXCEPTION
, 0, 0);
1150 void BX_CPP_AttrRegparmN(1) BX_CPU_C::BLENDPD_VpdWpdIb(bxInstruction_c
*i
)
1152 #if BX_SUPPORT_SSE >= 4
1153 BX_CPU_THIS_PTR
prepareSSE();
1155 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1156 Bit8u mask
= i
->Ib();
1158 /* op2 is a register or memory reference */
1160 op2
= BX_READ_XMM_REG(i
->rm());
1163 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1164 /* pointer, segment address pair */
1165 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1168 if (mask
& 0x1) op1
.xmm64u(0) = op2
.xmm64u(0);
1169 if (mask
& 0x2) op1
.xmm64u(1) = op2
.xmm64u(1);
1171 /* now write result back to destination */
1172 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1174 BX_INFO(("BLENDPD_VpdWpdIb: required SSE4, use --enable-sse option"));
1175 exception(BX_UD_EXCEPTION
, 0, 0);
1180 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PBLENDW_VdqWdqIb(bxInstruction_c
*i
)
1182 #if BX_SUPPORT_SSE >= 4
1183 BX_CPU_THIS_PTR
prepareSSE();
1185 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1186 Bit8u mask
= i
->Ib();
1188 /* op2 is a register or memory reference */
1190 op2
= BX_READ_XMM_REG(i
->rm());
1193 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1194 /* pointer, segment address pair */
1195 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1198 if (mask
& 0x01) op1
.xmm16u(0) = op2
.xmm16u(0);
1199 if (mask
& 0x02) op1
.xmm16u(1) = op2
.xmm16u(1);
1200 if (mask
& 0x04) op1
.xmm16u(2) = op2
.xmm16u(2);
1201 if (mask
& 0x08) op1
.xmm16u(3) = op2
.xmm16u(3);
1202 if (mask
& 0x10) op1
.xmm16u(4) = op2
.xmm16u(4);
1203 if (mask
& 0x20) op1
.xmm16u(5) = op2
.xmm16u(5);
1204 if (mask
& 0x40) op1
.xmm16u(6) = op2
.xmm16u(6);
1205 if (mask
& 0x80) op1
.xmm16u(7) = op2
.xmm16u(7);
1207 /* now write result back to destination */
1208 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1210 BX_INFO(("PBLENDW_VdqWdqIb: required SSE4, use --enable-sse option"));
1211 exception(BX_UD_EXCEPTION
, 0, 0);
1216 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_HbdUdqIb(bxInstruction_c
*i
)
1218 #if BX_SUPPORT_SSE >= 4
1219 BX_CPU_THIS_PTR
prepareSSE();
1221 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
1222 Bit8u result
= op
.xmmubyte(i
->Ib() & 0xF);
1224 /* result is a register or memory reference */
1226 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
1229 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1230 /* pointer, segment address pair */
1231 write_virtual_byte(i
->seg(), eaddr
, result
);
1234 BX_INFO(("PEXTRB_HbdUdqIb: required SSE4, use --enable-sse option"));
1235 exception(BX_UD_EXCEPTION
, 0, 0);
1240 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_HwdUdqIb(bxInstruction_c
*i
)
1242 #if BX_SUPPORT_SSE >= 4
1243 BX_CPU_THIS_PTR
prepareSSE();
1245 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
1246 Bit16u result
= op
.xmm16u(i
->Ib() & 7);
1248 /* result is a register or memory reference */
1250 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
1253 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1254 /* pointer, segment address pair */
1255 write_virtual_word(i
->seg(), eaddr
, result
);
1258 BX_INFO(("PEXTRW_HwdUdqIb: required SSE4, use --enable-sse option"));
1259 exception(BX_UD_EXCEPTION
, 0, 0);
1264 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_HdUdqIb(bxInstruction_c
*i
)
1266 #if BX_SUPPORT_SSE >= 4
1267 BX_CPU_THIS_PTR
prepareSSE();
1269 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
1271 #if BX_SUPPORT_X86_64
1272 if (i
->os64L()) /* 64 bit operand size mode */
1274 Bit64u result
= op
.xmm64u(i
->Ib() & 1);
1276 /* result is a register or memory reference */
1278 BX_WRITE_64BIT_REG(i
->nnn(), result
);
1281 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1282 /* pointer, segment address pair */
1283 write_virtual_qword_64(i
->seg(), eaddr
, result
);
1289 Bit32u result
= op
.xmm32u(i
->Ib() & 3);
1291 /* result is a register or memory reference */
1293 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
1296 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1297 /* pointer, segment address pair */
1298 write_virtual_dword(i
->seg(), eaddr
, result
);
1302 BX_INFO(("PEXTRD_HdUdqIb: required SSE4, use --enable-sse option"));
1303 exception(BX_UD_EXCEPTION
, 0, 0);
1308 void BX_CPP_AttrRegparmN(1) BX_CPU_C::EXTRACTPS_HdUpsIb(bxInstruction_c
*i
)
1310 #if BX_SUPPORT_SSE >= 4
1311 BX_CPU_THIS_PTR
prepareSSE();
1313 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
1314 Bit32u result
= op
.xmm32u(i
->Ib() & 3);
1316 /* result is a register or memory reference */
1318 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
1321 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1322 /* pointer, segment address pair */
1323 write_virtual_dword(i
->seg(), eaddr
, result
);
1326 BX_INFO(("EXTRACTPS_HdUpsIb: required SSE4, use --enable-sse option"));
1327 exception(BX_UD_EXCEPTION
, 0, 0);
1332 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRB_VdqEbIb(bxInstruction_c
*i
)
1334 #if BX_SUPPORT_SSE >= 4
1335 BX_CPU_THIS_PTR
prepareSSE();
1337 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn());
1340 /* op2 is a register or memory reference */
1342 op2
= BX_READ_16BIT_REG(i
->rm()); // won't allow reading of AH/CH/BH/DH
1345 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1346 /* pointer, segment address pair */
1347 op2
= read_virtual_byte(i
->seg(), eaddr
);
1350 op1
.xmmubyte(i
->Ib() & 0xF) = op2
;
1352 /* now write result back to destination */
1353 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1355 BX_INFO(("PINSRB_VdqEbIb: required SSE4, use --enable-sse option"));
1356 exception(BX_UD_EXCEPTION
, 0, 0);
1361 void BX_CPP_AttrRegparmN(1) BX_CPU_C::INSERTPS_VpsWssIb(bxInstruction_c
*i
)
1363 #if BX_SUPPORT_SSE >= 4
1364 BX_CPU_THIS_PTR
prepareSSE();
1366 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn());
1367 Bit8u control
= i
->Ib();
1370 /* op2 is a register or memory reference */
1372 BxPackedXmmRegister temp
= BX_READ_XMM_REG(i
->rm());
1373 op2
= temp
.xmm32u((control
>> 6) & 3);
1376 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1377 /* pointer, segment address pair */
1378 op2
= read_virtual_dword(i
->seg(), eaddr
);
1381 op1
.xmm32u((control
>> 4) & 3) = op2
;
1383 if (control
& 1) op1
.xmm32u(0) = 0;
1384 if (control
& 2) op1
.xmm32u(1) = 0;
1385 if (control
& 4) op1
.xmm32u(2) = 0;
1386 if (control
& 8) op1
.xmm32u(3) = 0;
1388 /* now write result back to destination */
1389 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1391 BX_INFO(("INSERTPS_VpsWssIb: required SSE4, use --enable-sse option"));
1392 exception(BX_UD_EXCEPTION
, 0, 0);
1397 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRD_VdqEdIb(bxInstruction_c
*i
)
1399 #if BX_SUPPORT_SSE >= 4
1400 BX_CPU_THIS_PTR
prepareSSE();
1402 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn());
1404 #if BX_SUPPORT_X86_64
1405 if (i
->os64L()) /* 64 bit operand size mode */
1409 /* op2 is a register or memory reference */
1411 op2
= BX_READ_64BIT_REG(i
->rm());
1414 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1415 /* pointer, segment address pair */
1416 op2
= read_virtual_qword_64(i
->seg(), eaddr
);
1419 op1
.xmm64u(i
->Ib() & 1) = op2
;
1426 /* op2 is a register or memory reference */
1428 op2
= BX_READ_32BIT_REG(i
->rm());
1431 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1432 /* pointer, segment address pair */
1433 op2
= read_virtual_dword(i
->seg(), eaddr
);
1436 op1
.xmm32u(i
->Ib() & 3) = op2
;
1439 /* now write result back to destination */
1440 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1442 BX_INFO(("PINSRD_VdqEdIb: required SSE4, use --enable-sse option"));
1443 exception(BX_UD_EXCEPTION
, 0, 0);
1448 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MPSADBW_VdqWdqIb(bxInstruction_c
*i
)
1450 #if BX_SUPPORT_SSE >= 4
1451 BX_CPU_THIS_PTR
prepareSSE();
1453 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1455 /* op2 is a register or memory reference */
1457 op2
= BX_READ_XMM_REG(i
->rm());
1460 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1461 /* pointer, segment address pair */
1462 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1465 unsigned src_offset
= (i
->Ib() & 3) * 4;
1466 unsigned dst_offset
= ((i
->Ib() >> 2) & 1) * 4;
1468 for (unsigned j
=0; j
< 8; j
++)
1470 result
.xmm16u(j
) = 0;
1472 for (unsigned k
=0; k
< 4; k
++) {
1473 Bit8u temp1
= op1
.xmmubyte(j
+ k
+ dst_offset
);
1474 Bit8u temp2
= op2
.xmmubyte( k
+ src_offset
);
1476 result
.xmm16u(j
) += (temp1
- temp2
);
1478 result
.xmm16u(j
) += (temp2
- temp1
);
1482 BX_WRITE_XMM_REG(i
->nnn(), result
);
1484 BX_INFO(("MPSADBW_VdqWdqIb: required SSE4, use --enable-sse option"));
1485 exception(BX_UD_EXCEPTION
, 0, 0);
1489 #endif // (BX_SUPPORT_SSE >= 4 || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
1492 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKLBW_VdqWdq(bxInstruction_c
*i
)
1494 #if BX_SUPPORT_SSE >= 2
1495 BX_CPU_THIS_PTR
prepareSSE();
1497 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1499 /* op2 is a register or memory reference */
1501 op2
= BX_READ_XMM_REG(i
->rm());
1504 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1505 /* pointer, segment address pair */
1506 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1509 result
.xmmubyte(0x0) = op1
.xmmubyte(0);
1510 result
.xmmubyte(0x1) = op2
.xmmubyte(0);
1511 result
.xmmubyte(0x2) = op1
.xmmubyte(1);
1512 result
.xmmubyte(0x3) = op2
.xmmubyte(1);
1513 result
.xmmubyte(0x4) = op1
.xmmubyte(2);
1514 result
.xmmubyte(0x5) = op2
.xmmubyte(2);
1515 result
.xmmubyte(0x6) = op1
.xmmubyte(3);
1516 result
.xmmubyte(0x7) = op2
.xmmubyte(3);
1517 result
.xmmubyte(0x8) = op1
.xmmubyte(4);
1518 result
.xmmubyte(0x9) = op2
.xmmubyte(4);
1519 result
.xmmubyte(0xA) = op1
.xmmubyte(5);
1520 result
.xmmubyte(0xB) = op2
.xmmubyte(5);
1521 result
.xmmubyte(0xC) = op1
.xmmubyte(6);
1522 result
.xmmubyte(0xD) = op2
.xmmubyte(6);
1523 result
.xmmubyte(0xE) = op1
.xmmubyte(7);
1524 result
.xmmubyte(0xF) = op2
.xmmubyte(7);
1526 /* now write result back to destination */
1527 BX_WRITE_XMM_REG(i
->nnn(), result
);
1529 BX_INFO(("PUNPCKLBW_VdqWdq: required SSE2, use --enable-sse option"));
1530 exception(BX_UD_EXCEPTION
, 0, 0);
1535 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKLWD_VdqWdq(bxInstruction_c
*i
)
1537 #if BX_SUPPORT_SSE >= 2
1538 BX_CPU_THIS_PTR
prepareSSE();
1540 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1542 /* op2 is a register or memory reference */
1544 op2
= BX_READ_XMM_REG(i
->rm());
1547 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1548 /* pointer, segment address pair */
1549 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1552 result
.xmm16u(0) = op1
.xmm16u(0);
1553 result
.xmm16u(1) = op2
.xmm16u(0);
1554 result
.xmm16u(2) = op1
.xmm16u(1);
1555 result
.xmm16u(3) = op2
.xmm16u(1);
1556 result
.xmm16u(4) = op1
.xmm16u(2);
1557 result
.xmm16u(5) = op2
.xmm16u(2);
1558 result
.xmm16u(6) = op1
.xmm16u(3);
1559 result
.xmm16u(7) = op2
.xmm16u(3);
1561 /* now write result back to destination */
1562 BX_WRITE_XMM_REG(i
->nnn(), result
);
1564 BX_INFO(("PUNPCKLWD_VdqWdq: required SSE2, use --enable-sse option"));
1565 exception(BX_UD_EXCEPTION
, 0, 0);
1569 /* UNPCKLPS: 0F 14 */
1570 /* PUNPCKLDQ: 66 0F 62 */
1571 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UNPCKLPS_VpsWdq(bxInstruction_c
*i
)
1573 #if BX_SUPPORT_SSE >= 1
1574 BX_CPU_THIS_PTR
prepareSSE();
1576 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1578 /* op2 is a register or memory reference */
1580 op2
= BX_READ_XMM_REG(i
->rm());
1583 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1584 /* pointer, segment address pair */
1585 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1588 result
.xmm32u(0) = op1
.xmm32u(0);
1589 result
.xmm32u(1) = op2
.xmm32u(0);
1590 result
.xmm32u(2) = op1
.xmm32u(1);
1591 result
.xmm32u(3) = op2
.xmm32u(1);
1593 /* now write result back to destination */
1594 BX_WRITE_XMM_REG(i
->nnn(), result
);
1596 BX_INFO(("UNPCKLPS_VpsWdq: required SSE, use --enable-sse option"));
1597 exception(BX_UD_EXCEPTION
, 0, 0);
1602 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKSSWB_VdqWdq(bxInstruction_c
*i
)
1604 #if BX_SUPPORT_SSE >= 2
1605 BX_CPU_THIS_PTR
prepareSSE();
1607 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1609 /* op2 is a register or memory reference */
1611 op2
= BX_READ_XMM_REG(i
->rm());
1614 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1615 /* pointer, segment address pair */
1616 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1619 result
.xmmsbyte(0x0) = SaturateWordSToByteS(op1
.xmm16s(0));
1620 result
.xmmsbyte(0x1) = SaturateWordSToByteS(op1
.xmm16s(1));
1621 result
.xmmsbyte(0x2) = SaturateWordSToByteS(op1
.xmm16s(2));
1622 result
.xmmsbyte(0x3) = SaturateWordSToByteS(op1
.xmm16s(3));
1623 result
.xmmsbyte(0x4) = SaturateWordSToByteS(op1
.xmm16s(4));
1624 result
.xmmsbyte(0x5) = SaturateWordSToByteS(op1
.xmm16s(5));
1625 result
.xmmsbyte(0x6) = SaturateWordSToByteS(op1
.xmm16s(6));
1626 result
.xmmsbyte(0x7) = SaturateWordSToByteS(op1
.xmm16s(7));
1628 result
.xmmsbyte(0x8) = SaturateWordSToByteS(op2
.xmm16s(0));
1629 result
.xmmsbyte(0x9) = SaturateWordSToByteS(op2
.xmm16s(1));
1630 result
.xmmsbyte(0xA) = SaturateWordSToByteS(op2
.xmm16s(2));
1631 result
.xmmsbyte(0xB) = SaturateWordSToByteS(op2
.xmm16s(3));
1632 result
.xmmsbyte(0xC) = SaturateWordSToByteS(op2
.xmm16s(4));
1633 result
.xmmsbyte(0xD) = SaturateWordSToByteS(op2
.xmm16s(5));
1634 result
.xmmsbyte(0xE) = SaturateWordSToByteS(op2
.xmm16s(6));
1635 result
.xmmsbyte(0xF) = SaturateWordSToByteS(op2
.xmm16s(7));
1637 /* now write result back to destination */
1638 BX_WRITE_XMM_REG(i
->nnn(), result
);
1640 BX_INFO(("PACKSSWB_VdqWdq: required SSE2, use --enable-sse option"));
1641 exception(BX_UD_EXCEPTION
, 0, 0);
1646 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTB_VdqWdq(bxInstruction_c
*i
)
1648 #if BX_SUPPORT_SSE >= 2
1649 BX_CPU_THIS_PTR
prepareSSE();
1651 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1653 /* op2 is a register or memory reference */
1655 op2
= BX_READ_XMM_REG(i
->rm());
1658 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1659 /* pointer, segment address pair */
1660 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1663 for(unsigned j
=0; j
<16; j
++) {
1664 op1
.xmmubyte(j
) = (op1
.xmmsbyte(j
) > op2
.xmmsbyte(j
)) ? 0xff : 0;
1667 /* now write result back to destination */
1668 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1670 BX_INFO(("PCMPGTB_VdqWdq: required SSE2, use --enable-sse option"));
1671 exception(BX_UD_EXCEPTION
, 0, 0);
1676 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTW_VdqWdq(bxInstruction_c
*i
)
1678 #if BX_SUPPORT_SSE >= 2
1679 BX_CPU_THIS_PTR
prepareSSE();
1681 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1683 /* op2 is a register or memory reference */
1685 op2
= BX_READ_XMM_REG(i
->rm());
1688 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1689 /* pointer, segment address pair */
1690 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1693 op1
.xmm16u(0) = (op1
.xmm16s(0) > op2
.xmm16s(0)) ? 0xffff : 0;
1694 op1
.xmm16u(1) = (op1
.xmm16s(1) > op2
.xmm16s(1)) ? 0xffff : 0;
1695 op1
.xmm16u(2) = (op1
.xmm16s(2) > op2
.xmm16s(2)) ? 0xffff : 0;
1696 op1
.xmm16u(3) = (op1
.xmm16s(3) > op2
.xmm16s(3)) ? 0xffff : 0;
1697 op1
.xmm16u(4) = (op1
.xmm16s(4) > op2
.xmm16s(4)) ? 0xffff : 0;
1698 op1
.xmm16u(5) = (op1
.xmm16s(5) > op2
.xmm16s(5)) ? 0xffff : 0;
1699 op1
.xmm16u(6) = (op1
.xmm16s(6) > op2
.xmm16s(6)) ? 0xffff : 0;
1700 op1
.xmm16u(7) = (op1
.xmm16s(7) > op2
.xmm16s(7)) ? 0xffff : 0;
1702 /* now write result back to destination */
1703 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1705 BX_INFO(("PCMPGTW_VdqWdq: required SSE2, use --enable-sse option"));
1706 exception(BX_UD_EXCEPTION
, 0, 0);
1711 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTD_VdqWdq(bxInstruction_c
*i
)
1713 #if BX_SUPPORT_SSE >= 2
1714 BX_CPU_THIS_PTR
prepareSSE();
1716 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1718 /* op2 is a register or memory reference */
1720 op2
= BX_READ_XMM_REG(i
->rm());
1723 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1724 /* pointer, segment address pair */
1725 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1728 op1
.xmm32u(0) = (op1
.xmm32s(0) > op2
.xmm32s(0)) ? 0xffffffff : 0;
1729 op1
.xmm32u(1) = (op1
.xmm32s(1) > op2
.xmm32s(1)) ? 0xffffffff : 0;
1730 op1
.xmm32u(2) = (op1
.xmm32s(2) > op2
.xmm32s(2)) ? 0xffffffff : 0;
1731 op1
.xmm32u(3) = (op1
.xmm32s(3) > op2
.xmm32s(3)) ? 0xffffffff : 0;
1733 /* now write result back to destination */
1734 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1736 BX_INFO(("PCMPGTD_VdqWdq: required SSE2, use --enable-sse option"));
1737 exception(BX_UD_EXCEPTION
, 0, 0);
1742 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSWB_VdqWdq(bxInstruction_c
*i
)
1744 #if BX_SUPPORT_SSE >= 2
1745 BX_CPU_THIS_PTR
prepareSSE();
1747 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1749 /* op2 is a register or memory reference */
1751 op2
= BX_READ_XMM_REG(i
->rm());
1754 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1755 /* pointer, segment address pair */
1756 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1759 result
.xmmubyte(0x0) = SaturateWordSToByteU(op1
.xmm16s(0));
1760 result
.xmmubyte(0x1) = SaturateWordSToByteU(op1
.xmm16s(1));
1761 result
.xmmubyte(0x2) = SaturateWordSToByteU(op1
.xmm16s(2));
1762 result
.xmmubyte(0x3) = SaturateWordSToByteU(op1
.xmm16s(3));
1763 result
.xmmubyte(0x4) = SaturateWordSToByteU(op1
.xmm16s(4));
1764 result
.xmmubyte(0x5) = SaturateWordSToByteU(op1
.xmm16s(5));
1765 result
.xmmubyte(0x6) = SaturateWordSToByteU(op1
.xmm16s(6));
1766 result
.xmmubyte(0x7) = SaturateWordSToByteU(op1
.xmm16s(7));
1768 result
.xmmubyte(0x8) = SaturateWordSToByteU(op2
.xmm16s(0));
1769 result
.xmmubyte(0x9) = SaturateWordSToByteU(op2
.xmm16s(1));
1770 result
.xmmubyte(0xA) = SaturateWordSToByteU(op2
.xmm16s(2));
1771 result
.xmmubyte(0xB) = SaturateWordSToByteU(op2
.xmm16s(3));
1772 result
.xmmubyte(0xC) = SaturateWordSToByteU(op2
.xmm16s(4));
1773 result
.xmmubyte(0xD) = SaturateWordSToByteU(op2
.xmm16s(5));
1774 result
.xmmubyte(0xE) = SaturateWordSToByteU(op2
.xmm16s(6));
1775 result
.xmmubyte(0xF) = SaturateWordSToByteU(op2
.xmm16s(7));
1777 /* now write result back to destination */
1778 BX_WRITE_XMM_REG(i
->nnn(), result
);
1780 BX_INFO(("PACKUSWB_VdqWdq: required SSE2, use --enable-sse option"));
1781 exception(BX_UD_EXCEPTION
, 0, 0);
1786 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKHBW_VdqWdq(bxInstruction_c
*i
)
1788 #if BX_SUPPORT_SSE >= 2
1789 BX_CPU_THIS_PTR
prepareSSE();
1791 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1793 /* op2 is a register or memory reference */
1795 op2
= BX_READ_XMM_REG(i
->rm());
1798 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1799 /* pointer, segment address pair */
1800 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1803 result
.xmmubyte(0x0) = op1
.xmmubyte(0x8);
1804 result
.xmmubyte(0x1) = op2
.xmmubyte(0x8);
1805 result
.xmmubyte(0x2) = op1
.xmmubyte(0x9);
1806 result
.xmmubyte(0x3) = op2
.xmmubyte(0x9);
1807 result
.xmmubyte(0x4) = op1
.xmmubyte(0xA);
1808 result
.xmmubyte(0x5) = op2
.xmmubyte(0xA);
1809 result
.xmmubyte(0x6) = op1
.xmmubyte(0xB);
1810 result
.xmmubyte(0x7) = op2
.xmmubyte(0xB);
1811 result
.xmmubyte(0x8) = op1
.xmmubyte(0xC);
1812 result
.xmmubyte(0x9) = op2
.xmmubyte(0xC);
1813 result
.xmmubyte(0xA) = op1
.xmmubyte(0xD);
1814 result
.xmmubyte(0xB) = op2
.xmmubyte(0xD);
1815 result
.xmmubyte(0xC) = op1
.xmmubyte(0xE);
1816 result
.xmmubyte(0xD) = op2
.xmmubyte(0xE);
1817 result
.xmmubyte(0xE) = op1
.xmmubyte(0xF);
1818 result
.xmmubyte(0xF) = op2
.xmmubyte(0xF);
1820 /* now write result back to destination */
1821 BX_WRITE_XMM_REG(i
->nnn(), result
);
1823 BX_INFO(("PUNPCKHBW_VdqWdq: required SSE2, use --enable-sse option"));
1824 exception(BX_UD_EXCEPTION
, 0, 0);
1829 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKHWD_VdqWdq(bxInstruction_c
*i
)
1831 #if BX_SUPPORT_SSE >= 2
1832 BX_CPU_THIS_PTR
prepareSSE();
1834 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1836 /* op2 is a register or memory reference */
1838 op2
= BX_READ_XMM_REG(i
->rm());
1841 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1842 /* pointer, segment address pair */
1843 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1846 result
.xmm16u(0) = op1
.xmm16u(4);
1847 result
.xmm16u(1) = op2
.xmm16u(4);
1848 result
.xmm16u(2) = op1
.xmm16u(5);
1849 result
.xmm16u(3) = op2
.xmm16u(5);
1850 result
.xmm16u(4) = op1
.xmm16u(6);
1851 result
.xmm16u(5) = op2
.xmm16u(6);
1852 result
.xmm16u(6) = op1
.xmm16u(7);
1853 result
.xmm16u(7) = op2
.xmm16u(7);
1855 /* now write result back to destination */
1856 BX_WRITE_XMM_REG(i
->nnn(), result
);
1858 BX_INFO(("PUNPCKHWD_VdqWdq: required SSE2, use --enable-sse option"));
1859 exception(BX_UD_EXCEPTION
, 0, 0);
1863 /* UNPCKHPS: 0F 15 */
1864 /* PUNPCKHDQ: 66 0F 6A */
1865 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UNPCKHPS_VpsWdq(bxInstruction_c
*i
)
1867 #if BX_SUPPORT_SSE >= 1
1868 BX_CPU_THIS_PTR
prepareSSE();
1870 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1872 /* op2 is a register or memory reference */
1874 op2
= BX_READ_XMM_REG(i
->rm());
1877 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1878 /* pointer, segment address pair */
1879 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1882 result
.xmm32u(0) = op1
.xmm32u(2);
1883 result
.xmm32u(1) = op2
.xmm32u(2);
1884 result
.xmm32u(2) = op1
.xmm32u(3);
1885 result
.xmm32u(3) = op2
.xmm32u(3);
1887 /* now write result back to destination */
1888 BX_WRITE_XMM_REG(i
->nnn(), result
);
1890 BX_INFO(("UNPCKHPS_VpsWdq: required SSE, use --enable-sse option"));
1891 exception(BX_UD_EXCEPTION
, 0, 0);
1896 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKSSDW_VdqWdq(bxInstruction_c
*i
)
1898 #if BX_SUPPORT_SSE >= 2
1899 BX_CPU_THIS_PTR
prepareSSE();
1901 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1903 /* op2 is a register or memory reference */
1905 op2
= BX_READ_XMM_REG(i
->rm());
1908 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1909 /* pointer, segment address pair */
1910 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1913 result
.xmm16s(0) = SaturateDwordSToWordS(op1
.xmm32s(0));
1914 result
.xmm16s(1) = SaturateDwordSToWordS(op1
.xmm32s(1));
1915 result
.xmm16s(2) = SaturateDwordSToWordS(op1
.xmm32s(2));
1916 result
.xmm16s(3) = SaturateDwordSToWordS(op1
.xmm32s(3));
1918 result
.xmm16s(4) = SaturateDwordSToWordS(op2
.xmm32s(0));
1919 result
.xmm16s(5) = SaturateDwordSToWordS(op2
.xmm32s(1));
1920 result
.xmm16s(6) = SaturateDwordSToWordS(op2
.xmm32s(2));
1921 result
.xmm16s(7) = SaturateDwordSToWordS(op2
.xmm32s(3));
1923 /* now write result back to destination */
1924 BX_WRITE_XMM_REG(i
->nnn(), result
);
1926 BX_INFO(("PACKSSDW_VdqWdq: required SSE2, use --enable-sse option"));
1927 exception(BX_UD_EXCEPTION
, 0, 0);
1931 /* UNPCKLPD: 66 0F 14 */
1932 /* PUNPCKLQDQ: 66 0F 6C */
1933 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKLQDQ_VdqWdq(bxInstruction_c
*i
)
1935 #if BX_SUPPORT_SSE >= 2
1936 BX_CPU_THIS_PTR
prepareSSE();
1938 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1940 /* op2 is a register or memory reference */
1942 op2
= BX_READ_XMM_REG(i
->rm());
1945 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1946 /* pointer, segment address pair */
1947 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1950 op1
.xmm64u(1) = op2
.xmm64u(0);
1952 /* now write result back to destination */
1953 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1955 BX_INFO(("PUNPCKLQDQ_VdqWdq: required SSE2, use --enable-sse option"));
1956 exception(BX_UD_EXCEPTION
, 0, 0);
1960 /* UNPCKHPD: 66 0F 15 */
1961 /* PUNPCKHQDQ: 66 0F 6D */
1962 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKHQDQ_VdqWdq(bxInstruction_c
*i
)
1964 #if BX_SUPPORT_SSE >= 2
1965 BX_CPU_THIS_PTR
prepareSSE();
1967 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1969 /* op2 is a register or memory reference */
1971 op2
= BX_READ_XMM_REG(i
->rm());
1974 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1975 /* pointer, segment address pair */
1976 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1979 result
.xmm64u(0) = op1
.xmm64u(1);
1980 result
.xmm64u(1) = op2
.xmm64u(1);
1982 /* now write result back to destination */
1983 BX_WRITE_XMM_REG(i
->nnn(), result
);
1985 BX_INFO(("PUNPCKHQDQ_VdqWdq: required SSE2, use --enable-sse option"));
1986 exception(BX_UD_EXCEPTION
, 0, 0);
1991 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSHUFD_VdqWdqIb(bxInstruction_c
*i
)
1993 #if BX_SUPPORT_SSE >= 2
1994 BX_CPU_THIS_PTR
prepareSSE();
1996 BxPackedXmmRegister op
, result
;
1997 Bit8u order
= i
->Ib();
1999 /* op is a register or memory reference */
2001 op
= BX_READ_XMM_REG(i
->rm());
2004 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2005 /* pointer, segment address pair */
2006 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
2009 result
.xmm32u(0) = op
.xmm32u((order
>> 0) & 0x3);
2010 result
.xmm32u(1) = op
.xmm32u((order
>> 2) & 0x3);
2011 result
.xmm32u(2) = op
.xmm32u((order
>> 4) & 0x3);
2012 result
.xmm32u(3) = op
.xmm32u((order
>> 6) & 0x3);
2014 /* now write result back to destination */
2015 BX_WRITE_XMM_REG(i
->nnn(), result
);
2017 BX_INFO(("PSHUFD_VdqWdqIb: required SSE2, use --enable-sse option"));
2018 exception(BX_UD_EXCEPTION
, 0, 0);
2023 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSHUFHW_VdqWdqIb(bxInstruction_c
*i
)
2025 #if BX_SUPPORT_SSE >= 2
2026 BX_CPU_THIS_PTR
prepareSSE();
2028 BxPackedXmmRegister op
, result
;
2029 Bit8u order
= i
->Ib();
2031 /* op is a register or memory reference */
2033 op
= BX_READ_XMM_REG(i
->rm());
2036 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2037 /* pointer, segment address pair */
2038 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
2041 result
.xmm64u(0) = op
.xmm64u(0);
2042 result
.xmm16u(4) = op
.xmm16u(4 + ((order
>> 0) & 0x3));
2043 result
.xmm16u(5) = op
.xmm16u(4 + ((order
>> 2) & 0x3));
2044 result
.xmm16u(6) = op
.xmm16u(4 + ((order
>> 4) & 0x3));
2045 result
.xmm16u(7) = op
.xmm16u(4 + ((order
>> 6) & 0x3));
2047 /* now write result back to destination */
2048 BX_WRITE_XMM_REG(i
->nnn(), result
);
2050 BX_INFO(("PSHUFHW_VdqWdqIb: required SSE2, use --enable-sse option"));
2051 exception(BX_UD_EXCEPTION
, 0, 0);
2056 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSHUFLW_VdqWdqIb(bxInstruction_c
*i
)
2058 #if BX_SUPPORT_SSE >= 2
2059 BX_CPU_THIS_PTR
prepareSSE();
2061 BxPackedXmmRegister op
, result
;
2062 Bit8u order
= i
->Ib();
2064 /* op is a register or memory reference */
2066 op
= BX_READ_XMM_REG(i
->rm());
2069 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2070 /* pointer, segment address pair */
2071 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
2074 result
.xmm16u(0) = op
.xmm16u((order
>> 0) & 0x3);
2075 result
.xmm16u(1) = op
.xmm16u((order
>> 2) & 0x3);
2076 result
.xmm16u(2) = op
.xmm16u((order
>> 4) & 0x3);
2077 result
.xmm16u(3) = op
.xmm16u((order
>> 6) & 0x3);
2078 result
.xmm64u(1) = op
.xmm64u(1);
2080 /* now write result back to destination */
2081 BX_WRITE_XMM_REG(i
->nnn(), result
);
2083 BX_INFO(("PSHUFLW_VdqWdqIb: required SSE, use --enable-sse option"));
2084 exception(BX_UD_EXCEPTION
, 0, 0);
2089 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQB_VdqWdq(bxInstruction_c
*i
)
2091 #if BX_SUPPORT_SSE >= 2
2092 BX_CPU_THIS_PTR
prepareSSE();
2094 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2096 /* op2 is a register or memory reference */
2098 op2
= BX_READ_XMM_REG(i
->rm());
2101 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2102 /* pointer, segment address pair */
2103 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2106 for(unsigned j
=0; j
<16; j
++) {
2107 op1
.xmmubyte(j
) = (op1
.xmmubyte(j
) == op2
.xmmubyte(j
)) ? 0xff : 0;
2110 /* now write result back to destination */
2111 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2113 BX_INFO(("PCMPEQB_VdqWdq: required SSE2, use --enable-sse option"));
2114 exception(BX_UD_EXCEPTION
, 0, 0);
2119 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQW_VdqWdq(bxInstruction_c
*i
)
2121 #if BX_SUPPORT_SSE >= 2
2122 BX_CPU_THIS_PTR
prepareSSE();
2124 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2126 /* op2 is a register or memory reference */
2128 op2
= BX_READ_XMM_REG(i
->rm());
2131 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2132 /* pointer, segment address pair */
2133 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2136 op1
.xmm16u(0) = (op1
.xmm16u(0) == op2
.xmm16u(0)) ? 0xffff : 0;
2137 op1
.xmm16u(1) = (op1
.xmm16u(1) == op2
.xmm16u(1)) ? 0xffff : 0;
2138 op1
.xmm16u(2) = (op1
.xmm16u(2) == op2
.xmm16u(2)) ? 0xffff : 0;
2139 op1
.xmm16u(3) = (op1
.xmm16u(3) == op2
.xmm16u(3)) ? 0xffff : 0;
2140 op1
.xmm16u(4) = (op1
.xmm16u(4) == op2
.xmm16u(4)) ? 0xffff : 0;
2141 op1
.xmm16u(5) = (op1
.xmm16u(5) == op2
.xmm16u(5)) ? 0xffff : 0;
2142 op1
.xmm16u(6) = (op1
.xmm16u(6) == op2
.xmm16u(6)) ? 0xffff : 0;
2143 op1
.xmm16u(7) = (op1
.xmm16u(7) == op2
.xmm16u(7)) ? 0xffff : 0;
2145 /* now write result back to destination */
2146 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2148 BX_INFO(("PCMPEQW_VdqWdq: required SSE2, use --enable-sse option"));
2149 exception(BX_UD_EXCEPTION
, 0, 0);
2154 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQD_VdqWdq(bxInstruction_c
*i
)
2156 #if BX_SUPPORT_SSE >= 2
2157 BX_CPU_THIS_PTR
prepareSSE();
2159 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2161 /* op2 is a register or memory reference */
2163 op2
= BX_READ_XMM_REG(i
->rm());
2166 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2167 /* pointer, segment address pair */
2168 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2171 op1
.xmm32u(0) = (op1
.xmm32u(0) == op2
.xmm32u(0)) ? 0xffffffff : 0;
2172 op1
.xmm32u(1) = (op1
.xmm32u(1) == op2
.xmm32u(1)) ? 0xffffffff : 0;
2173 op1
.xmm32u(2) = (op1
.xmm32u(2) == op2
.xmm32u(2)) ? 0xffffffff : 0;
2174 op1
.xmm32u(3) = (op1
.xmm32u(3) == op2
.xmm32u(3)) ? 0xffffffff : 0;
2176 /* now write result back to destination */
2177 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2179 BX_INFO(("PCMPEQD_VdqWdq: required SSE2, use --enable-sse option"));
2180 exception(BX_UD_EXCEPTION
, 0, 0);
2185 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRW_VdqEwIb(bxInstruction_c
*i
)
2187 #if BX_SUPPORT_SSE >= 2
2188 BX_CPU_THIS_PTR
prepareSSE();
2190 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn());
2192 Bit8u count
= i
->Ib() & 0x7;
2194 /* op2 is a register or memory reference */
2196 op2
= BX_READ_16BIT_REG(i
->rm());
2199 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2200 /* pointer, segment address pair */
2201 op2
= read_virtual_word(i
->seg(), eaddr
);
2204 op1
.xmm16u(count
) = op2
;
2206 /* now write result back to destination */
2207 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2209 BX_INFO(("PINSRW_VdqEdIb: required SSE2, use --enable-sse option"));
2210 exception(BX_UD_EXCEPTION
, 0, 0);
2215 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_GdUdqIb(bxInstruction_c
*i
)
2217 #if BX_SUPPORT_SSE >= 2
2218 BX_CPU_THIS_PTR
prepareSSE();
2220 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
2221 Bit8u count
= i
->Ib() & 0x7;
2222 Bit32u result
= (Bit32u
) op
.xmm16u(count
);
2224 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
2226 BX_INFO(("PEXTRW_GdUdqIb: required SSE2, use --enable-sse option"));
2227 exception(BX_UD_EXCEPTION
, 0, 0);
2232 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SHUFPS_VpsWpsIb(bxInstruction_c
*i
)
2234 #if BX_SUPPORT_SSE >= 1
2235 BX_CPU_THIS_PTR
prepareSSE();
2237 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2238 Bit8u order
= i
->Ib();
2240 /* op2 is a register or memory reference */
2242 op2
= BX_READ_XMM_REG(i
->rm());
2245 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2246 /* pointer, segment address pair */
2247 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2250 result
.xmm32u(0) = op1
.xmm32u((order
>> 0) & 0x3);
2251 result
.xmm32u(1) = op1
.xmm32u((order
>> 2) & 0x3);
2252 result
.xmm32u(2) = op2
.xmm32u((order
>> 4) & 0x3);
2253 result
.xmm32u(3) = op2
.xmm32u((order
>> 6) & 0x3);
2255 /* now write result back to destination */
2256 BX_WRITE_XMM_REG(i
->nnn(), result
);
2258 BX_INFO(("SHUFPS_VpsWpsIb: required SSE, use --enable-sse option"));
2259 exception(BX_UD_EXCEPTION
, 0, 0);
2264 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SHUFPD_VpdWpdIb(bxInstruction_c
*i
)
2266 #if BX_SUPPORT_SSE >= 2
2267 BX_CPU_THIS_PTR
prepareSSE();
2269 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2270 Bit8u order
= i
->Ib();
2272 /* op2 is a register or memory reference */
2274 op2
= BX_READ_XMM_REG(i
->rm());
2277 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2278 /* pointer, segment address pair */
2279 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2282 result
.xmm64u(0) = op1
.xmm64u((order
>> 0) & 0x1);
2283 result
.xmm64u(1) = op2
.xmm64u((order
>> 1) & 0x1);
2285 /* now write result back to destination */
2286 BX_WRITE_XMM_REG(i
->nnn(), result
);
2288 BX_INFO(("SHUFPD_VpdWpdIb: required SSE2, use --enable-sse option"));
2289 exception(BX_UD_EXCEPTION
, 0, 0);
2294 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLW_VdqWdq(bxInstruction_c
*i
)
2296 #if BX_SUPPORT_SSE >= 2
2297 BX_CPU_THIS_PTR
prepareSSE();
2299 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2301 /* op2 is a register or memory reference */
2303 op2
= BX_READ_XMM_REG(i
->rm());
2306 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2307 /* pointer, segment address pair */
2308 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2311 if(op2
.xmm64u(0) > 15) /* looking only to low 64 bits */
2318 Bit8u shift
= op2
.xmmubyte(0);
2320 op1
.xmm16u(0) >>= shift
;
2321 op1
.xmm16u(1) >>= shift
;
2322 op1
.xmm16u(2) >>= shift
;
2323 op1
.xmm16u(3) >>= shift
;
2324 op1
.xmm16u(4) >>= shift
;
2325 op1
.xmm16u(5) >>= shift
;
2326 op1
.xmm16u(6) >>= shift
;
2327 op1
.xmm16u(7) >>= shift
;
2330 /* now write result back to destination */
2331 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2333 BX_INFO(("PSRLW_VdqWdq: required SSE2, use --enable-sse option"));
2334 exception(BX_UD_EXCEPTION
, 0, 0);
2339 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLD_VdqWdq(bxInstruction_c
*i
)
2341 #if BX_SUPPORT_SSE >= 2
2342 BX_CPU_THIS_PTR
prepareSSE();
2344 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2346 /* op2 is a register or memory reference */
2348 op2
= BX_READ_XMM_REG(i
->rm());
2351 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2352 /* pointer, segment address pair */
2353 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2356 if(op2
.xmm64u(0) > 31) /* looking only to low 64 bits */
2363 Bit8u shift
= op2
.xmmubyte(0);
2365 op1
.xmm32u(0) >>= shift
;
2366 op1
.xmm32u(1) >>= shift
;
2367 op1
.xmm32u(2) >>= shift
;
2368 op1
.xmm32u(3) >>= shift
;
2371 /* now write result back to destination */
2372 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2374 BX_INFO(("PSRLD_VdqWdq: required SSE2, use --enable-sse option"));
2375 exception(BX_UD_EXCEPTION
, 0, 0);
2380 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLQ_VdqWdq(bxInstruction_c
*i
)
2382 #if BX_SUPPORT_SSE >= 2
2383 BX_CPU_THIS_PTR
prepareSSE();
2385 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2387 /* op2 is a register or memory reference */
2389 op2
= BX_READ_XMM_REG(i
->rm());
2392 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2393 /* pointer, segment address pair */
2394 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2397 if(op2
.xmm64u(0) > 63) /* looking only to low 64 bits */
2404 Bit8u shift
= op2
.xmmubyte(0);
2406 op1
.xmm64u(0) >>= shift
;
2407 op1
.xmm64u(1) >>= shift
;
2410 /* now write result back to destination */
2411 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2413 BX_INFO(("PSRLQ_VdqWdq: required SSE2, use --enable-sse option"));
2414 exception(BX_UD_EXCEPTION
, 0, 0);
2419 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDQ_VdqWdq(bxInstruction_c
*i
)
2421 #if BX_SUPPORT_SSE >= 2
2422 BX_CPU_THIS_PTR
prepareSSE();
2424 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2426 /* op2 is a register or memory reference */
2428 op2
= BX_READ_XMM_REG(i
->rm());
2431 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2432 /* pointer, segment address pair */
2433 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2436 op1
.xmm64u(0) += op2
.xmm64u(0);
2437 op1
.xmm64u(1) += op2
.xmm64u(1);
2439 /* now write result back to destination */
2440 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2442 BX_INFO(("PADDQ_VdqWdq: required SSE2, use --enable-sse option"));
2443 exception(BX_UD_EXCEPTION
, 0, 0);
2448 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULLW_VdqWdq(bxInstruction_c
*i
)
2450 #if BX_SUPPORT_SSE >= 2
2451 BX_CPU_THIS_PTR
prepareSSE();
2453 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2455 /* op2 is a register or memory reference */
2457 op2
= BX_READ_XMM_REG(i
->rm());
2460 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2461 /* pointer, segment address pair */
2462 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2465 Bit32u product1
= Bit32u(op1
.xmm16u(0)) * Bit32u(op2
.xmm16u(0));
2466 Bit32u product2
= Bit32u(op1
.xmm16u(1)) * Bit32u(op2
.xmm16u(1));
2467 Bit32u product3
= Bit32u(op1
.xmm16u(2)) * Bit32u(op2
.xmm16u(2));
2468 Bit32u product4
= Bit32u(op1
.xmm16u(3)) * Bit32u(op2
.xmm16u(3));
2469 Bit32u product5
= Bit32u(op1
.xmm16u(4)) * Bit32u(op2
.xmm16u(4));
2470 Bit32u product6
= Bit32u(op1
.xmm16u(5)) * Bit32u(op2
.xmm16u(5));
2471 Bit32u product7
= Bit32u(op1
.xmm16u(6)) * Bit32u(op2
.xmm16u(6));
2472 Bit32u product8
= Bit32u(op1
.xmm16u(7)) * Bit32u(op2
.xmm16u(7));
2474 op1
.xmm16u(0) = product1
& 0xffff;
2475 op1
.xmm16u(1) = product2
& 0xffff;
2476 op1
.xmm16u(2) = product3
& 0xffff;
2477 op1
.xmm16u(3) = product4
& 0xffff;
2478 op1
.xmm16u(4) = product5
& 0xffff;
2479 op1
.xmm16u(5) = product6
& 0xffff;
2480 op1
.xmm16u(6) = product7
& 0xffff;
2481 op1
.xmm16u(7) = product8
& 0xffff;
2483 /* now write result back to destination */
2484 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2486 BX_INFO(("PMULLW_VdqWdq: required SSE2, use --enable-sse option"));
2487 exception(BX_UD_EXCEPTION
, 0, 0);
2492 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBUSB_VdqWdq(bxInstruction_c
*i
)
2494 #if BX_SUPPORT_SSE >= 2
2495 BX_CPU_THIS_PTR
prepareSSE();
2497 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2499 /* op2 is a register or memory reference */
2501 op2
= BX_READ_XMM_REG(i
->rm());
2504 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2505 /* pointer, segment address pair */
2506 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2509 result
.xmm64u(0) = result
.xmm64u(1) = 0;
2511 for(unsigned j
=0; j
<16; j
++)
2513 if(op1
.xmmubyte(j
) > op2
.xmmubyte(j
))
2515 result
.xmmubyte(j
) = op1
.xmmubyte(j
) - op2
.xmmubyte(j
);
2519 /* now write result back to destination */
2520 BX_WRITE_XMM_REG(i
->nnn(), result
);
2522 BX_INFO(("PSUBUSB_VdqWdq: required SSE2, use --enable-sse option"));
2523 exception(BX_UD_EXCEPTION
, 0, 0);
2528 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBUSW_VdqWdq(bxInstruction_c
*i
)
2530 #if BX_SUPPORT_SSE >= 2
2531 BX_CPU_THIS_PTR
prepareSSE();
2533 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2535 /* op2 is a register or memory reference */
2537 op2
= BX_READ_XMM_REG(i
->rm());
2540 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2541 /* pointer, segment address pair */
2542 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2545 result
.xmm64u(0) = result
.xmm64u(1) = 0;
2547 for(unsigned j
=0; j
<8; j
++)
2549 if(op1
.xmm16u(j
) > op2
.xmm16u(j
))
2551 result
.xmm16u(j
) = op1
.xmm16u(j
) - op2
.xmm16u(j
);
2555 /* now write result back to destination */
2556 BX_WRITE_XMM_REG(i
->nnn(), result
);
2558 BX_INFO(("PSUBUSW_VdqWdq: required SSE2, use --enable-sse option"));
2559 exception(BX_UD_EXCEPTION
, 0, 0);
2564 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUB_VdqWdq(bxInstruction_c
*i
)
2566 #if BX_SUPPORT_SSE >= 2
2567 BX_CPU_THIS_PTR
prepareSSE();
2569 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2571 /* op2 is a register or memory reference */
2573 op2
= BX_READ_XMM_REG(i
->rm());
2576 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2577 /* pointer, segment address pair */
2578 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2581 for(unsigned j
=0; j
<16; j
++) {
2582 if(op2
.xmmubyte(j
) < op1
.xmmubyte(j
)) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
2585 /* now write result back to destination */
2586 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2588 BX_INFO(("PMINUB_VdqWdq: required SSE2, use --enable-sse option"));
2589 exception(BX_UD_EXCEPTION
, 0, 0);
2594 /* ANDPD: 66 0F 54 */
2595 /* PAND: 66 0F DB */
2596 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ANDPS_VpsWps(bxInstruction_c
*i
)
2598 #if BX_SUPPORT_SSE >= 1
2599 BX_CPU_THIS_PTR
prepareSSE();
2601 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2603 /* op2 is a register or memory reference */
2605 op2
= BX_READ_XMM_REG(i
->rm());
2608 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2609 /* pointer, segment address pair */
2610 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2613 op1
.xmm64u(0) &= op2
.xmm64u(0);
2614 op1
.xmm64u(1) &= op2
.xmm64u(1);
2616 /* now write result back to destination */
2617 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2619 BX_INFO(("ANDPS_VpsWps: required SSE, use --enable-sse option"));
2620 exception(BX_UD_EXCEPTION
, 0, 0);
2625 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDUSB_VdqWdq(bxInstruction_c
*i
)
2627 #if BX_SUPPORT_SSE >= 2
2628 BX_CPU_THIS_PTR
prepareSSE();
2630 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2632 /* op2 is a register or memory reference */
2634 op2
= BX_READ_XMM_REG(i
->rm());
2637 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2638 /* pointer, segment address pair */
2639 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2642 for(unsigned j
=0; j
<16; j
++) {
2643 op1
.xmmubyte(j
) = SaturateWordSToByteU(Bit16s(op1
.xmmubyte(j
)) + Bit16s(op2
.xmmubyte(j
)));
2646 /* now write result back to destination */
2647 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2649 BX_INFO(("PADDUSB_VdqWdq: required SSE2, use --enable-sse option"));
2650 exception(BX_UD_EXCEPTION
, 0, 0);
2655 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDUSW_VdqWdq(bxInstruction_c
*i
)
2657 #if BX_SUPPORT_SSE >= 2
2658 BX_CPU_THIS_PTR
prepareSSE();
2660 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2662 /* op2 is a register or memory reference */
2664 op2
= BX_READ_XMM_REG(i
->rm());
2667 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2668 /* pointer, segment address pair */
2669 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2672 op1
.xmm16u(0) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(0)) + Bit32s(op2
.xmm16u(0)));
2673 op1
.xmm16u(1) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(1)) + Bit32s(op2
.xmm16u(1)));
2674 op1
.xmm16u(2) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(2)) + Bit32s(op2
.xmm16u(2)));
2675 op1
.xmm16u(3) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(3)) + Bit32s(op2
.xmm16u(3)));
2676 op1
.xmm16u(4) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(4)) + Bit32s(op2
.xmm16u(4)));
2677 op1
.xmm16u(5) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(5)) + Bit32s(op2
.xmm16u(5)));
2678 op1
.xmm16u(6) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(6)) + Bit32s(op2
.xmm16u(6)));
2679 op1
.xmm16u(7) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(7)) + Bit32s(op2
.xmm16u(7)));
2681 /* now write result back to destination */
2682 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2684 BX_INFO(("PADDUSW_VdqWdq: required SSE2, use --enable-sse option"));
2685 exception(BX_UD_EXCEPTION
, 0, 0);
2690 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUB_VdqWdq(bxInstruction_c
*i
)
2692 #if BX_SUPPORT_SSE >= 2
2693 BX_CPU_THIS_PTR
prepareSSE();
2695 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2697 /* op2 is a register or memory reference */
2699 op2
= BX_READ_XMM_REG(i
->rm());
2702 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2703 /* pointer, segment address pair */
2704 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2707 for(unsigned j
=0; j
<16; j
++) {
2708 if(op2
.xmmubyte(j
) > op1
.xmmubyte(j
)) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
2711 /* now write result back to destination */
2712 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2714 BX_INFO(("PMAXUB_VdqWdq: required SSE2, use --enable-sse option"));
2715 exception(BX_UD_EXCEPTION
, 0, 0);
2720 /* ANDNPD: 66 0F 55 */
2721 /* PANDN: 66 0F DF */
2722 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ANDNPS_VpsWps(bxInstruction_c
*i
)
2724 #if BX_SUPPORT_SSE >= 1
2725 BX_CPU_THIS_PTR
prepareSSE();
2727 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2729 /* op2 is a register or memory reference */
2731 op2
= BX_READ_XMM_REG(i
->rm());
2734 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2735 /* pointer, segment address pair */
2736 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2739 op1
.xmm64u(0) = ~(op1
.xmm64u(0)) & op2
.xmm64u(0);
2740 op1
.xmm64u(1) = ~(op1
.xmm64u(1)) & op2
.xmm64u(1);
2742 /* now write result back to destination */
2743 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2745 BX_INFO(("ANDNPS_VpsWps: required SSE, use --enable-sse option"));
2746 exception(BX_UD_EXCEPTION
, 0, 0);
2751 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PAVGB_VdqWdq(bxInstruction_c
*i
)
2754 BX_CPU_THIS_PTR
prepareSSE();
2756 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2758 /* op2 is a register or memory reference */
2760 op2
= BX_READ_XMM_REG(i
->rm());
2763 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2764 /* pointer, segment address pair */
2765 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2768 for(unsigned j
=0; j
<16; j
++) {
2769 op1
.xmmubyte(j
) = (op1
.xmmubyte(j
) + op2
.xmmubyte(j
) + 1) >> 1;
2772 /* now write result back to destination */
2773 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2775 BX_INFO(("PAVGB_VdqWdq: required SSE, use --enable-sse option"));
2776 exception(BX_UD_EXCEPTION
, 0, 0);
2781 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_VdqWdq(bxInstruction_c
*i
)
2783 #if BX_SUPPORT_SSE >= 2
2784 BX_CPU_THIS_PTR
prepareSSE();
2786 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2788 /* op2 is a register or memory reference */
2790 op2
= BX_READ_XMM_REG(i
->rm());
2793 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2794 /* pointer, segment address pair */
2795 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2798 if(op2
.xmm64u(0) == 0) return;
2800 if(op2
.xmm64u(0) > 15) /* looking only to low 64 bits */
2802 result
.xmm16u(0) = (op1
.xmm16u(0) & 0x8000) ? 0xffff : 0;
2803 result
.xmm16u(1) = (op1
.xmm16u(1) & 0x8000) ? 0xffff : 0;
2804 result
.xmm16u(2) = (op1
.xmm16u(2) & 0x8000) ? 0xffff : 0;
2805 result
.xmm16u(3) = (op1
.xmm16u(3) & 0x8000) ? 0xffff : 0;
2806 result
.xmm16u(4) = (op1
.xmm16u(4) & 0x8000) ? 0xffff : 0;
2807 result
.xmm16u(5) = (op1
.xmm16u(5) & 0x8000) ? 0xffff : 0;
2808 result
.xmm16u(6) = (op1
.xmm16u(6) & 0x8000) ? 0xffff : 0;
2809 result
.xmm16u(7) = (op1
.xmm16u(7) & 0x8000) ? 0xffff : 0;
2813 Bit8u shift
= op2
.xmmubyte(0);
2815 result
.xmm16u(0) = op1
.xmm16u(0) >> shift
;
2816 result
.xmm16u(1) = op1
.xmm16u(1) >> shift
;
2817 result
.xmm16u(2) = op1
.xmm16u(2) >> shift
;
2818 result
.xmm16u(3) = op1
.xmm16u(3) >> shift
;
2819 result
.xmm16u(4) = op1
.xmm16u(4) >> shift
;
2820 result
.xmm16u(5) = op1
.xmm16u(5) >> shift
;
2821 result
.xmm16u(6) = op1
.xmm16u(6) >> shift
;
2822 result
.xmm16u(7) = op1
.xmm16u(7) >> shift
;
2824 if(op1
.xmm16u(0) & 0x8000) result
.xmm16u(0) |= (0xffff << (16 - shift
));
2825 if(op1
.xmm16u(1) & 0x8000) result
.xmm16u(1) |= (0xffff << (16 - shift
));
2826 if(op1
.xmm16u(2) & 0x8000) result
.xmm16u(2) |= (0xffff << (16 - shift
));
2827 if(op1
.xmm16u(3) & 0x8000) result
.xmm16u(3) |= (0xffff << (16 - shift
));
2828 if(op1
.xmm16u(4) & 0x8000) result
.xmm16u(4) |= (0xffff << (16 - shift
));
2829 if(op1
.xmm16u(5) & 0x8000) result
.xmm16u(5) |= (0xffff << (16 - shift
));
2830 if(op1
.xmm16u(6) & 0x8000) result
.xmm16u(6) |= (0xffff << (16 - shift
));
2831 if(op1
.xmm16u(7) & 0x8000) result
.xmm16u(7) |= (0xffff << (16 - shift
));
2834 /* now write result back to destination */
2835 BX_WRITE_XMM_REG(i
->nnn(), result
);
2837 BX_INFO(("PSRAW_VdqWdq: required SSE2, use --enable-sse option"));
2838 exception(BX_UD_EXCEPTION
, 0, 0);
2843 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAD_VdqWdq(bxInstruction_c
*i
)
2845 #if BX_SUPPORT_SSE >= 2
2846 BX_CPU_THIS_PTR
prepareSSE();
2848 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2850 /* op2 is a register or memory reference */
2852 op2
= BX_READ_XMM_REG(i
->rm());
2855 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2856 /* pointer, segment address pair */
2857 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2860 if(op2
.xmm64u(0) == 0) return;
2862 if(op2
.xmm64u(0) > 31) /* looking only to low 64 bits */
2864 result
.xmm32u(0) = (op1
.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
2865 result
.xmm32u(1) = (op1
.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
2866 result
.xmm32u(2) = (op1
.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
2867 result
.xmm32u(3) = (op1
.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
2871 Bit8u shift
= op2
.xmmubyte(0);
2873 result
.xmm32u(0) = op1
.xmm32u(0) >> shift
;
2874 result
.xmm32u(1) = op1
.xmm32u(1) >> shift
;
2875 result
.xmm32u(2) = op1
.xmm32u(2) >> shift
;
2876 result
.xmm32u(3) = op1
.xmm32u(3) >> shift
;
2878 if(op1
.xmm32u(0) & 0x80000000) result
.xmm32u(0) |= (0xffffffff << (32-shift
));
2879 if(op1
.xmm32u(1) & 0x80000000) result
.xmm32u(1) |= (0xffffffff << (32-shift
));
2880 if(op1
.xmm32u(2) & 0x80000000) result
.xmm32u(2) |= (0xffffffff << (32-shift
));
2881 if(op1
.xmm32u(3) & 0x80000000) result
.xmm32u(3) |= (0xffffffff << (32-shift
));
2884 /* now write result back to destination */
2885 BX_WRITE_XMM_REG(i
->nnn(), result
);
2887 BX_INFO(("PSRAD_VdqWdq: required SSE2, use --enable-sse option"));
2888 exception(BX_UD_EXCEPTION
, 0, 0);
2893 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PAVGW_VdqWdq(bxInstruction_c
*i
)
2896 BX_CPU_THIS_PTR
prepareSSE();
2898 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2900 /* op2 is a register or memory reference */
2902 op2
= BX_READ_XMM_REG(i
->rm());
2905 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2906 /* pointer, segment address pair */
2907 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2910 op1
.xmm16u(0) = (op1
.xmm16u(0) + op2
.xmm16u(0) + 1) >> 1;
2911 op1
.xmm16u(1) = (op1
.xmm16u(1) + op2
.xmm16u(1) + 1) >> 1;
2912 op1
.xmm16u(2) = (op1
.xmm16u(2) + op2
.xmm16u(2) + 1) >> 1;
2913 op1
.xmm16u(3) = (op1
.xmm16u(3) + op2
.xmm16u(3) + 1) >> 1;
2914 op1
.xmm16u(4) = (op1
.xmm16u(4) + op2
.xmm16u(4) + 1) >> 1;
2915 op1
.xmm16u(5) = (op1
.xmm16u(5) + op2
.xmm16u(5) + 1) >> 1;
2916 op1
.xmm16u(6) = (op1
.xmm16u(6) + op2
.xmm16u(6) + 1) >> 1;
2917 op1
.xmm16u(7) = (op1
.xmm16u(7) + op2
.xmm16u(7) + 1) >> 1;
2919 /* now write result back to destination */
2920 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2922 BX_INFO(("PAVGW_VdqWdq: required SSE, use --enable-sse option"));
2923 exception(BX_UD_EXCEPTION
, 0, 0);
2928 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULHUW_VdqWdq(bxInstruction_c
*i
)
2930 #if BX_SUPPORT_SSE >= 2
2931 BX_CPU_THIS_PTR
prepareSSE();
2933 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2935 /* op2 is a register or memory reference */
2937 op2
= BX_READ_XMM_REG(i
->rm());
2940 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2941 /* pointer, segment address pair */
2942 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2945 Bit32u product1
= Bit32u(op1
.xmm16u(0)) * Bit32u(op2
.xmm16u(0));
2946 Bit32u product2
= Bit32u(op1
.xmm16u(1)) * Bit32u(op2
.xmm16u(1));
2947 Bit32u product3
= Bit32u(op1
.xmm16u(2)) * Bit32u(op2
.xmm16u(2));
2948 Bit32u product4
= Bit32u(op1
.xmm16u(3)) * Bit32u(op2
.xmm16u(3));
2949 Bit32u product5
= Bit32u(op1
.xmm16u(4)) * Bit32u(op2
.xmm16u(4));
2950 Bit32u product6
= Bit32u(op1
.xmm16u(5)) * Bit32u(op2
.xmm16u(5));
2951 Bit32u product7
= Bit32u(op1
.xmm16u(6)) * Bit32u(op2
.xmm16u(6));
2952 Bit32u product8
= Bit32u(op1
.xmm16u(7)) * Bit32u(op2
.xmm16u(7));
2954 op1
.xmm16u(0) = (Bit16u
)(product1
>> 16);
2955 op1
.xmm16u(1) = (Bit16u
)(product2
>> 16);
2956 op1
.xmm16u(2) = (Bit16u
)(product3
>> 16);
2957 op1
.xmm16u(3) = (Bit16u
)(product4
>> 16);
2958 op1
.xmm16u(4) = (Bit16u
)(product5
>> 16);
2959 op1
.xmm16u(5) = (Bit16u
)(product6
>> 16);
2960 op1
.xmm16u(6) = (Bit16u
)(product7
>> 16);
2961 op1
.xmm16u(7) = (Bit16u
)(product8
>> 16);
2963 /* now write result back to destination */
2964 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2966 BX_INFO(("PMULHUW_VdqWdq: required SSE2, use --enable-sse option"));
2967 exception(BX_UD_EXCEPTION
, 0, 0);
2972 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULHW_VdqWdq(bxInstruction_c
*i
)
2974 #if BX_SUPPORT_SSE >= 2
2975 BX_CPU_THIS_PTR
prepareSSE();
2977 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2979 /* op2 is a register or memory reference */
2981 op2
= BX_READ_XMM_REG(i
->rm());
2984 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2985 /* pointer, segment address pair */
2986 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2989 Bit32s product1
= Bit32s(op1
.xmm16s(0)) * Bit32s(op2
.xmm16s(0));
2990 Bit32s product2
= Bit32s(op1
.xmm16s(1)) * Bit32s(op2
.xmm16s(1));
2991 Bit32s product3
= Bit32s(op1
.xmm16s(2)) * Bit32s(op2
.xmm16s(2));
2992 Bit32s product4
= Bit32s(op1
.xmm16s(3)) * Bit32s(op2
.xmm16s(3));
2993 Bit32s product5
= Bit32s(op1
.xmm16s(4)) * Bit32s(op2
.xmm16s(4));
2994 Bit32s product6
= Bit32s(op1
.xmm16s(5)) * Bit32s(op2
.xmm16s(5));
2995 Bit32s product7
= Bit32s(op1
.xmm16s(6)) * Bit32s(op2
.xmm16s(6));
2996 Bit32s product8
= Bit32s(op1
.xmm16s(7)) * Bit32s(op2
.xmm16s(7));
2998 op1
.xmm16u(0) = (Bit16u
)(product1
>> 16);
2999 op1
.xmm16u(1) = (Bit16u
)(product2
>> 16);
3000 op1
.xmm16u(2) = (Bit16u
)(product3
>> 16);
3001 op1
.xmm16u(3) = (Bit16u
)(product4
>> 16);
3002 op1
.xmm16u(4) = (Bit16u
)(product5
>> 16);
3003 op1
.xmm16u(5) = (Bit16u
)(product6
>> 16);
3004 op1
.xmm16u(6) = (Bit16u
)(product7
>> 16);
3005 op1
.xmm16u(7) = (Bit16u
)(product8
>> 16);
3007 /* now write result back to destination */
3008 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3010 BX_INFO(("PMULHW_VdqWdq: required SSE2, use --enable-sse option"));
3011 exception(BX_UD_EXCEPTION
, 0, 0);
3016 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBSB_VdqWdq(bxInstruction_c
*i
)
3018 #if BX_SUPPORT_SSE >= 2
3019 BX_CPU_THIS_PTR
prepareSSE();
3021 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3023 /* op2 is a register or memory reference */
3025 op2
= BX_READ_XMM_REG(i
->rm());
3028 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3029 /* pointer, segment address pair */
3030 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3033 for(unsigned j
=0; j
<16; j
++) {
3034 op1
.xmmsbyte(j
) = SaturateWordSToByteS(Bit16s(op1
.xmmsbyte(j
)) - Bit16s(op2
.xmmsbyte(j
)));
3037 /* now write result back to destination */
3038 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3040 BX_INFO(("PSUBSB_VdqWdq: required SSE2, use --enable-sse option"));
3041 exception(BX_UD_EXCEPTION
, 0, 0);
3046 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBSW_VdqWdq(bxInstruction_c
*i
)
3048 #if BX_SUPPORT_SSE >= 2
3049 BX_CPU_THIS_PTR
prepareSSE();
3051 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3053 /* op2 is a register or memory reference */
3055 op2
= BX_READ_XMM_REG(i
->rm());
3058 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3059 /* pointer, segment address pair */
3060 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3063 op1
.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(0)) - Bit32s(op2
.xmm16s(0)));
3064 op1
.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(1)) - Bit32s(op2
.xmm16s(1)));
3065 op1
.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(2)) - Bit32s(op2
.xmm16s(2)));
3066 op1
.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(3)) - Bit32s(op2
.xmm16s(3)));
3067 op1
.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(4)) - Bit32s(op2
.xmm16s(4)));
3068 op1
.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(5)) - Bit32s(op2
.xmm16s(5)));
3069 op1
.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(6)) - Bit32s(op2
.xmm16s(6)));
3070 op1
.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(7)) - Bit32s(op2
.xmm16s(7)));
3072 /* now write result back to destination */
3073 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3075 BX_INFO(("PSUBSW_VdqWdq: required SSE2, use --enable-sse option"));
3076 exception(BX_UD_EXCEPTION
, 0, 0);
3081 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSW_VdqWdq(bxInstruction_c
*i
)
3083 #if BX_SUPPORT_SSE >= 2
3084 BX_CPU_THIS_PTR
prepareSSE();
3086 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3088 /* op2 is a register or memory reference */
3090 op2
= BX_READ_XMM_REG(i
->rm());
3093 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3094 /* pointer, segment address pair */
3095 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3098 if(op2
.xmm16s(0) < op1
.xmm16s(0)) op1
.xmm16s(0) = op2
.xmm16s(0);
3099 if(op2
.xmm16s(1) < op1
.xmm16s(1)) op1
.xmm16s(1) = op2
.xmm16s(1);
3100 if(op2
.xmm16s(2) < op1
.xmm16s(2)) op1
.xmm16s(2) = op2
.xmm16s(2);
3101 if(op2
.xmm16s(3) < op1
.xmm16s(3)) op1
.xmm16s(3) = op2
.xmm16s(3);
3102 if(op2
.xmm16s(4) < op1
.xmm16s(4)) op1
.xmm16s(4) = op2
.xmm16s(4);
3103 if(op2
.xmm16s(5) < op1
.xmm16s(5)) op1
.xmm16s(5) = op2
.xmm16s(5);
3104 if(op2
.xmm16s(6) < op1
.xmm16s(6)) op1
.xmm16s(6) = op2
.xmm16s(6);
3105 if(op2
.xmm16s(7) < op1
.xmm16s(7)) op1
.xmm16s(7) = op2
.xmm16s(7);
3107 /* now write result back to destination */
3108 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3110 BX_INFO(("PMINSW_VdqWdq: required SSE2, use --enable-sse option"));
3111 exception(BX_UD_EXCEPTION
, 0, 0);
3116 /* ORPD: 66 0F 56 */
3118 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ORPS_VpsWps(bxInstruction_c
*i
)
3120 #if BX_SUPPORT_SSE >= 1
3121 BX_CPU_THIS_PTR
prepareSSE();
3123 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3125 /* op2 is a register or memory reference */
3127 op2
= BX_READ_XMM_REG(i
->rm());
3130 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3131 /* pointer, segment address pair */
3132 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3135 op1
.xmm64u(0) |= op2
.xmm64u(0);
3136 op1
.xmm64u(1) |= op2
.xmm64u(1);
3138 /* now write result back to destination */
3139 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3141 BX_INFO(("ORPS_VpsWps: required SSE, use --enable-sse option"));
3142 exception(BX_UD_EXCEPTION
, 0, 0);
3147 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDSB_VdqWdq(bxInstruction_c
*i
)
3149 #if BX_SUPPORT_SSE >= 2
3150 BX_CPU_THIS_PTR
prepareSSE();
3152 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3154 /* op2 is a register or memory reference */
3156 op2
= BX_READ_XMM_REG(i
->rm());
3159 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3160 /* pointer, segment address pair */
3161 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3164 for(unsigned j
=0; j
<16; j
++) {
3165 op1
.xmmsbyte(j
) = SaturateWordSToByteS(Bit16s(op1
.xmmsbyte(j
)) + Bit16s(op2
.xmmsbyte(j
)));
3168 /* now write result back to destination */
3169 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3171 BX_INFO(("PADDSB_VdqWdq: required SSE2, use --enable-sse option"));
3172 exception(BX_UD_EXCEPTION
, 0, 0);
3177 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDSW_VdqWdq(bxInstruction_c
*i
)
3179 #if BX_SUPPORT_SSE >= 2
3180 BX_CPU_THIS_PTR
prepareSSE();
3182 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3184 /* op2 is a register or memory reference */
3186 op2
= BX_READ_XMM_REG(i
->rm());
3189 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3190 /* pointer, segment address pair */
3191 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3194 op1
.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(0)) + Bit32s(op2
.xmm16s(0)));
3195 op1
.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(1)) + Bit32s(op2
.xmm16s(1)));
3196 op1
.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(2)) + Bit32s(op2
.xmm16s(2)));
3197 op1
.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(3)) + Bit32s(op2
.xmm16s(3)));
3198 op1
.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(4)) + Bit32s(op2
.xmm16s(4)));
3199 op1
.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(5)) + Bit32s(op2
.xmm16s(5)));
3200 op1
.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(6)) + Bit32s(op2
.xmm16s(6)));
3201 op1
.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(7)) + Bit32s(op2
.xmm16s(7)));
3203 /* now write result back to destination */
3204 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3206 BX_INFO(("PADDSW_VdqWdq: required SSE2, use --enable-sse option"));
3207 exception(BX_UD_EXCEPTION
, 0, 0);
3212 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSW_VdqWdq(bxInstruction_c
*i
)
3214 #if BX_SUPPORT_SSE >= 2
3215 BX_CPU_THIS_PTR
prepareSSE();
3217 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3219 /* op2 is a register or memory reference */
3221 op2
= BX_READ_XMM_REG(i
->rm());
3224 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3225 /* pointer, segment address pair */
3226 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3229 if(op2
.xmm16s(0) > op1
.xmm16s(0)) op1
.xmm16s(0) = op2
.xmm16s(0);
3230 if(op2
.xmm16s(1) > op1
.xmm16s(1)) op1
.xmm16s(1) = op2
.xmm16s(1);
3231 if(op2
.xmm16s(2) > op1
.xmm16s(2)) op1
.xmm16s(2) = op2
.xmm16s(2);
3232 if(op2
.xmm16s(3) > op1
.xmm16s(3)) op1
.xmm16s(3) = op2
.xmm16s(3);
3233 if(op2
.xmm16s(4) > op1
.xmm16s(4)) op1
.xmm16s(4) = op2
.xmm16s(4);
3234 if(op2
.xmm16s(5) > op1
.xmm16s(5)) op1
.xmm16s(5) = op2
.xmm16s(5);
3235 if(op2
.xmm16s(6) > op1
.xmm16s(6)) op1
.xmm16s(6) = op2
.xmm16s(6);
3236 if(op2
.xmm16s(7) > op1
.xmm16s(7)) op1
.xmm16s(7) = op2
.xmm16s(7);
3238 /* now write result back to destination */
3239 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3241 BX_INFO(("PMAXSW_VdqWdq: required SSE2, use --enable-sse option"));
3242 exception(BX_UD_EXCEPTION
, 0, 0);
3247 /* XORPD: 66 0F 57 */
3248 /* PXOR: 66 0F EF */
3249 void BX_CPP_AttrRegparmN(1) BX_CPU_C::XORPS_VpsWps(bxInstruction_c
*i
)
3251 #if BX_SUPPORT_SSE >= 1
3252 BX_CPU_THIS_PTR
prepareSSE();
3254 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3256 /* op2 is a register or memory reference */
3258 op2
= BX_READ_XMM_REG(i
->rm());
3261 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3262 /* pointer, segment address pair */
3263 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3266 op1
.xmm64u(0) ^= op2
.xmm64u(0);
3267 op1
.xmm64u(1) ^= op2
.xmm64u(1);
3269 /* now write result back to destination */
3270 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3272 BX_INFO(("XORPS_VpsWps: required SSE, use --enable-sse option"));
3273 exception(BX_UD_EXCEPTION
, 0, 0);
3278 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLW_VdqWdq(bxInstruction_c
*i
)
3280 #if BX_SUPPORT_SSE >= 2
3281 BX_CPU_THIS_PTR
prepareSSE();
3283 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3285 /* op2 is a register or memory reference */
3287 op2
= BX_READ_XMM_REG(i
->rm());
3290 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3291 /* pointer, segment address pair */
3292 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3295 if(op2
.xmm64u(0) > 15) /* looking only to low 64 bits */
3302 Bit8u shift
= op2
.xmmubyte(0);
3304 op1
.xmm16u(0) <<= shift
;
3305 op1
.xmm16u(1) <<= shift
;
3306 op1
.xmm16u(2) <<= shift
;
3307 op1
.xmm16u(3) <<= shift
;
3308 op1
.xmm16u(4) <<= shift
;
3309 op1
.xmm16u(5) <<= shift
;
3310 op1
.xmm16u(6) <<= shift
;
3311 op1
.xmm16u(7) <<= shift
;
3314 /* now write result back to destination */
3315 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3317 BX_INFO(("PSLLW_VdqWdq: required SSE2, use --enable-sse option"));
3318 exception(BX_UD_EXCEPTION
, 0, 0);
3323 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLD_VdqWdq(bxInstruction_c
*i
)
3325 #if BX_SUPPORT_SSE >= 2
3326 BX_CPU_THIS_PTR
prepareSSE();
3328 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3330 /* op2 is a register or memory reference */
3332 op2
= BX_READ_XMM_REG(i
->rm());
3335 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3336 /* pointer, segment address pair */
3337 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3340 if(op2
.xmm64u(0) > 31) /* looking only to low 64 bits */
3347 Bit8u shift
= op2
.xmmubyte(0);
3349 op1
.xmm32u(0) <<= shift
;
3350 op1
.xmm32u(1) <<= shift
;
3351 op1
.xmm32u(2) <<= shift
;
3352 op1
.xmm32u(3) <<= shift
;
3355 /* now write result back to destination */
3356 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3358 BX_INFO(("PSLLD_VdqWdq: required SSE2, use --enable-sse option"));
3359 exception(BX_UD_EXCEPTION
, 0, 0);
3364 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLQ_VdqWdq(bxInstruction_c
*i
)
3366 #if BX_SUPPORT_SSE >= 2
3367 BX_CPU_THIS_PTR
prepareSSE();
3369 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3371 /* op2 is a register or memory reference */
3373 op2
= BX_READ_XMM_REG(i
->rm());
3376 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3377 /* pointer, segment address pair */
3378 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3381 if(op2
.xmm64u(0) > 63) /* looking only to low 64 bits */
3388 Bit8u shift
= op2
.xmmubyte(0);
3390 op1
.xmm64u(0) <<= shift
;
3391 op1
.xmm64u(1) <<= shift
;
3394 /* now write result back to destination */
3395 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3397 BX_INFO(("PSLLQ_VdqWdq: required SSE2, use --enable-sse option"));
3398 exception(BX_UD_EXCEPTION
, 0, 0);
3403 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULUDQ_VdqWdq(bxInstruction_c
*i
)
3405 #if BX_SUPPORT_SSE >= 2
3406 BX_CPU_THIS_PTR
prepareSSE();
3408 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
3410 /* op2 is a register or memory reference */
3412 op2
= BX_READ_XMM_REG(i
->rm());
3415 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3416 /* pointer, segment address pair */
3417 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3420 result
.xmm64u(0) = Bit64u(op1
.xmm32u(0)) * Bit64u(op2
.xmm32u(0));
3421 result
.xmm64u(1) = Bit64u(op1
.xmm32u(2)) * Bit64u(op2
.xmm32u(2));
3423 /* now write result back to destination */
3424 BX_WRITE_XMM_REG(i
->nnn(), result
);
3426 BX_INFO(("PMULUDQ_VdqWdq: required SSE2, use --enable-sse option"));
3427 exception(BX_UD_EXCEPTION
, 0, 0);
3432 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMADDWD_VdqWdq(bxInstruction_c
*i
)
3434 #if BX_SUPPORT_SSE >= 2
3435 BX_CPU_THIS_PTR
prepareSSE();
3437 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
3439 /* op2 is a register or memory reference */
3441 op2
= BX_READ_XMM_REG(i
->rm());
3444 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3445 /* pointer, segment address pair */
3446 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3449 for(unsigned j
=0; j
<4; j
++)
3451 if(op1
.xmm32u(j
) == 0x80008000 && op2
.xmm32u(j
) == 0x80008000) {
3452 result
.xmm32u(j
) = 0x80000000;
3456 Bit32s(op1
.xmm16s(2*j
+0)) * Bit32s(op2
.xmm16s(2*j
+0)) +
3457 Bit32s(op1
.xmm16s(2*j
+1)) * Bit32s(op2
.xmm16s(2*j
+1));
3461 /* now write result back to destination */
3462 BX_WRITE_XMM_REG(i
->nnn(), result
);
3464 BX_INFO(("PMADDWD_VdqWdq: required SSE2, use --enable-sse option"));
3465 exception(BX_UD_EXCEPTION
, 0, 0);
3470 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSADBW_VdqWdq(bxInstruction_c
*i
)
3472 #if BX_SUPPORT_SSE >= 2
3473 BX_CPU_THIS_PTR
prepareSSE();
3475 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3476 Bit16u temp1
= 0, temp2
= 0;
3478 /* op2 is a register or memory reference */
3480 op2
= BX_READ_XMM_REG(i
->rm());
3483 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3484 /* pointer, segment address pair */
3485 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3488 temp1
+= abs(op1
.xmmubyte(0x0) - op2
.xmmubyte(0x0));
3489 temp1
+= abs(op1
.xmmubyte(0x1) - op2
.xmmubyte(0x1));
3490 temp1
+= abs(op1
.xmmubyte(0x2) - op2
.xmmubyte(0x2));
3491 temp1
+= abs(op1
.xmmubyte(0x3) - op2
.xmmubyte(0x3));
3492 temp1
+= abs(op1
.xmmubyte(0x4) - op2
.xmmubyte(0x4));
3493 temp1
+= abs(op1
.xmmubyte(0x5) - op2
.xmmubyte(0x5));
3494 temp1
+= abs(op1
.xmmubyte(0x6) - op2
.xmmubyte(0x6));
3495 temp1
+= abs(op1
.xmmubyte(0x7) - op2
.xmmubyte(0x7));
3497 temp2
+= abs(op1
.xmmubyte(0x8) - op2
.xmmubyte(0x8));
3498 temp2
+= abs(op1
.xmmubyte(0x9) - op2
.xmmubyte(0x9));
3499 temp2
+= abs(op1
.xmmubyte(0xA) - op2
.xmmubyte(0xA));
3500 temp2
+= abs(op1
.xmmubyte(0xB) - op2
.xmmubyte(0xB));
3501 temp2
+= abs(op1
.xmmubyte(0xC) - op2
.xmmubyte(0xC));
3502 temp2
+= abs(op1
.xmmubyte(0xD) - op2
.xmmubyte(0xD));
3503 temp2
+= abs(op1
.xmmubyte(0xE) - op2
.xmmubyte(0xE));
3504 temp2
+= abs(op1
.xmmubyte(0xF) - op2
.xmmubyte(0xF));
3506 op1
.xmm64u(0) = Bit64u(temp1
);
3507 op1
.xmm64u(1) = Bit64u(temp2
);
3509 /* now write result back to destination */
3510 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3512 BX_INFO(("PSADBW_VdqWdq: required SSE2, use --enable-sse option"));
3513 exception(BX_UD_EXCEPTION
, 0, 0);
3518 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBB_VdqWdq(bxInstruction_c
*i
)
3520 #if BX_SUPPORT_SSE >= 2
3521 BX_CPU_THIS_PTR
prepareSSE();
3523 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3525 /* op2 is a register or memory reference */
3527 op2
= BX_READ_XMM_REG(i
->rm());
3530 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3531 /* pointer, segment address pair */
3532 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3535 for(unsigned j
=0; j
<16; j
++) {
3536 op1
.xmmubyte(j
) -= op2
.xmmubyte(j
);
3539 /* now write result back to destination */
3540 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3542 BX_INFO(("PSUBB_VdqWdq: required SSE2, use --enable-sse option"));
3543 exception(BX_UD_EXCEPTION
, 0, 0);
3548 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBW_VdqWdq(bxInstruction_c
*i
)
3550 #if BX_SUPPORT_SSE >= 2
3551 BX_CPU_THIS_PTR
prepareSSE();
3553 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3555 /* op2 is a register or memory reference */
3557 op2
= BX_READ_XMM_REG(i
->rm());
3560 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3561 /* pointer, segment address pair */
3562 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3565 op1
.xmm16u(0) -= op2
.xmm16u(0);
3566 op1
.xmm16u(1) -= op2
.xmm16u(1);
3567 op1
.xmm16u(2) -= op2
.xmm16u(2);
3568 op1
.xmm16u(3) -= op2
.xmm16u(3);
3569 op1
.xmm16u(4) -= op2
.xmm16u(4);
3570 op1
.xmm16u(5) -= op2
.xmm16u(5);
3571 op1
.xmm16u(6) -= op2
.xmm16u(6);
3572 op1
.xmm16u(7) -= op2
.xmm16u(7);
3574 /* now write result back to destination */
3575 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3577 BX_INFO(("PSUBW_VdqWdq: required SSE2, use --enable-sse option"));
3578 exception(BX_UD_EXCEPTION
, 0, 0);
3583 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBD_VdqWdq(bxInstruction_c
*i
)
3585 #if BX_SUPPORT_SSE >= 2
3586 BX_CPU_THIS_PTR
prepareSSE();
3588 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3590 /* op2 is a register or memory reference */
3592 op2
= BX_READ_XMM_REG(i
->rm());
3595 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3596 /* pointer, segment address pair */
3597 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3600 op1
.xmm32u(0) -= op2
.xmm32u(0);
3601 op1
.xmm32u(1) -= op2
.xmm32u(1);
3602 op1
.xmm32u(2) -= op2
.xmm32u(2);
3603 op1
.xmm32u(3) -= op2
.xmm32u(3);
3605 /* now write result back to destination */
3606 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3608 BX_INFO(("PSUBD_VdqWdq: required SSE2, use --enable-sse option"));
3609 exception(BX_UD_EXCEPTION
, 0, 0);
3614 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBQ_VdqWdq(bxInstruction_c
*i
)
3616 #if BX_SUPPORT_SSE >= 2
3617 BX_CPU_THIS_PTR
prepareSSE();
3619 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3621 /* op2 is a register or memory reference */
3623 op2
= BX_READ_XMM_REG(i
->rm());
3626 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3627 /* pointer, segment address pair */
3628 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3631 op1
.xmm64u(0) -= op2
.xmm64u(0);
3632 op1
.xmm64u(1) -= op2
.xmm64u(1);
3634 /* now write result back to destination */
3635 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3637 BX_INFO(("PSUBQ_VdqWdq: required SSE2, use --enable-sse option"));
3638 exception(BX_UD_EXCEPTION
, 0, 0);
3643 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDB_VdqWdq(bxInstruction_c
*i
)
3645 #if BX_SUPPORT_SSE >= 2
3646 BX_CPU_THIS_PTR
prepareSSE();
3648 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3650 /* op2 is a register or memory reference */
3652 op2
= BX_READ_XMM_REG(i
->rm());
3655 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3656 /* pointer, segment address pair */
3657 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3660 for(unsigned j
=0; j
<16; j
++) {
3661 op1
.xmmubyte(j
) += op2
.xmmubyte(j
);
3664 /* now write result back to destination */
3665 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3667 BX_INFO(("PADDB_VdqWdq: required SSE2, use --enable-sse option"));
3668 exception(BX_UD_EXCEPTION
, 0, 0);
3673 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDW_VdqWdq(bxInstruction_c
*i
)
3675 #if BX_SUPPORT_SSE >= 2
3676 BX_CPU_THIS_PTR
prepareSSE();
3678 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3680 /* op2 is a register or memory reference */
3682 op2
= BX_READ_XMM_REG(i
->rm());
3685 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3686 /* pointer, segment address pair */
3687 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3690 op1
.xmm16u(0) += op2
.xmm16u(0);
3691 op1
.xmm16u(1) += op2
.xmm16u(1);
3692 op1
.xmm16u(2) += op2
.xmm16u(2);
3693 op1
.xmm16u(3) += op2
.xmm16u(3);
3694 op1
.xmm16u(4) += op2
.xmm16u(4);
3695 op1
.xmm16u(5) += op2
.xmm16u(5);
3696 op1
.xmm16u(6) += op2
.xmm16u(6);
3697 op1
.xmm16u(7) += op2
.xmm16u(7);
3699 /* now write result back to destination */
3700 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3702 BX_INFO(("PADDW_VdqWdq: required SSE2, use --enable-sse option"));
3703 exception(BX_UD_EXCEPTION
, 0, 0);
3708 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDD_VdqWdq(bxInstruction_c
*i
)
3710 #if BX_SUPPORT_SSE >= 2
3711 BX_CPU_THIS_PTR
prepareSSE();
3713 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3715 /* op2 is a register or memory reference */
3717 op2
= BX_READ_XMM_REG(i
->rm());
3720 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3721 /* pointer, segment address pair */
3722 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3725 op1
.xmm32u(0) += op2
.xmm32u(0);
3726 op1
.xmm32u(1) += op2
.xmm32u(1);
3727 op1
.xmm32u(2) += op2
.xmm32u(2);
3728 op1
.xmm32u(3) += op2
.xmm32u(3);
3730 /* now write result back to destination */
3731 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3733 BX_INFO(("PADDD_VdqWdq: required SSE2, use --enable-sse option"));
3734 exception(BX_UD_EXCEPTION
, 0, 0);
3738 /* 66 0F 71 Grp12 010 */
3739 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLW_UdqIb(bxInstruction_c
*i
)
3741 #if BX_SUPPORT_SSE >= 2
3742 BX_CPU_THIS_PTR
prepareSSE();
3744 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3745 Bit8u shift
= i
->Ib();
3752 op
.xmm16u(0) >>= shift
;
3753 op
.xmm16u(1) >>= shift
;
3754 op
.xmm16u(2) >>= shift
;
3755 op
.xmm16u(3) >>= shift
;
3756 op
.xmm16u(4) >>= shift
;
3757 op
.xmm16u(5) >>= shift
;
3758 op
.xmm16u(6) >>= shift
;
3759 op
.xmm16u(7) >>= shift
;
3762 /* now write result back to destination */
3763 BX_WRITE_XMM_REG(i
->rm(), op
);
3765 BX_INFO(("PSRLW_UdqIb: required SSE2, use --enable-sse option"));
3766 exception(BX_UD_EXCEPTION
, 0, 0);
3770 /* 0F 71 Grp12 100 */
3771 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_UdqIb(bxInstruction_c
*i
)
3773 #if BX_SUPPORT_SSE >= 2
3774 BX_CPU_THIS_PTR
prepareSSE();
3776 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm()), result
;
3777 Bit8u shift
= i
->Ib();
3779 if(shift
== 0) return;
3782 result
.xmm16u(0) = (op
.xmm16u(0) & 0x8000) ? 0xffff : 0;
3783 result
.xmm16u(1) = (op
.xmm16u(1) & 0x8000) ? 0xffff : 0;
3784 result
.xmm16u(2) = (op
.xmm16u(2) & 0x8000) ? 0xffff : 0;
3785 result
.xmm16u(3) = (op
.xmm16u(3) & 0x8000) ? 0xffff : 0;
3786 result
.xmm16u(4) = (op
.xmm16u(4) & 0x8000) ? 0xffff : 0;
3787 result
.xmm16u(5) = (op
.xmm16u(5) & 0x8000) ? 0xffff : 0;
3788 result
.xmm16u(6) = (op
.xmm16u(6) & 0x8000) ? 0xffff : 0;
3789 result
.xmm16u(7) = (op
.xmm16u(7) & 0x8000) ? 0xffff : 0;
3792 result
.xmm16u(0) = op
.xmm16u(0) >> shift
;
3793 result
.xmm16u(1) = op
.xmm16u(1) >> shift
;
3794 result
.xmm16u(2) = op
.xmm16u(2) >> shift
;
3795 result
.xmm16u(3) = op
.xmm16u(3) >> shift
;
3796 result
.xmm16u(4) = op
.xmm16u(4) >> shift
;
3797 result
.xmm16u(5) = op
.xmm16u(5) >> shift
;
3798 result
.xmm16u(6) = op
.xmm16u(6) >> shift
;
3799 result
.xmm16u(7) = op
.xmm16u(7) >> shift
;
3801 if(op
.xmm16u(0) & 0x8000) result
.xmm16u(0) |= (0xffff << (16 - shift
));
3802 if(op
.xmm16u(1) & 0x8000) result
.xmm16u(1) |= (0xffff << (16 - shift
));
3803 if(op
.xmm16u(2) & 0x8000) result
.xmm16u(2) |= (0xffff << (16 - shift
));
3804 if(op
.xmm16u(3) & 0x8000) result
.xmm16u(3) |= (0xffff << (16 - shift
));
3805 if(op
.xmm16u(4) & 0x8000) result
.xmm16u(4) |= (0xffff << (16 - shift
));
3806 if(op
.xmm16u(5) & 0x8000) result
.xmm16u(5) |= (0xffff << (16 - shift
));
3807 if(op
.xmm16u(6) & 0x8000) result
.xmm16u(6) |= (0xffff << (16 - shift
));
3808 if(op
.xmm16u(7) & 0x8000) result
.xmm16u(7) |= (0xffff << (16 - shift
));
3811 /* now write result back to destination */
3812 BX_WRITE_XMM_REG(i
->rm(), result
);
3814 BX_INFO(("PSRAW_UdqIb: required SSE2, use --enable-sse option"));
3815 exception(BX_UD_EXCEPTION
, 0, 0);
3819 /* 66 0F 71 Grp12 110 */
3820 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLW_UdqIb(bxInstruction_c
*i
)
3822 #if BX_SUPPORT_SSE >= 2
3823 BX_CPU_THIS_PTR
prepareSSE();
3825 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3826 Bit8u shift
= i
->Ib();
3833 op
.xmm16u(0) <<= shift
;
3834 op
.xmm16u(1) <<= shift
;
3835 op
.xmm16u(2) <<= shift
;
3836 op
.xmm16u(3) <<= shift
;
3837 op
.xmm16u(4) <<= shift
;
3838 op
.xmm16u(5) <<= shift
;
3839 op
.xmm16u(6) <<= shift
;
3840 op
.xmm16u(7) <<= shift
;
3843 /* now write result back to destination */
3844 BX_WRITE_XMM_REG(i
->rm(), op
);
3846 BX_INFO(("PSLLW_UdqIb: required SSE2, use --enable-sse option"));
3847 exception(BX_UD_EXCEPTION
, 0, 0);
3851 /* 66 0F 72 Grp13 010 */
3852 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLD_UdqIb(bxInstruction_c
*i
)
3854 #if BX_SUPPORT_SSE >= 2
3855 BX_CPU_THIS_PTR
prepareSSE();
3857 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3858 Bit8u shift
= i
->Ib();
3865 op
.xmm32u(0) >>= shift
;
3866 op
.xmm32u(1) >>= shift
;
3867 op
.xmm32u(2) >>= shift
;
3868 op
.xmm32u(3) >>= shift
;
3871 /* now write result back to destination */
3872 BX_WRITE_XMM_REG(i
->rm(), op
);
3874 BX_INFO(("PSRLD_UdqIb: required SSE2, use --enable-sse option"));
3875 exception(BX_UD_EXCEPTION
, 0, 0);
3879 /* 0F 72 Grp13 100 */
3880 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAD_UdqIb(bxInstruction_c
*i
)
3882 #if BX_SUPPORT_SSE >= 2
3883 BX_CPU_THIS_PTR
prepareSSE();
3885 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm()), result
;
3886 Bit8u shift
= i
->Ib();
3888 if(shift
== 0) return;
3891 result
.xmm32u(0) = (op
.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
3892 result
.xmm32u(1) = (op
.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
3893 result
.xmm32u(2) = (op
.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
3894 result
.xmm32u(3) = (op
.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
3897 result
.xmm32u(0) = op
.xmm32u(0) >> shift
;
3898 result
.xmm32u(1) = op
.xmm32u(1) >> shift
;
3899 result
.xmm32u(2) = op
.xmm32u(2) >> shift
;
3900 result
.xmm32u(3) = op
.xmm32u(3) >> shift
;
3902 if(op
.xmm32u(0) & 0x80000000) result
.xmm32u(0) |= (0xffffffff << (32-shift
));
3903 if(op
.xmm32u(1) & 0x80000000) result
.xmm32u(1) |= (0xffffffff << (32-shift
));
3904 if(op
.xmm32u(2) & 0x80000000) result
.xmm32u(2) |= (0xffffffff << (32-shift
));
3905 if(op
.xmm32u(3) & 0x80000000) result
.xmm32u(3) |= (0xffffffff << (32-shift
));
3908 /* now write result back to destination */
3909 BX_WRITE_XMM_REG(i
->rm(), result
);
3911 BX_INFO(("PSRAD_UdqIb: required SSE2, use --enable-sse option"));
3912 exception(BX_UD_EXCEPTION
, 0, 0);
3916 /* 66 0F 72 Grp13 110 */
3917 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLD_UdqIb(bxInstruction_c
*i
)
3919 #if BX_SUPPORT_SSE >= 2
3920 BX_CPU_THIS_PTR
prepareSSE();
3922 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3923 Bit8u shift
= i
->Ib();
3930 op
.xmm32u(0) <<= shift
;
3931 op
.xmm32u(1) <<= shift
;
3932 op
.xmm32u(2) <<= shift
;
3933 op
.xmm32u(3) <<= shift
;
3936 /* now write result back to destination */
3937 BX_WRITE_XMM_REG(i
->rm(), op
);
3939 BX_INFO(("PSLLD_UdqIb: required SSE2, use --enable-sse option"));
3940 exception(BX_UD_EXCEPTION
, 0, 0);
3944 /* 66 0F 73 Grp14 010 */
3945 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLQ_UdqIb(bxInstruction_c
*i
)
3947 #if BX_SUPPORT_SSE >= 2
3948 BX_CPU_THIS_PTR
prepareSSE();
3950 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3951 Bit8u shift
= i
->Ib();
3958 op
.xmm64u(0) >>= shift
;
3959 op
.xmm64u(1) >>= shift
;
3962 /* now write result back to destination */
3963 BX_WRITE_XMM_REG(i
->rm(), op
);
3965 BX_INFO(("PSRLQ_UdqIb: required SSE2, use --enable-sse option"));
3966 exception(BX_UD_EXCEPTION
, 0, 0);
3970 /* 66 0F 73 Grp14 011 */
3971 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLDQ_UdqIb(bxInstruction_c
*i
)
3973 #if BX_SUPPORT_SSE >= 2
3974 BX_CPU_THIS_PTR
prepareSSE();
3976 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm()), result
;
3977 Bit8u shift
= i
->Ib();
3979 result
.xmm64u(0) = result
.xmm64u(1) = 0;
3981 for(unsigned j
=shift
; j
<16; j
++) {
3982 result
.xmmubyte(j
-shift
) = op
.xmmubyte(j
);
3985 /* now write result back to destination */
3986 BX_WRITE_XMM_REG(i
->rm(), result
);
3988 BX_INFO(("PSRLDQ_UdqIb: required SSE2, use --enable-sse option"));
3989 exception(BX_UD_EXCEPTION
, 0, 0);
3993 /* 66 0F 73 Grp14 110 */
3994 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLQ_UdqIb(bxInstruction_c
*i
)
3996 #if BX_SUPPORT_SSE >= 2
3997 BX_CPU_THIS_PTR
prepareSSE();
3999 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
4000 Bit8u shift
= i
->Ib();
4007 op
.xmm64u(0) <<= shift
;
4008 op
.xmm64u(1) <<= shift
;
4011 /* now write result back to destination */
4012 BX_WRITE_XMM_REG(i
->rm(), op
);
4014 BX_INFO(("PSLLQ_UdqIb: required SSE2, use --enable-sse option"));
4015 exception(BX_UD_EXCEPTION
, 0, 0);
4019 /* 66 0F 73 Grp14 111 */
4020 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLDQ_UdqIb(bxInstruction_c
*i
)
4022 #if BX_SUPPORT_SSE >= 2
4023 BX_CPU_THIS_PTR
prepareSSE();
4025 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm()), result
;
4026 Bit8u shift
= i
->Ib();
4028 result
.xmm64u(0) = result
.xmm64u(1) = 0;
4030 for(unsigned j
=shift
; j
<16; j
++) {
4031 result
.xmmubyte(j
) = op
.xmmubyte(j
-shift
);
4034 /* now write result back to destination */
4035 BX_WRITE_XMM_REG(i
->rm(), result
);
4037 BX_INFO(("PSLLDQ_UdqIb: required SSE2, use --enable-sse option"));
4038 exception(BX_UD_EXCEPTION
, 0, 0);