1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse.cc,v 1.47 2007/07/31 20:25:52 sshwarts Exp $
3 /////////////////////////////////////////////////////////////////////////
5 // Copyright (c) 2003 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #define NEED_CPU_REG_SHORTCUTS 1
26 #define LOG_THIS BX_CPU_THIS_PTR
28 /* ********************************************** */
29 /* SSE Integer Operations (128bit MMX extensions) */
30 /* ********************************************** */
32 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
35 void BX_CPU_C::PSHUFB_VdqWdq(bxInstruction_c
*i
)
37 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
38 BX_CPU_THIS_PTR
prepareSSE();
40 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
42 /* op2 is a register or memory reference */
44 op2
= BX_READ_XMM_REG(i
->rm());
47 /* pointer, segment address pair */
48 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
51 for(unsigned j
=0; j
<16; j
++)
53 unsigned mask
= op2
.xmmubyte(j
);
55 result
.xmmubyte(j
) = 0;
57 result
.xmmubyte(j
) = op1
.xmmubyte(mask
& 0xf);
60 BX_WRITE_XMM_REG(i
->nnn(), result
);
62 BX_INFO(("PSHUFB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
68 void BX_CPU_C::PHADDW_VdqWdq(bxInstruction_c
*i
)
70 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
71 BX_CPU_THIS_PTR
prepareSSE();
73 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
75 /* op2 is a register or memory reference */
77 op2
= BX_READ_XMM_REG(i
->rm());
80 /* pointer, segment address pair */
81 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
84 result
.xmm16u(0) = op1
.xmm16u(0) + op1
.xmm16u(1);
85 result
.xmm16u(1) = op1
.xmm16u(2) + op1
.xmm16u(3);
86 result
.xmm16u(2) = op1
.xmm16u(4) + op1
.xmm16u(5);
87 result
.xmm16u(3) = op1
.xmm16u(6) + op1
.xmm16u(7);
89 result
.xmm16u(4) = op2
.xmm16u(0) + op2
.xmm16u(1);
90 result
.xmm16u(5) = op2
.xmm16u(2) + op2
.xmm16u(3);
91 result
.xmm16u(6) = op2
.xmm16u(4) + op2
.xmm16u(5);
92 result
.xmm16u(7) = op2
.xmm16u(6) + op2
.xmm16u(7);
94 BX_WRITE_XMM_REG(i
->nnn(), result
);
96 BX_INFO(("PHADDW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
102 void BX_CPU_C::PHADDD_VdqWdq(bxInstruction_c
*i
)
104 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
105 BX_CPU_THIS_PTR
prepareSSE();
107 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
109 /* op2 is a register or memory reference */
111 op2
= BX_READ_XMM_REG(i
->rm());
114 /* pointer, segment address pair */
115 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
118 result
.xmm32u(0) = op1
.xmm32u(0) + op1
.xmm32u(1);
119 result
.xmm32u(1) = op1
.xmm32u(2) + op1
.xmm32u(3);
120 result
.xmm32u(2) = op2
.xmm32u(0) + op2
.xmm32u(1);
121 result
.xmm32u(3) = op2
.xmm32u(2) + op2
.xmm32u(3);
123 BX_WRITE_XMM_REG(i
->nnn(), result
);
125 BX_INFO(("PHADDD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
131 void BX_CPU_C::PHADDSW_VdqWdq(bxInstruction_c
*i
)
133 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
134 BX_CPU_THIS_PTR
prepareSSE();
136 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
138 /* op2 is a register or memory reference */
140 op2
= BX_READ_XMM_REG(i
->rm());
143 /* pointer, segment address pair */
144 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
147 result
.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(0)) + Bit32s(op1
.xmm16s(1)));
148 result
.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(2)) + Bit32s(op1
.xmm16s(3)));
149 result
.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(4)) + Bit32s(op1
.xmm16s(5)));
150 result
.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(6)) + Bit32s(op1
.xmm16s(7)));
152 result
.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(0)) + Bit32s(op2
.xmm16s(1)));
153 result
.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(2)) + Bit32s(op2
.xmm16s(3)));
154 result
.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(4)) + Bit32s(op2
.xmm16s(5)));
155 result
.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(6)) + Bit32s(op2
.xmm16s(7)));
157 /* now write result back to destination */
158 BX_WRITE_XMM_REG(i
->nnn(), result
);
160 BX_INFO(("PHADDSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
166 void BX_CPU_C::PMADDUBSW_VdqWdq(bxInstruction_c
*i
)
168 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
169 BX_CPU_THIS_PTR
prepareSSE();
171 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
173 /* op2 is a register or memory reference */
175 op2
= BX_READ_XMM_REG(i
->rm());
178 /* pointer, segment address pair */
179 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
182 for(unsigned j
=0; j
<8; j
++)
184 Bit32s temp
= Bit32s(op1
.xmmubyte(j
*2+0))*Bit32s(op2
.xmmsbyte(j
*2+0)) +
185 Bit32s(op1
.xmmubyte(j
*2+1))*Bit32s(op2
.xmmsbyte(j
*2+1));
187 result
.xmm16s(j
) = SaturateDwordSToWordS(temp
);
190 /* now write result back to destination */
191 BX_WRITE_XMM_REG(i
->nnn(), result
);
193 BX_INFO(("PMADDUBSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
199 void BX_CPU_C::PHSUBSW_VdqWdq(bxInstruction_c
*i
)
201 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
202 BX_CPU_THIS_PTR
prepareSSE();
204 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
206 /* op2 is a register or memory reference */
208 op2
= BX_READ_XMM_REG(i
->rm());
211 /* pointer, segment address pair */
212 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
215 result
.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(0)) - Bit32s(op1
.xmm16s(1)));
216 result
.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(2)) - Bit32s(op1
.xmm16s(3)));
217 result
.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(4)) - Bit32s(op1
.xmm16s(5)));
218 result
.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(6)) - Bit32s(op1
.xmm16s(7)));
220 result
.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(0)) - Bit32s(op2
.xmm16s(1)));
221 result
.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(2)) - Bit32s(op2
.xmm16s(3)));
222 result
.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(4)) - Bit32s(op2
.xmm16s(5)));
223 result
.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2
.xmm16s(6)) - Bit32s(op2
.xmm16s(7)));
225 /* now write result back to destination */
226 BX_WRITE_XMM_REG(i
->nnn(), result
);
228 BX_INFO(("PHSUBSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
234 void BX_CPU_C::PHSUBW_VdqWdq(bxInstruction_c
*i
)
236 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
237 BX_CPU_THIS_PTR
prepareSSE();
239 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
241 /* op2 is a register or memory reference */
243 op2
= BX_READ_XMM_REG(i
->rm());
246 /* pointer, segment address pair */
247 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
250 result
.xmm16u(0) = op1
.xmm16u(0) - op1
.xmm16u(1);
251 result
.xmm16u(1) = op1
.xmm16u(2) - op1
.xmm16u(3);
252 result
.xmm16u(2) = op1
.xmm16u(4) - op1
.xmm16u(5);
253 result
.xmm16u(3) = op1
.xmm16u(6) - op1
.xmm16u(7);
255 result
.xmm16u(4) = op2
.xmm16u(0) - op2
.xmm16u(1);
256 result
.xmm16u(5) = op2
.xmm16u(2) - op2
.xmm16u(3);
257 result
.xmm16u(6) = op2
.xmm16u(4) - op2
.xmm16u(5);
258 result
.xmm16u(7) = op2
.xmm16u(6) - op2
.xmm16u(7);
260 BX_WRITE_XMM_REG(i
->nnn(), result
);
262 BX_INFO(("PHSUBW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
268 void BX_CPU_C::PHSUBD_VdqWdq(bxInstruction_c
*i
)
270 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
271 BX_CPU_THIS_PTR
prepareSSE();
273 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
275 /* op2 is a register or memory reference */
277 op2
= BX_READ_XMM_REG(i
->rm());
280 /* pointer, segment address pair */
281 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
284 result
.xmm32u(0) = op1
.xmm32u(0) - op1
.xmm32u(1);
285 result
.xmm32u(1) = op1
.xmm32u(2) - op1
.xmm32u(3);
286 result
.xmm32u(2) = op2
.xmm32u(0) - op2
.xmm32u(1);
287 result
.xmm32u(3) = op2
.xmm32u(2) - op2
.xmm32u(3);
289 BX_WRITE_XMM_REG(i
->nnn(), result
);
291 BX_INFO(("PHSUBD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
297 void BX_CPU_C::PSIGNB_VdqWdq(bxInstruction_c
*i
)
299 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
300 BX_CPU_THIS_PTR
prepareSSE();
302 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
304 /* op2 is a register or memory reference */
306 op2
= BX_READ_XMM_REG(i
->rm());
309 /* pointer, segment address pair */
310 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
313 for(unsigned j
=0; j
<16; j
++) {
314 int sign
= (op2
.xmmsbyte(j
) > 0) - (op2
.xmmsbyte(j
) < 0);
315 op1
.xmmsbyte(j
) *= sign
;
318 BX_WRITE_XMM_REG(i
->nnn(), op1
);
320 BX_INFO(("PSIGNB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
326 void BX_CPU_C::PSIGNW_VdqWdq(bxInstruction_c
*i
)
328 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
329 BX_CPU_THIS_PTR
prepareSSE();
331 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
333 /* op2 is a register or memory reference */
335 op2
= BX_READ_XMM_REG(i
->rm());
338 /* pointer, segment address pair */
339 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
342 for(unsigned j
=0; j
<8; j
++) {
343 int sign
= (op2
.xmm16s(j
) > 0) - (op2
.xmm16s(j
) < 0);
344 op1
.xmm16s(j
) *= sign
;
347 BX_WRITE_XMM_REG(i
->nnn(), op1
);
349 BX_INFO(("PSIGNW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
355 void BX_CPU_C::PSIGND_VdqWdq(bxInstruction_c
*i
)
357 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
358 BX_CPU_THIS_PTR
prepareSSE();
360 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
362 /* op2 is a register or memory reference */
364 op2
= BX_READ_XMM_REG(i
->rm());
367 /* pointer, segment address pair */
368 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
371 for(unsigned j
=0; j
<4; j
++) {
372 int sign
= (op2
.xmm32s(j
) > 0) - (op2
.xmm32s(j
) < 0);
373 op1
.xmm32s(j
) *= sign
;
376 BX_WRITE_XMM_REG(i
->nnn(), op1
);
378 BX_INFO(("PSIGND_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
384 void BX_CPU_C::PMULHRSW_VdqWdq(bxInstruction_c
*i
)
386 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
387 BX_CPU_THIS_PTR
prepareSSE();
389 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
391 /* op2 is a register or memory reference */
393 op2
= BX_READ_XMM_REG(i
->rm());
396 /* pointer, segment address pair */
397 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
400 result
.xmm16u(0) = (((op1
.xmm16s(0) * op2
.xmm16s(0)) >> 14) + 1) >> 1;
401 result
.xmm16u(1) = (((op1
.xmm16s(1) * op2
.xmm16s(1)) >> 14) + 1) >> 1;
402 result
.xmm16u(2) = (((op1
.xmm16s(2) * op2
.xmm16s(2)) >> 14) + 1) >> 1;
403 result
.xmm16u(3) = (((op1
.xmm16s(3) * op2
.xmm16s(3)) >> 14) + 1) >> 1;
404 result
.xmm16u(4) = (((op1
.xmm16s(4) * op2
.xmm16s(4)) >> 14) + 1) >> 1;
405 result
.xmm16u(5) = (((op1
.xmm16s(5) * op2
.xmm16s(5)) >> 14) + 1) >> 1;
406 result
.xmm16u(6) = (((op1
.xmm16s(6) * op2
.xmm16s(6)) >> 14) + 1) >> 1;
407 result
.xmm16u(7) = (((op1
.xmm16s(7) * op2
.xmm16s(7)) >> 14) + 1) >> 1;
409 /* now write result back to destination */
410 BX_WRITE_XMM_REG(i
->nnn(), result
);
412 BX_INFO(("PMULHRSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
418 void BX_CPU_C::PABSB_VdqWdq(bxInstruction_c
*i
)
420 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
421 BX_CPU_THIS_PTR
prepareSSE();
423 BxPackedXmmRegister op
;
426 op
= BX_READ_XMM_REG(i
->rm());
429 /* pointer, segment address pair */
430 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
433 if(op
.xmmsbyte(0x0) < 0) op
.xmmubyte(0x0) = -op
.xmmsbyte(0x0);
434 if(op
.xmmsbyte(0x1) < 0) op
.xmmubyte(0x1) = -op
.xmmsbyte(0x1);
435 if(op
.xmmsbyte(0x2) < 0) op
.xmmubyte(0x2) = -op
.xmmsbyte(0x2);
436 if(op
.xmmsbyte(0x3) < 0) op
.xmmubyte(0x3) = -op
.xmmsbyte(0x3);
437 if(op
.xmmsbyte(0x4) < 0) op
.xmmubyte(0x4) = -op
.xmmsbyte(0x4);
438 if(op
.xmmsbyte(0x5) < 0) op
.xmmubyte(0x5) = -op
.xmmsbyte(0x5);
439 if(op
.xmmsbyte(0x6) < 0) op
.xmmubyte(0x6) = -op
.xmmsbyte(0x6);
440 if(op
.xmmsbyte(0x7) < 0) op
.xmmubyte(0x7) = -op
.xmmsbyte(0x7);
441 if(op
.xmmsbyte(0x8) < 0) op
.xmmubyte(0x8) = -op
.xmmsbyte(0x8);
442 if(op
.xmmsbyte(0x9) < 0) op
.xmmubyte(0x9) = -op
.xmmsbyte(0x9);
443 if(op
.xmmsbyte(0xa) < 0) op
.xmmubyte(0xa) = -op
.xmmsbyte(0xa);
444 if(op
.xmmsbyte(0xb) < 0) op
.xmmubyte(0xb) = -op
.xmmsbyte(0xb);
445 if(op
.xmmsbyte(0xc) < 0) op
.xmmubyte(0xc) = -op
.xmmsbyte(0xc);
446 if(op
.xmmsbyte(0xd) < 0) op
.xmmubyte(0xd) = -op
.xmmsbyte(0xd);
447 if(op
.xmmsbyte(0xe) < 0) op
.xmmubyte(0xe) = -op
.xmmsbyte(0xe);
448 if(op
.xmmsbyte(0xf) < 0) op
.xmmubyte(0xf) = -op
.xmmsbyte(0xf);
450 /* now write result back to destination */
451 BX_WRITE_XMM_REG(i
->nnn(), op
);
453 BX_INFO(("PABSB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
459 void BX_CPU_C::PABSW_VdqWdq(bxInstruction_c
*i
)
461 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
462 BX_CPU_THIS_PTR
prepareSSE();
464 BxPackedXmmRegister op
;
467 op
= BX_READ_XMM_REG(i
->rm());
470 /* pointer, segment address pair */
471 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
474 if(op
.xmm16s(0) < 0) op
.xmm16u(0) = -op
.xmm16s(0);
475 if(op
.xmm16s(1) < 0) op
.xmm16u(1) = -op
.xmm16s(1);
476 if(op
.xmm16s(2) < 0) op
.xmm16u(2) = -op
.xmm16s(2);
477 if(op
.xmm16s(3) < 0) op
.xmm16u(3) = -op
.xmm16s(3);
478 if(op
.xmm16s(4) < 0) op
.xmm16u(4) = -op
.xmm16s(4);
479 if(op
.xmm16s(5) < 0) op
.xmm16u(5) = -op
.xmm16s(5);
480 if(op
.xmm16s(6) < 0) op
.xmm16u(6) = -op
.xmm16s(6);
481 if(op
.xmm16s(7) < 0) op
.xmm16u(7) = -op
.xmm16s(7);
483 /* now write result back to destination */
484 BX_WRITE_XMM_REG(i
->nnn(), op
);
486 BX_INFO(("PABSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
492 void BX_CPU_C::PABSD_VdqWdq(bxInstruction_c
*i
)
494 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
495 BX_CPU_THIS_PTR
prepareSSE();
497 BxPackedXmmRegister op
;
500 op
= BX_READ_XMM_REG(i
->rm());
503 /* pointer, segment address pair */
504 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
507 if(op
.xmm32s(0) < 0) op
.xmm32u(0) = -op
.xmm32s(0);
508 if(op
.xmm32s(1) < 0) op
.xmm32u(1) = -op
.xmm32s(1);
509 if(op
.xmm32s(2) < 0) op
.xmm32u(2) = -op
.xmm32s(2);
510 if(op
.xmm32s(3) < 0) op
.xmm32u(3) = -op
.xmm32s(3);
512 /* now write result back to destination */
513 BX_WRITE_XMM_REG(i
->nnn(), op
);
515 BX_INFO(("PABSD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
521 void BX_CPU_C::PBLENDVB_VdqWdq(bxInstruction_c
*i
)
523 #if BX_SUPPORT_SSE >= 4
524 BX_CPU_THIS_PTR
prepareSSE();
526 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
,
527 mask
= BX_READ_XMM_REG(0); // XMM0
529 /* op2 is a register or memory reference */
531 op2
= BX_READ_XMM_REG(i
->rm());
534 /* pointer, segment address pair */
535 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
538 for(unsigned j
=0; j
<16; j
++)
539 if (mask
.xmmubyte(j
) & 0x80) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
541 /* now write result back to destination */
542 BX_WRITE_XMM_REG(i
->nnn(), op1
);
544 BX_INFO(("PBLENDVB_VdqWdq: required SSE4, use --enable-sse option"));
550 void BX_CPU_C::BLENDVPS_VpsWps(bxInstruction_c
*i
)
552 #if BX_SUPPORT_SSE >= 4
553 BX_CPU_THIS_PTR
prepareSSE();
555 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
,
556 mask
= BX_READ_XMM_REG(0); // XMM0
558 /* op2 is a register or memory reference */
560 op2
= BX_READ_XMM_REG(i
->rm());
563 /* pointer, segment address pair */
564 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
567 if (mask
.xmm32u(0) & 0x80000000) op1
.xmm32u(0) = op2
.xmm32u(0);
568 if (mask
.xmm32u(1) & 0x80000000) op1
.xmm32u(0) = op2
.xmm32u(0);
569 if (mask
.xmm32u(2) & 0x80000000) op1
.xmm32u(0) = op2
.xmm32u(0);
570 if (mask
.xmm32u(3) & 0x80000000) op1
.xmm32u(0) = op2
.xmm32u(0);
572 /* now write result back to destination */
573 BX_WRITE_XMM_REG(i
->nnn(), op1
);
575 BX_INFO(("BLENDVPS_VpsWps: required SSE4, use --enable-sse option"));
581 void BX_CPU_C::BLENDVPD_VpdWpd(bxInstruction_c
*i
)
583 #if BX_SUPPORT_SSE >= 4
584 BX_CPU_THIS_PTR
prepareSSE();
586 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
,
587 mask
= BX_READ_XMM_REG(0); // XMM0
589 /* op2 is a register or memory reference */
591 op2
= BX_READ_XMM_REG(i
->rm());
594 /* pointer, segment address pair */
595 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
598 if (mask
.xmm32u(1) & 0x80000000) op1
.xmm64u(0) = op2
.xmm64u(0);
599 if (mask
.xmm32u(3) & 0x80000000) op1
.xmm64u(0) = op2
.xmm64u(0);
601 /* now write result back to destination */
602 BX_WRITE_XMM_REG(i
->nnn(), op1
);
604 BX_INFO(("BLENDVPD_VpdWpd: required SSE4, use --enable-sse option"));
610 void BX_CPU_C::PTEST_VdqWdq(bxInstruction_c
*i
)
612 #if BX_SUPPORT_SSE >= 4
613 BX_CPU_THIS_PTR
prepareSSE();
615 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
618 /* op2 is a register or memory reference */
620 op2
= BX_READ_XMM_REG(i
->rm());
623 /* pointer, segment address pair */
624 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
627 if ((op2
.xmm64u(0) & op1
.xmm64u(0)) == 0 &&
628 (op2
.xmm64u(1) & op1
.xmm64u(1)) == 0) result
|= EFlagsZFMask
;
630 if ((op2
.xmm64u(0) & ~op1
.xmm64u(0)) == 0 &&
631 (op2
.xmm64u(1) & ~op1
.xmm64u(1)) == 0) result
|= EFlagsCFMask
;
633 setEFlagsOSZAPC(result
);
636 BX_INFO(("PTEST_VdqWdq: required SSE4, use --enable-sse option"));
642 void BX_CPU_C::PMULDQ_VdqWdq(bxInstruction_c
*i
)
644 #if BX_SUPPORT_SSE >= 4
645 BX_CPU_THIS_PTR
prepareSSE();
647 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
649 /* op2 is a register or memory reference */
651 op2
= BX_READ_XMM_REG(i
->rm());
654 /* pointer, segment address pair */
655 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
658 result
.xmm64s(0) = Bit64s(op1
.xmm32s(0)) * Bit64s(op2
.xmm32s(0));
659 result
.xmm64s(1) = Bit64s(op1
.xmm32s(2)) * Bit64s(op2
.xmm32s(2));
661 /* now write result back to destination */
662 BX_WRITE_XMM_REG(i
->nnn(), result
);
664 BX_INFO(("PMULDQ_VdqWdq: required SSE4, use --enable-sse option"));
670 void BX_CPU_C::PCMPEQQ_VdqWdq(bxInstruction_c
*i
)
672 #if BX_SUPPORT_SSE >= 4
673 BX_CPU_THIS_PTR
prepareSSE();
675 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
677 /* op2 is a register or memory reference */
679 op2
= BX_READ_XMM_REG(i
->rm());
682 /* pointer, segment address pair */
683 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
686 op1
.xmm64u(0) = (op1
.xmm64u(0) == op2
.xmm64u(0)) ?
687 BX_CONST64(0xffffffffffffffff) : 0;
689 op1
.xmm64u(1) = (op1
.xmm64u(1) == op2
.xmm64u(1)) ?
690 BX_CONST64(0xffffffffffffffff) : 0;
692 /* now write result back to destination */
693 BX_WRITE_XMM_REG(i
->nnn(), op1
);
695 BX_INFO(("PCMPEQQ_VdqWdq: required SSE4, use --enable-sse option"));
701 void BX_CPU_C::PACKUSDW_VdqWdq(bxInstruction_c
*i
)
703 #if BX_SUPPORT_SSE >= 4
704 BX_CPU_THIS_PTR
prepareSSE();
706 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
708 /* op2 is a register or memory reference */
710 op2
= BX_READ_XMM_REG(i
->rm());
713 /* pointer, segment address pair */
714 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
717 result
.xmm16u(0) = SaturateDwordSToWordU(op1
.xmm32s(0));
718 result
.xmm16u(1) = SaturateDwordSToWordU(op1
.xmm32s(1));
719 result
.xmm16u(2) = SaturateDwordSToWordU(op1
.xmm32s(2));
720 result
.xmm16u(3) = SaturateDwordSToWordU(op1
.xmm32s(3));
721 result
.xmm16u(4) = SaturateDwordSToWordU(op2
.xmm32s(0));
722 result
.xmm16u(5) = SaturateDwordSToWordU(op2
.xmm32s(1));
723 result
.xmm16u(6) = SaturateDwordSToWordU(op2
.xmm32s(2));
724 result
.xmm16u(7) = SaturateDwordSToWordU(op2
.xmm32s(3));
726 /* now write result back to destination */
727 BX_WRITE_XMM_REG(i
->nnn(), result
);
729 BX_INFO(("PACKUSDW_VdqWdq: required SSE4, use --enable-sse option"));
735 void BX_CPU_C::PMINSB_VdqWdq(bxInstruction_c
*i
)
737 #if BX_SUPPORT_SSE >= 4
738 BX_CPU_THIS_PTR
prepareSSE();
740 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
742 /* op2 is a register or memory reference */
744 op2
= BX_READ_XMM_REG(i
->rm());
747 /* pointer, segment address pair */
748 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
751 for(unsigned j
=0; j
<16; j
++) {
752 if(op2
.xmmsbyte(j
) < op1
.xmmsbyte(j
)) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
755 /* now write result back to destination */
756 BX_WRITE_XMM_REG(i
->nnn(), op1
);
758 BX_INFO(("PMINSB_VdqWdq: required SSE4, use --enable-sse option"));
764 void BX_CPU_C::PMINSD_VdqWdq(bxInstruction_c
*i
)
766 #if BX_SUPPORT_SSE >= 4
767 BX_CPU_THIS_PTR
prepareSSE();
769 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
771 /* op2 is a register or memory reference */
773 op2
= BX_READ_XMM_REG(i
->rm());
776 /* pointer, segment address pair */
777 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
780 if(op2
.xmm32s(0) < op1
.xmm32s(0)) op1
.xmm32u(0) = op2
.xmm32u(0);
781 if(op2
.xmm32s(1) < op1
.xmm32s(1)) op1
.xmm32u(1) = op2
.xmm32u(1);
782 if(op2
.xmm32s(2) < op1
.xmm32s(2)) op1
.xmm32u(2) = op2
.xmm32u(2);
783 if(op2
.xmm32s(3) < op1
.xmm32s(3)) op1
.xmm32u(3) = op2
.xmm32u(3);
785 /* now write result back to destination */
786 BX_WRITE_XMM_REG(i
->nnn(), op1
);
788 BX_INFO(("PMINSD_VdqWdq: required SSE4, use --enable-sse option"));
794 void BX_CPU_C::PMINUW_VdqWdq(bxInstruction_c
*i
)
796 #if BX_SUPPORT_SSE >= 4
797 BX_CPU_THIS_PTR
prepareSSE();
799 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
801 /* op2 is a register or memory reference */
803 op2
= BX_READ_XMM_REG(i
->rm());
806 /* pointer, segment address pair */
807 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
810 if(op2
.xmm16u(0) < op1
.xmm16u(0)) op1
.xmm16u(0) = op2
.xmm16u(0);
811 if(op2
.xmm16u(1) < op1
.xmm16u(1)) op1
.xmm16u(1) = op2
.xmm16u(1);
812 if(op2
.xmm16u(2) < op1
.xmm16u(2)) op1
.xmm16u(2) = op2
.xmm16u(2);
813 if(op2
.xmm16u(3) < op1
.xmm16u(3)) op1
.xmm16u(3) = op2
.xmm16u(3);
814 if(op2
.xmm16u(4) < op1
.xmm16u(4)) op1
.xmm16u(4) = op2
.xmm16u(4);
815 if(op2
.xmm16u(5) < op1
.xmm16u(5)) op1
.xmm16u(5) = op2
.xmm16u(5);
816 if(op2
.xmm16u(6) < op1
.xmm16u(6)) op1
.xmm16u(6) = op2
.xmm16u(6);
817 if(op2
.xmm16u(7) < op1
.xmm16u(7)) op1
.xmm16u(7) = op2
.xmm16u(7);
819 /* now write result back to destination */
820 BX_WRITE_XMM_REG(i
->nnn(), op1
);
822 BX_INFO(("PMINUW_VdqWdq: required SSE4, use --enable-sse option"));
828 void BX_CPU_C::PMINUD_VdqWdq(bxInstruction_c
*i
)
830 #if BX_SUPPORT_SSE >= 4
831 BX_CPU_THIS_PTR
prepareSSE();
833 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
835 /* op2 is a register or memory reference */
837 op2
= BX_READ_XMM_REG(i
->rm());
840 /* pointer, segment address pair */
841 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
844 if(op2
.xmm32u(0) < op1
.xmm32u(0)) op1
.xmm32u(0) = op2
.xmm32u(0);
845 if(op2
.xmm32u(1) < op1
.xmm32u(1)) op1
.xmm32u(1) = op2
.xmm32u(1);
846 if(op2
.xmm32u(2) < op1
.xmm32u(2)) op1
.xmm32u(2) = op2
.xmm32u(2);
847 if(op2
.xmm32u(3) < op1
.xmm32u(3)) op1
.xmm32u(3) = op2
.xmm32u(3);
849 /* now write result back to destination */
850 BX_WRITE_XMM_REG(i
->nnn(), op1
);
852 BX_INFO(("PMINUD_VdqWdq: required SSE4, use --enable-sse option"));
858 void BX_CPU_C::PMAXSB_VdqWdq(bxInstruction_c
*i
)
860 #if BX_SUPPORT_SSE >= 4
861 BX_CPU_THIS_PTR
prepareSSE();
863 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
865 /* op2 is a register or memory reference */
867 op2
= BX_READ_XMM_REG(i
->rm());
870 /* pointer, segment address pair */
871 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
874 for(unsigned j
=0; j
<16; j
++) {
875 if(op2
.xmmsbyte(j
) > op1
.xmmsbyte(j
)) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
878 /* now write result back to destination */
879 BX_WRITE_XMM_REG(i
->nnn(), op1
);
881 BX_INFO(("PMAXSB_VdqWdq: required SSE4, use --enable-sse option"));
887 void BX_CPU_C::PMAXSD_VdqWdq(bxInstruction_c
*i
)
889 #if BX_SUPPORT_SSE >= 4
890 BX_CPU_THIS_PTR
prepareSSE();
892 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
894 /* op2 is a register or memory reference */
896 op2
= BX_READ_XMM_REG(i
->rm());
899 /* pointer, segment address pair */
900 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
903 if(op2
.xmm32s(0) > op1
.xmm32s(0)) op1
.xmm32u(0) = op2
.xmm32u(0);
904 if(op2
.xmm32s(1) > op1
.xmm32s(1)) op1
.xmm32u(1) = op2
.xmm32u(1);
905 if(op2
.xmm32s(2) > op1
.xmm32s(2)) op1
.xmm32u(2) = op2
.xmm32u(2);
906 if(op2
.xmm32s(3) > op1
.xmm32s(3)) op1
.xmm32u(3) = op2
.xmm32u(3);
908 /* now write result back to destination */
909 BX_WRITE_XMM_REG(i
->nnn(), op1
);
911 BX_INFO(("PMAXSD_VdqWdq: required SSE4, use --enable-sse option"));
917 void BX_CPU_C::PMAXUW_VdqWdq(bxInstruction_c
*i
)
919 #if BX_SUPPORT_SSE >= 4
920 BX_CPU_THIS_PTR
prepareSSE();
922 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
924 /* op2 is a register or memory reference */
926 op2
= BX_READ_XMM_REG(i
->rm());
929 /* pointer, segment address pair */
930 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
933 if(op2
.xmm16u(0) > op1
.xmm16u(0)) op1
.xmm16u(0) = op2
.xmm16u(0);
934 if(op2
.xmm16u(1) > op1
.xmm16u(1)) op1
.xmm16u(1) = op2
.xmm16u(1);
935 if(op2
.xmm16u(2) > op1
.xmm16u(2)) op1
.xmm16u(2) = op2
.xmm16u(2);
936 if(op2
.xmm16u(3) > op1
.xmm16u(3)) op1
.xmm16u(3) = op2
.xmm16u(3);
937 if(op2
.xmm16u(4) > op1
.xmm16u(4)) op1
.xmm16u(4) = op2
.xmm16u(4);
938 if(op2
.xmm16u(5) > op1
.xmm16u(5)) op1
.xmm16u(5) = op2
.xmm16u(5);
939 if(op2
.xmm16u(6) > op1
.xmm16u(6)) op1
.xmm16u(6) = op2
.xmm16u(6);
940 if(op2
.xmm16u(7) > op1
.xmm16u(7)) op1
.xmm16u(7) = op2
.xmm16u(7);
942 /* now write result back to destination */
943 BX_WRITE_XMM_REG(i
->nnn(), op1
);
945 BX_INFO(("PMAXUW_VdqWdq: required SSE4, use --enable-sse option"));
951 void BX_CPU_C::PMAXUD_VdqWdq(bxInstruction_c
*i
)
953 #if BX_SUPPORT_SSE >= 4
954 BX_CPU_THIS_PTR
prepareSSE();
956 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
958 /* op2 is a register or memory reference */
960 op2
= BX_READ_XMM_REG(i
->rm());
963 /* pointer, segment address pair */
964 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
967 if(op2
.xmm32u(0) > op1
.xmm32u(0)) op1
.xmm32u(0) = op2
.xmm32u(0);
968 if(op2
.xmm32u(1) > op1
.xmm32u(1)) op1
.xmm32u(1) = op2
.xmm32u(1);
969 if(op2
.xmm32u(2) > op1
.xmm32u(2)) op1
.xmm32u(2) = op2
.xmm32u(2);
970 if(op2
.xmm32u(3) > op1
.xmm32u(3)) op1
.xmm32u(3) = op2
.xmm32u(3);
972 /* now write result back to destination */
973 BX_WRITE_XMM_REG(i
->nnn(), op1
);
975 BX_INFO(("PMAXUD_VdqWdq: required SSE4, use --enable-sse option"));
981 void BX_CPU_C::PMULLD_VdqWdq(bxInstruction_c
*i
)
983 #if BX_SUPPORT_SSE >= 4
984 BX_CPU_THIS_PTR
prepareSSE();
986 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
988 /* op2 is a register or memory reference */
990 op2
= BX_READ_XMM_REG(i
->rm());
993 /* pointer, segment address pair */
994 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
997 Bit64s product1
= Bit64s(op1
.xmm32s(0)) * Bit64s(op2
.xmm32s(0));
998 Bit64s product2
= Bit64s(op1
.xmm32s(1)) * Bit64s(op2
.xmm32s(1));
999 Bit64s product3
= Bit64s(op1
.xmm32s(2)) * Bit64s(op2
.xmm32s(2));
1000 Bit64s product4
= Bit64s(op1
.xmm32s(3)) * Bit64s(op2
.xmm32s(3));
1002 result
.xmm32u(0) = (Bit32u
)(product1
& 0xFFFFFFFF);
1003 result
.xmm32u(1) = (Bit32u
)(product2
& 0xFFFFFFFF);
1004 result
.xmm32u(2) = (Bit32u
)(product3
& 0xFFFFFFFF);
1005 result
.xmm32u(3) = (Bit32u
)(product4
& 0xFFFFFFFF);
1007 /* now write result back to destination */
1008 BX_WRITE_XMM_REG(i
->nnn(), result
);
1010 BX_INFO(("PMULLD_VdqWdq: required SSE4, use --enable-sse option"));
1016 void BX_CPU_C::PHMINPOSUW_VdqWdq(bxInstruction_c
*i
)
1018 #if BX_SUPPORT_SSE >= 4
1019 BX_CPU_THIS_PTR
prepareSSE();
1021 BxPackedXmmRegister op
, result
;
1023 /* op2 is a register or memory reference */
1025 op
= BX_READ_XMM_REG(i
->rm());
1028 /* pointer, segment address pair */
1029 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
1034 for (unsigned j
=1; j
< 8; j
++) {
1035 if (op
.xmm16u(j
) < op
.xmm16u(min
)) min
= j
;
1038 result
.xmm16u(0) = op
.xmm16u(min
);
1039 result
.xmm16u(1) = min
;
1040 result
.xmm32u(1) = 0;
1041 result
.xmm64u(1) = 0;
1043 /* now write result back to destination */
1044 BX_WRITE_XMM_REG(i
->nnn(), result
);
1046 BX_INFO(("PHMINPOSUW_VdqWdq: required SSE4, use --enable-sse option"));
1052 void BX_CPU_C::BLENDPS_VpsWpsIb(bxInstruction_c
*i
)
1054 #if BX_SUPPORT_SSE >= 4
1055 BX_CPU_THIS_PTR
prepareSSE();
1057 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1058 Bit8u mask
= i
->Ib();
1060 /* op2 is a register or memory reference */
1062 op2
= BX_READ_XMM_REG(i
->rm());
1065 /* pointer, segment address pair */
1066 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1069 if (mask
& 0x1) op1
.xmm32u(0) = op2
.xmm32u(0);
1070 if (mask
& 0x2) op1
.xmm32u(1) = op2
.xmm32u(1);
1071 if (mask
& 0x4) op1
.xmm32u(2) = op2
.xmm32u(2);
1072 if (mask
& 0x8) op1
.xmm32u(3) = op2
.xmm32u(3);
1074 /* now write result back to destination */
1075 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1077 BX_INFO(("BLENDPS_VpsWpsIb: required SSE4, use --enable-sse option"));
1083 void BX_CPU_C::BLENDPD_VpdWpdIb(bxInstruction_c
*i
)
1085 #if BX_SUPPORT_SSE >= 4
1086 BX_CPU_THIS_PTR
prepareSSE();
1088 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1089 Bit8u mask
= i
->Ib();
1091 /* op2 is a register or memory reference */
1093 op2
= BX_READ_XMM_REG(i
->rm());
1096 /* pointer, segment address pair */
1097 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1100 if (mask
& 0x1) op1
.xmm64u(0) = op2
.xmm64u(0);
1101 if (mask
& 0x2) op1
.xmm64u(1) = op2
.xmm64u(1);
1103 /* now write result back to destination */
1104 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1106 BX_INFO(("BLENDPD_VpdWpdIb: required SSE4, use --enable-sse option"));
/*
 * PBLENDW_VdqWdqIb: selective copy of 16-bit lanes from op2 into op1.
 * Bit k (k = 0..7) of i->Ib() decides whether word k of op1 is replaced by
 * word k of op2; op1 is then written to XMM register i->nnn(). op2 is taken
 * from XMM register i->rm() or read through readVirtualDQwordAligned at
 * (i->seg(), RMAddr(i)).
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1112 void BX_CPU_C::PBLENDW_VdqWdqIb(bxInstruction_c
*i
)
1114 #if BX_SUPPORT_SSE >= 4
1115 BX_CPU_THIS_PTR
prepareSSE();
1117 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1118 Bit8u mask
= i
->Ib();
1120 /* op2 is a register or memory reference */
1122 op2
= BX_READ_XMM_REG(i
->rm());
1125 /* pointer, segment address pair */
1126 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
/* all eight bits of the immediate are used, one per 16-bit lane */
1129 if (mask
& 0x01) op1
.xmm16u(0) = op2
.xmm16u(0);
1130 if (mask
& 0x02) op1
.xmm16u(1) = op2
.xmm16u(1);
1131 if (mask
& 0x04) op1
.xmm16u(2) = op2
.xmm16u(2);
1132 if (mask
& 0x08) op1
.xmm16u(3) = op2
.xmm16u(3);
1133 if (mask
& 0x10) op1
.xmm16u(4) = op2
.xmm16u(4);
1134 if (mask
& 0x20) op1
.xmm16u(5) = op2
.xmm16u(5);
1135 if (mask
& 0x40) op1
.xmm16u(6) = op2
.xmm16u(6);
1136 if (mask
& 0x80) op1
.xmm16u(7) = op2
.xmm16u(7);
1138 /* now write result back to destination */
1139 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1141 BX_INFO(("PBLENDW_VdqWdqIb: required SSE4, use --enable-sse option"));
/*
 * PEXTRB_HbdUdqIb: extracts one byte of an XMM register.
 * The byte index is i->Ib() & 0xF. The byte is either handed to
 * BX_WRITE_32BIT_REGZ for register i->nnn() or stored with
 * write_virtual_byte at (i->seg(), RMAddr(i)).
 * NOTE(review): the XMM source is read from i->rm() while the memory form
 * also addresses through RMAddr(i), and the register result goes to
 * i->nnn(); confirm these operand roles against the decoder.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1147 void BX_CPU_C::PEXTRB_HbdUdqIb(bxInstruction_c
*i
)
1149 #if BX_SUPPORT_SSE >= 4
1150 BX_CPU_THIS_PTR
prepareSSE();
1152 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
1153 Bit8u result
= op
.xmmubyte(i
->Ib() & 0xF);
1155 /* result is a register or memory reference */
1157 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
1160 write_virtual_byte(i
->seg(), RMAddr(i
), &result
);
1163 BX_INFO(("PEXTRB_HbdUdqIb: required SSE4, use --enable-sse option"));
/*
 * PEXTRW_HwdUdqIb: extracts one 16-bit word of an XMM register.
 * The word index is i->Ib() & 7. The word is either handed to
 * BX_WRITE_32BIT_REGZ for register i->nnn() or stored with
 * write_virtual_word at (i->seg(), RMAddr(i)).
 * NOTE(review): the XMM source is read from i->rm() while the memory form
 * also addresses through RMAddr(i), and the register result goes to
 * i->nnn(); confirm these operand roles against the decoder.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1169 void BX_CPU_C::PEXTRW_HwdUdqIb(bxInstruction_c
*i
)
1171 #if BX_SUPPORT_SSE >= 4
1172 BX_CPU_THIS_PTR
prepareSSE();
1174 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
1175 Bit16u result
= op
.xmm16u(i
->Ib() & 7);
1177 /* result is a register or memory reference */
1179 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
1182 write_virtual_word(i
->seg(), RMAddr(i
), &result
);
1185 BX_INFO(("PEXTRW_HwdUdqIb: required SSE4, use --enable-sse option"));
/*
 * PEXTRD_HdUdqIb: extracts a 32-bit or 64-bit element of an XMM register.
 * With BX_SUPPORT_X86_64 and i->os64L() set, qword (i->Ib() & 1) is
 * extracted and delivered through BX_WRITE_64BIT_REG(i->nnn()) or
 * write_virtual_qword. The remaining path extracts dword (i->Ib() & 3) and
 * delivers it through BX_WRITE_32BIT_REGZ(i->nnn()) or write_virtual_dword.
 * NOTE(review): the XMM source is read from i->rm() while the memory form
 * also addresses through RMAddr(i); confirm the operand roles against the
 * decoder.
 * NOTE(review): braces, the else branches and the closing preprocessor lines
 * are not visible in this listing; confirm the control flow in the full file.
 */
1191 void BX_CPU_C::PEXTRD_HdUdqIb(bxInstruction_c
*i
)
1193 #if BX_SUPPORT_SSE >= 4
1194 BX_CPU_THIS_PTR
prepareSSE();
1196 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
1198 #if BX_SUPPORT_X86_64
1199 if (i
->os64L()) /* 64 bit operand size mode */
1201 Bit64u result
= op
.xmm64u(i
->Ib() & 1);
1203 /* result is a register or memory reference */
1205 BX_WRITE_64BIT_REG(i
->nnn(), result
);
1208 write_virtual_qword(i
->seg(), RMAddr(i
), &result
);
/* 32-bit element path */
1214 Bit32u result
= op
.xmm32u(i
->Ib() & 3);
1216 /* result is a register or memory reference */
1218 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
1221 write_virtual_dword(i
->seg(), RMAddr(i
), &result
);
1225 BX_INFO(("PEXTRD_HdUdqIb: required SSE4, use --enable-sse option"));
/*
 * EXTRACTPS_HdUpsIb: extracts one 32-bit lane of an XMM register.
 * The lane index is i->Ib() & 3. The value is either handed to
 * BX_WRITE_32BIT_REGZ for register i->nnn() or stored with
 * write_virtual_dword at (i->seg(), RMAddr(i)).
 * NOTE(review): the XMM source is read from i->rm() while the memory form
 * also addresses through RMAddr(i); confirm the operand roles against the
 * decoder.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1231 void BX_CPU_C::EXTRACTPS_HdUpsIb(bxInstruction_c
*i
)
1233 #if BX_SUPPORT_SSE >= 4
1234 BX_CPU_THIS_PTR
prepareSSE();
1236 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
1237 Bit32u result
= op
.xmm32u(i
->Ib() & 3);
1239 /* result is a register or memory reference */
1241 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
1244 write_virtual_dword(i
->seg(), RMAddr(i
), &result
);
1247 BX_INFO(("EXTRACTPS_HdUpsIb: required SSE4, use --enable-sse option"));
/*
 * PINSRB_VdqEbIb: inserts a byte into XMM register i->nnn().
 * op2 is obtained from BX_READ_16BIT_REG(i->rm()) or from read_virtual_byte
 * at (i->seg(), RMAddr(i)) and stored into byte lane (i->Ib() & 0xF) of op1;
 * all other bytes of op1 are written back unchanged.
 * NOTE(review): the declaration of op2, the braces, the register-vs-memory
 * test and any #else/#endif are not visible in this listing; confirm them in
 * the full file.
 */
1253 void BX_CPU_C::PINSRB_VdqEbIb(bxInstruction_c
*i
)
1255 #if BX_SUPPORT_SSE >= 4
1256 BX_CPU_THIS_PTR
prepareSSE();
1258 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn());
1261 /* op2 is a register or memory reference */
1263 op2
= BX_READ_16BIT_REG(i
->rm()); // won't allow reading of AH/CH/BH/DH
1266 /* pointer, segment address pair */
1267 read_virtual_byte(i
->seg(), RMAddr(i
), &op2
);
1270 op1
.xmmubyte(i
->Ib() & 0xF) = op2
;
1272 /* now write result back to destination */
1273 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1275 BX_INFO(("PINSRB_VdqEbIb: required SSE4, use --enable-sse option"));
/*
 * INSERTPS_VpsWssIb: inserts a 32-bit value into op1, then clears lanes.
 * control = i->Ib() is used as three fields:
 *   bits 7:6 - lane of XMM register i->rm() to take (register form only);
 *   bits 5:4 - lane of op1 that receives the value;
 *   bits 3:0 - one bit per lane of op1 to force to zero afterwards.
 * In the memory form the value is a dword read with read_virtual_dword at
 * (i->seg(), RMAddr(i)). op1 is finally written to XMM register i->nnn().
 * NOTE(review): the declaration of op2, the braces, the register-vs-memory
 * test and any #else/#endif are not visible in this listing; confirm them in
 * the full file.
 */
1281 void BX_CPU_C::INSERTPS_VpsWssIb(bxInstruction_c
*i
)
1283 #if BX_SUPPORT_SSE >= 4
1284 BX_CPU_THIS_PTR
prepareSSE();
1286 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn());
1287 Bit8u control
= i
->Ib();
1290 /* op2 is a register or memory reference */
1292 BxPackedXmmRegister temp
= BX_READ_XMM_REG(i
->rm());
1293 op2
= temp
.xmm32u((control
>> 6) & 3);
1296 /* pointer, segment address pair */
1297 read_virtual_dword(i
->seg(), RMAddr(i
), &op2
);
1300 op1
.xmm32u((control
>> 4) & 3) = op2
;
/* zeroing runs after the insert, so it can clear the lane just written */
1302 if (control
& 1) op1
.xmm32u(0) = 0;
1303 if (control
& 2) op1
.xmm32u(1) = 0;
1304 if (control
& 4) op1
.xmm32u(2) = 0;
1305 if (control
& 8) op1
.xmm32u(3) = 0;
1307 /* now write result back to destination */
1308 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1310 BX_INFO(("INSERTPS_VpsWssIb: required SSE4, use --enable-sse option"));
/*
 * PINSRD_VdqEdIb: inserts a 32-bit or 64-bit value into XMM register
 * i->nnn().
 * With BX_SUPPORT_X86_64 and i->os64L() set, op2 comes from
 * BX_READ_64BIT_REG(i->rm()) or read_virtual_qword and replaces qword
 * (i->Ib() & 1) of op1. The remaining path takes op2 from
 * BX_READ_32BIT_REG(i->rm()) or read_virtual_dword and replaces dword
 * (i->Ib() & 3) of op1.
 * NOTE(review): the declarations of op2, the braces, the else branches and
 * the closing preprocessor lines are not visible in this listing; confirm
 * them in the full file.
 */
1316 void BX_CPU_C::PINSRD_VdqEdIb(bxInstruction_c
*i
)
1318 #if BX_SUPPORT_SSE >= 4
1319 BX_CPU_THIS_PTR
prepareSSE();
1321 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn());
1323 #if BX_SUPPORT_X86_64
1324 if (i
->os64L()) /* 64 bit operand size mode */
1328 /* op2 is a register or memory reference */
1330 op2
= BX_READ_64BIT_REG(i
->rm());
1333 /* pointer, segment address pair */
1334 read_virtual_qword(i
->seg(), RMAddr(i
), &op2
);
1337 op1
.xmm64u(i
->Ib() & 1) = op2
;
/* 32-bit element path */
1344 /* op2 is a register or memory reference */
1346 op2
= BX_READ_32BIT_REG(i
->rm());
1349 /* pointer, segment address pair */
1350 read_virtual_dword(i
->seg(), RMAddr(i
), &op2
);
1353 op1
.xmm32u(i
->Ib() & 3) = op2
;
1356 /* now write result back to destination */
1357 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1359 BX_INFO(("PINSRD_VdqEdIb: required SSE4, use --enable-sse option"));
/*
 * MPSADBW_VdqWdqIb: eight 16-bit sums of byte differences.
 * src_offset = (i->Ib() & 3) * 4 selects a 4-byte group of op2, and
 * dst_offset = ((i->Ib() >> 2) & 1) * 4 selects the starting byte in op1.
 * For j = 0..7, result word j starts at 0 and accumulates, for k = 0..3, a
 * difference between op1 byte (j + k + dst_offset) and op2 byte
 * (k + src_offset). The result is written to XMM register i->nnn().
 * NOTE(review): both "temp1 - temp2" and "temp2 - temp1" accumulations are
 * present but the test choosing between them is not visible; presumably the
 * larger-minus-smaller form is picked (absolute difference) - confirm.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1365 void BX_CPU_C::MPSADBW_VdqWdqIb(bxInstruction_c
*i
)
1367 #if BX_SUPPORT_SSE >= 4
1368 BX_CPU_THIS_PTR
prepareSSE();
1370 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1372 /* op2 is a register or memory reference */
1374 op2
= BX_READ_XMM_REG(i
->rm());
1377 /* pointer, segment address pair */
1378 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1381 unsigned src_offset
= (i
->Ib() & 3) * 4;
1382 unsigned dst_offset
= ((i
->Ib() >> 2) & 1) * 4;
/* highest op1 byte touched is 7 + 3 + 4 = 14, highest op2 byte is 3 + 12 = 15 */
1384 for (unsigned j
=0; j
< 8; j
++)
1386 result
.xmm16u(j
) = 0;
1388 for (unsigned k
=0; k
< 4; k
++) {
1389 Bit8u temp1
= op1
.xmmubyte(j
+ k
+ dst_offset
);
1390 Bit8u temp2
= op2
.xmmubyte( k
+ src_offset
);
1392 result
.xmm16u(j
) += (temp1
- temp2
);
1394 result
.xmm16u(j
) += (temp2
- temp1
);
1398 BX_WRITE_XMM_REG(i
->nnn(), result
);
1400 BX_INFO(("MPSADBW_VdqWdqIb: required SSE4, use --enable-sse option"));
1405 #endif // (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
/*
 * PUNPCKLBW_VdqWq: interleaves the low eight bytes of op1 and op2.
 * For k = 0..7, result byte 2k is op1 byte k and result byte 2k+1 is op2
 * byte k. The result is written to XMM register i->nnn(). op2 is taken from
 * XMM register i->rm() or read through readVirtualDQwordAligned at
 * (i->seg(), RMAddr(i)).
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1408 void BX_CPU_C::PUNPCKLBW_VdqWq(bxInstruction_c
*i
)
1410 #if BX_SUPPORT_SSE >= 2
1411 BX_CPU_THIS_PTR
prepareSSE();
1413 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1415 /* op2 is a register or memory reference */
1417 op2
= BX_READ_XMM_REG(i
->rm());
1420 /* pointer, segment address pair */
1421 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
/* even result bytes come from op1, odd result bytes from op2 */
1424 result
.xmmubyte(0x0) = op1
.xmmubyte(0);
1425 result
.xmmubyte(0x1) = op2
.xmmubyte(0);
1426 result
.xmmubyte(0x2) = op1
.xmmubyte(1);
1427 result
.xmmubyte(0x3) = op2
.xmmubyte(1);
1428 result
.xmmubyte(0x4) = op1
.xmmubyte(2);
1429 result
.xmmubyte(0x5) = op2
.xmmubyte(2);
1430 result
.xmmubyte(0x6) = op1
.xmmubyte(3);
1431 result
.xmmubyte(0x7) = op2
.xmmubyte(3);
1432 result
.xmmubyte(0x8) = op1
.xmmubyte(4);
1433 result
.xmmubyte(0x9) = op2
.xmmubyte(4);
1434 result
.xmmubyte(0xA) = op1
.xmmubyte(5);
1435 result
.xmmubyte(0xB) = op2
.xmmubyte(5);
1436 result
.xmmubyte(0xC) = op1
.xmmubyte(6);
1437 result
.xmmubyte(0xD) = op2
.xmmubyte(6);
1438 result
.xmmubyte(0xE) = op1
.xmmubyte(7);
1439 result
.xmmubyte(0xF) = op2
.xmmubyte(7);
1441 /* now write result back to destination */
1442 BX_WRITE_XMM_REG(i
->nnn(), result
);
1444 BX_INFO(("PUNPCKLBW_VdqWq: required SSE2, use --enable-sse option"));
/*
 * PUNPCKLWD_VdqWq: interleaves the low four 16-bit words of op1 and op2.
 * For k = 0..3, result word 2k is op1 word k and result word 2k+1 is op2
 * word k. The result is written to XMM register i->nnn(). op2 is taken from
 * XMM register i->rm() or read through readVirtualDQwordAligned at
 * (i->seg(), RMAddr(i)).
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1450 void BX_CPU_C::PUNPCKLWD_VdqWq(bxInstruction_c
*i
)
1452 #if BX_SUPPORT_SSE >= 2
1453 BX_CPU_THIS_PTR
prepareSSE();
1455 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1457 /* op2 is a register or memory reference */
1459 op2
= BX_READ_XMM_REG(i
->rm());
1462 /* pointer, segment address pair */
1463 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1466 result
.xmm16u(0) = op1
.xmm16u(0);
1467 result
.xmm16u(1) = op2
.xmm16u(0);
1468 result
.xmm16u(2) = op1
.xmm16u(1);
1469 result
.xmm16u(3) = op2
.xmm16u(1);
1470 result
.xmm16u(4) = op1
.xmm16u(2);
1471 result
.xmm16u(5) = op2
.xmm16u(2);
1472 result
.xmm16u(6) = op1
.xmm16u(3);
1473 result
.xmm16u(7) = op2
.xmm16u(3);
1475 /* now write result back to destination */
1476 BX_WRITE_XMM_REG(i
->nnn(), result
);
1478 BX_INFO(("PUNPCKLWD_VdqWq: required SSE2, use --enable-sse option"));
/*
 * UNPCKLPS_VpsWq: interleaves the low two 32-bit lanes of op1 and op2.
 * Result lanes are, in order: op1 lane 0, op2 lane 0, op1 lane 1, op2
 * lane 1. The result is written to XMM register i->nnn(). According to the
 * opcode comments below, the same handler serves UNPCKLPS and PUNPCKLDQ.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1483 /* UNPCKLPS: 0F 14 */
1484 /* PUNPCKLDQ: 66 0F 62 */
1485 void BX_CPU_C::UNPCKLPS_VpsWq(bxInstruction_c
*i
)
1487 #if BX_SUPPORT_SSE >= 1
1488 BX_CPU_THIS_PTR
prepareSSE();
1490 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1492 /* op2 is a register or memory reference */
1494 op2
= BX_READ_XMM_REG(i
->rm());
1497 /* pointer, segment address pair */
1498 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1501 result
.xmm32u(0) = op1
.xmm32u(0);
1502 result
.xmm32u(1) = op2
.xmm32u(0);
1503 result
.xmm32u(2) = op1
.xmm32u(1);
1504 result
.xmm32u(3) = op2
.xmm32u(1);
1506 /* now write result back to destination */
1507 BX_WRITE_XMM_REG(i
->nnn(), result
);
1509 BX_INFO(("UNPCKLPS_VpsWq: required SSE, use --enable-sse option"));
/*
 * PACKSSWB_VdqWq: packs sixteen signed words into sixteen signed bytes.
 * Result bytes 0..7 are SaturateWordSToByteS applied to op1 words 0..7;
 * result bytes 8..15 are the same conversion applied to op2 words 0..7.
 * The result is written to XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1515 void BX_CPU_C::PACKSSWB_VdqWq(bxInstruction_c
*i
)
1517 #if BX_SUPPORT_SSE >= 2
1518 BX_CPU_THIS_PTR
prepareSSE();
1520 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1522 /* op2 is a register or memory reference */
1524 op2
= BX_READ_XMM_REG(i
->rm());
1527 /* pointer, segment address pair */
1528 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
/* low half of the result: op1 */
1531 result
.xmmsbyte(0x0) = SaturateWordSToByteS(op1
.xmm16s(0));
1532 result
.xmmsbyte(0x1) = SaturateWordSToByteS(op1
.xmm16s(1));
1533 result
.xmmsbyte(0x2) = SaturateWordSToByteS(op1
.xmm16s(2));
1534 result
.xmmsbyte(0x3) = SaturateWordSToByteS(op1
.xmm16s(3));
1535 result
.xmmsbyte(0x4) = SaturateWordSToByteS(op1
.xmm16s(4));
1536 result
.xmmsbyte(0x5) = SaturateWordSToByteS(op1
.xmm16s(5));
1537 result
.xmmsbyte(0x6) = SaturateWordSToByteS(op1
.xmm16s(6));
1538 result
.xmmsbyte(0x7) = SaturateWordSToByteS(op1
.xmm16s(7));
/* high half of the result: op2 */
1540 result
.xmmsbyte(0x8) = SaturateWordSToByteS(op2
.xmm16s(0));
1541 result
.xmmsbyte(0x9) = SaturateWordSToByteS(op2
.xmm16s(1));
1542 result
.xmmsbyte(0xA) = SaturateWordSToByteS(op2
.xmm16s(2));
1543 result
.xmmsbyte(0xB) = SaturateWordSToByteS(op2
.xmm16s(3));
1544 result
.xmmsbyte(0xC) = SaturateWordSToByteS(op2
.xmm16s(4));
1545 result
.xmmsbyte(0xD) = SaturateWordSToByteS(op2
.xmm16s(5));
1546 result
.xmmsbyte(0xE) = SaturateWordSToByteS(op2
.xmm16s(6));
1547 result
.xmmsbyte(0xF) = SaturateWordSToByteS(op2
.xmm16s(7));
1549 /* now write result back to destination */
1550 BX_WRITE_XMM_REG(i
->nnn(), result
);
1552 BX_INFO(("PACKSSWB_VdqWq: required SSE2, use --enable-sse option"));
/*
 * PCMPGTB_VdqWq: per-byte signed greater-than compare.
 * For each of the 16 byte lanes, op1's lane becomes 0xff when op1's signed
 * byte is greater than op2's, otherwise 0. op1 is then written to XMM
 * register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1558 void BX_CPU_C::PCMPGTB_VdqWq(bxInstruction_c
*i
)
1560 #if BX_SUPPORT_SSE >= 2
1561 BX_CPU_THIS_PTR
prepareSSE();
1563 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1565 /* op2 is a register or memory reference */
1567 op2
= BX_READ_XMM_REG(i
->rm());
1570 /* pointer, segment address pair */
1571 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1574 for(unsigned j
=0; j
<16; j
++) {
1575 op1
.xmmsbyte(j
) = (op1
.xmmsbyte(j
) > op2
.xmmsbyte(j
)) ? 0xff : 0;
1578 /* now write result back to destination */
1579 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1581 BX_INFO(("PCMPGTB_VdqWq: required SSE2, use --enable-sse option"));
/*
 * PCMPGTW_VdqWq: per-word signed greater-than compare.
 * For each of the eight 16-bit lanes, op1's lane becomes 0xffff when op1's
 * signed word is greater than op2's, otherwise 0. op1 is then written to
 * XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1587 void BX_CPU_C::PCMPGTW_VdqWq(bxInstruction_c
*i
)
1589 #if BX_SUPPORT_SSE >= 2
1590 BX_CPU_THIS_PTR
prepareSSE();
1592 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1594 /* op2 is a register or memory reference */
1596 op2
= BX_READ_XMM_REG(i
->rm());
1599 /* pointer, segment address pair */
1600 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1603 op1
.xmm16s(0) = (op1
.xmm16s(0) > op2
.xmm16s(0)) ? 0xffff : 0;
1604 op1
.xmm16s(1) = (op1
.xmm16s(1) > op2
.xmm16s(1)) ? 0xffff : 0;
1605 op1
.xmm16s(2) = (op1
.xmm16s(2) > op2
.xmm16s(2)) ? 0xffff : 0;
1606 op1
.xmm16s(3) = (op1
.xmm16s(3) > op2
.xmm16s(3)) ? 0xffff : 0;
1607 op1
.xmm16s(4) = (op1
.xmm16s(4) > op2
.xmm16s(4)) ? 0xffff : 0;
1608 op1
.xmm16s(5) = (op1
.xmm16s(5) > op2
.xmm16s(5)) ? 0xffff : 0;
1609 op1
.xmm16s(6) = (op1
.xmm16s(6) > op2
.xmm16s(6)) ? 0xffff : 0;
1610 op1
.xmm16s(7) = (op1
.xmm16s(7) > op2
.xmm16s(7)) ? 0xffff : 0;
1612 /* now write result back to destination */
1613 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1615 BX_INFO(("PCMPGTW_VdqWq: required SSE2, use --enable-sse option"));
/*
 * PCMPGTD_VdqWdq: per-dword signed greater-than compare.
 * For each of the four 32-bit lanes, op1's lane becomes 0xffffffff when
 * op1's signed dword is greater than op2's, otherwise 0. op1 is then written
 * to XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1621 void BX_CPU_C::PCMPGTD_VdqWdq(bxInstruction_c
*i
)
1623 #if BX_SUPPORT_SSE >= 2
1624 BX_CPU_THIS_PTR
prepareSSE();
1626 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1628 /* op2 is a register or memory reference */
1630 op2
= BX_READ_XMM_REG(i
->rm());
1633 /* pointer, segment address pair */
1634 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1637 op1
.xmm32s(0) = (op1
.xmm32s(0) > op2
.xmm32s(0)) ? 0xffffffff : 0;
1638 op1
.xmm32s(1) = (op1
.xmm32s(1) > op2
.xmm32s(1)) ? 0xffffffff : 0;
1639 op1
.xmm32s(2) = (op1
.xmm32s(2) > op2
.xmm32s(2)) ? 0xffffffff : 0;
1640 op1
.xmm32s(3) = (op1
.xmm32s(3) > op2
.xmm32s(3)) ? 0xffffffff : 0;
1642 /* now write result back to destination */
1643 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1645 BX_INFO(("PCMPGTD_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PACKUSWB_VdqWdq: packs sixteen signed words into sixteen unsigned bytes.
 * Result bytes 0..7 are SaturateWordSToByteU applied to op1 words 0..7;
 * result bytes 8..15 are the same conversion applied to op2 words 0..7.
 * The result is written to XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1651 void BX_CPU_C::PACKUSWB_VdqWdq(bxInstruction_c
*i
)
1653 #if BX_SUPPORT_SSE >= 2
1654 BX_CPU_THIS_PTR
prepareSSE();
1656 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1658 /* op2 is a register or memory reference */
1660 op2
= BX_READ_XMM_REG(i
->rm());
1663 /* pointer, segment address pair */
1664 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
/* low half of the result: op1 */
1667 result
.xmmubyte(0x0) = SaturateWordSToByteU(op1
.xmm16s(0));
1668 result
.xmmubyte(0x1) = SaturateWordSToByteU(op1
.xmm16s(1));
1669 result
.xmmubyte(0x2) = SaturateWordSToByteU(op1
.xmm16s(2));
1670 result
.xmmubyte(0x3) = SaturateWordSToByteU(op1
.xmm16s(3));
1671 result
.xmmubyte(0x4) = SaturateWordSToByteU(op1
.xmm16s(4));
1672 result
.xmmubyte(0x5) = SaturateWordSToByteU(op1
.xmm16s(5));
1673 result
.xmmubyte(0x6) = SaturateWordSToByteU(op1
.xmm16s(6));
1674 result
.xmmubyte(0x7) = SaturateWordSToByteU(op1
.xmm16s(7));
/* high half of the result: op2 */
1676 result
.xmmubyte(0x8) = SaturateWordSToByteU(op2
.xmm16s(0));
1677 result
.xmmubyte(0x9) = SaturateWordSToByteU(op2
.xmm16s(1));
1678 result
.xmmubyte(0xA) = SaturateWordSToByteU(op2
.xmm16s(2));
1679 result
.xmmubyte(0xB) = SaturateWordSToByteU(op2
.xmm16s(3));
1680 result
.xmmubyte(0xC) = SaturateWordSToByteU(op2
.xmm16s(4));
1681 result
.xmmubyte(0xD) = SaturateWordSToByteU(op2
.xmm16s(5));
1682 result
.xmmubyte(0xE) = SaturateWordSToByteU(op2
.xmm16s(6));
1683 result
.xmmubyte(0xF) = SaturateWordSToByteU(op2
.xmm16s(7));
1685 /* now write result back to destination */
1686 BX_WRITE_XMM_REG(i
->nnn(), result
);
1688 BX_INFO(("PACKUSWB_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PUNPCKHBW_VdqWq: interleaves the high eight bytes of op1 and op2.
 * For k = 0..7, result byte 2k is op1 byte 8+k and result byte 2k+1 is op2
 * byte 8+k. The result is written to XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1694 void BX_CPU_C::PUNPCKHBW_VdqWq(bxInstruction_c
*i
)
1696 #if BX_SUPPORT_SSE >= 2
1697 BX_CPU_THIS_PTR
prepareSSE();
1699 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1701 /* op2 is a register or memory reference */
1703 op2
= BX_READ_XMM_REG(i
->rm());
1706 /* pointer, segment address pair */
1707 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
/* even result bytes come from op1, odd result bytes from op2 */
1710 result
.xmmubyte(0x0) = op1
.xmmubyte(0x8);
1711 result
.xmmubyte(0x1) = op2
.xmmubyte(0x8);
1712 result
.xmmubyte(0x2) = op1
.xmmubyte(0x9);
1713 result
.xmmubyte(0x3) = op2
.xmmubyte(0x9);
1714 result
.xmmubyte(0x4) = op1
.xmmubyte(0xA);
1715 result
.xmmubyte(0x5) = op2
.xmmubyte(0xA);
1716 result
.xmmubyte(0x6) = op1
.xmmubyte(0xB);
1717 result
.xmmubyte(0x7) = op2
.xmmubyte(0xB);
1718 result
.xmmubyte(0x8) = op1
.xmmubyte(0xC);
1719 result
.xmmubyte(0x9) = op2
.xmmubyte(0xC);
1720 result
.xmmubyte(0xA) = op1
.xmmubyte(0xD);
1721 result
.xmmubyte(0xB) = op2
.xmmubyte(0xD);
1722 result
.xmmubyte(0xC) = op1
.xmmubyte(0xE);
1723 result
.xmmubyte(0xD) = op2
.xmmubyte(0xE);
1724 result
.xmmubyte(0xE) = op1
.xmmubyte(0xF);
1725 result
.xmmubyte(0xF) = op2
.xmmubyte(0xF);
1727 /* now write result back to destination */
1728 BX_WRITE_XMM_REG(i
->nnn(), result
);
1730 BX_INFO(("PUNPCKHBW_VdqWq: required SSE2, use --enable-sse option"));
/*
 * PUNPCKHWD_VdqWq: interleaves the high four 16-bit words of op1 and op2.
 * For k = 0..3, result word 2k is op1 word 4+k and result word 2k+1 is op2
 * word 4+k. The result is written to XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1736 void BX_CPU_C::PUNPCKHWD_VdqWq(bxInstruction_c
*i
)
1738 #if BX_SUPPORT_SSE >= 2
1739 BX_CPU_THIS_PTR
prepareSSE();
1741 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1743 /* op2 is a register or memory reference */
1745 op2
= BX_READ_XMM_REG(i
->rm());
1748 /* pointer, segment address pair */
1749 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1752 result
.xmm16u(0) = op1
.xmm16u(4);
1753 result
.xmm16u(1) = op2
.xmm16u(4);
1754 result
.xmm16u(2) = op1
.xmm16u(5);
1755 result
.xmm16u(3) = op2
.xmm16u(5);
1756 result
.xmm16u(4) = op1
.xmm16u(6);
1757 result
.xmm16u(5) = op2
.xmm16u(6);
1758 result
.xmm16u(6) = op1
.xmm16u(7);
1759 result
.xmm16u(7) = op2
.xmm16u(7);
1761 /* now write result back to destination */
1762 BX_WRITE_XMM_REG(i
->nnn(), result
);
1764 BX_INFO(("PUNPCKHWD_VdqWq: required SSE2, use --enable-sse option"));
/*
 * UNPCKHPS_VpsWq: interleaves the high two 32-bit lanes of op1 and op2.
 * Result lanes are, in order: op1 lane 2, op2 lane 2, op1 lane 3, op2
 * lane 3. The result is written to XMM register i->nnn(). According to the
 * opcode comments below, the same handler serves UNPCKHPS and PUNPCKHDQ.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1769 /* UNPCKHPS: 0F 15 */
1770 /* PUNPCKHDQ: 66 0F 6A */
1771 void BX_CPU_C::UNPCKHPS_VpsWq(bxInstruction_c
*i
)
1773 #if BX_SUPPORT_SSE >= 1
1774 BX_CPU_THIS_PTR
prepareSSE();
1776 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1778 /* op2 is a register or memory reference */
1780 op2
= BX_READ_XMM_REG(i
->rm());
1783 /* pointer, segment address pair */
1784 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1787 result
.xmm32u(0) = op1
.xmm32u(2);
1788 result
.xmm32u(1) = op2
.xmm32u(2);
1789 result
.xmm32u(2) = op1
.xmm32u(3);
1790 result
.xmm32u(3) = op2
.xmm32u(3);
1792 /* now write result back to destination */
1793 BX_WRITE_XMM_REG(i
->nnn(), result
);
1795 BX_INFO(("UNPCKHPS_VpsWq: required SSE, use --enable-sse option"));
/*
 * PACKSSDW_VdqWdq: packs eight signed dwords into eight signed words.
 * Result words 0..3 are SaturateDwordSToWordS applied to op1 dwords 0..3;
 * result words 4..7 are the same conversion applied to op2 dwords 0..3.
 * The result is written to XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1801 void BX_CPU_C::PACKSSDW_VdqWdq(bxInstruction_c
*i
)
1803 #if BX_SUPPORT_SSE >= 2
1804 BX_CPU_THIS_PTR
prepareSSE();
1806 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1808 /* op2 is a register or memory reference */
1810 op2
= BX_READ_XMM_REG(i
->rm());
1813 /* pointer, segment address pair */
1814 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
/* low half of the result: op1 */
1817 result
.xmm16s(0) = SaturateDwordSToWordS(op1
.xmm32s(0));
1818 result
.xmm16s(1) = SaturateDwordSToWordS(op1
.xmm32s(1));
1819 result
.xmm16s(2) = SaturateDwordSToWordS(op1
.xmm32s(2));
1820 result
.xmm16s(3) = SaturateDwordSToWordS(op1
.xmm32s(3));
/* high half of the result: op2 */
1822 result
.xmm16s(4) = SaturateDwordSToWordS(op2
.xmm32s(0));
1823 result
.xmm16s(5) = SaturateDwordSToWordS(op2
.xmm32s(1));
1824 result
.xmm16s(6) = SaturateDwordSToWordS(op2
.xmm32s(2));
1825 result
.xmm16s(7) = SaturateDwordSToWordS(op2
.xmm32s(3));
1827 /* now write result back to destination */
1828 BX_WRITE_XMM_REG(i
->nnn(), result
);
1830 BX_INFO(("PACKSSDW_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PUNPCKLQDQ_VdqWq: places op2's low qword in op1's high qword.
 * op1 qword 0 is left as read from XMM register i->nnn(); op1 qword 1 is
 * overwritten with op2 qword 0. op1 is then written back to i->nnn().
 * According to the opcode comments below, the same handler serves UNPCKLPD
 * and PUNPCKLQDQ.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1834 /* UNPCKLPD: 66 0F 14 */
1835 /* PUNPCKLQDQ: 66 0F 6C */
1836 void BX_CPU_C::PUNPCKLQDQ_VdqWq(bxInstruction_c
*i
)
1838 #if BX_SUPPORT_SSE >= 2
1839 BX_CPU_THIS_PTR
prepareSSE();
1841 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1843 /* op2 is a register or memory reference */
1845 op2
= BX_READ_XMM_REG(i
->rm());
1848 /* pointer, segment address pair */
1849 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1852 op1
.xmm64u(1) = op2
.xmm64u(0);
1854 /* now write result back to destination */
1855 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1857 BX_INFO(("PUNPCKLQDQ_VdqWq: required SSE2, use --enable-sse option"));
/*
 * PUNPCKHQDQ_VdqWq: combines the high qwords of op1 and op2.
 * Result qword 0 is op1 qword 1 and result qword 1 is op2 qword 1. The
 * result is written to XMM register i->nnn(). According to the opcode
 * comments below, the same handler serves UNPCKHPD and PUNPCKHQDQ.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1862 /* UNPCKHPD: 66 0F 15 */
1863 /* PUNPCKHQDQ: 66 0F 6D */
1864 void BX_CPU_C::PUNPCKHQDQ_VdqWq(bxInstruction_c
*i
)
1866 #if BX_SUPPORT_SSE >= 2
1867 BX_CPU_THIS_PTR
prepareSSE();
1869 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1871 /* op2 is a register or memory reference */
1873 op2
= BX_READ_XMM_REG(i
->rm());
1876 /* pointer, segment address pair */
1877 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1880 result
.xmm64u(0) = op1
.xmm64u(1);
1881 result
.xmm64u(1) = op2
.xmm64u(1);
1883 /* now write result back to destination */
1884 BX_WRITE_XMM_REG(i
->nnn(), result
);
1886 BX_INFO(("PUNPCKHQDQ_VdqWq: required SSE2, use --enable-sse option"));
/*
 * PSHUFD_VdqWdqIb: permutes the four 32-bit lanes of the source operand.
 * order = i->Ib() holds four 2-bit selectors: result lane n (n = 0..3) is
 * source lane ((order >> 2n) & 3). The result is written to XMM register
 * i->nnn(). The source is XMM register i->rm() or a 16-byte memory operand
 * at (i->seg(), RMAddr(i)).
 * The memory operand is read with readVirtualDQwordAligned, the helper the
 * other 128-bit integer handlers in this file use, instead of the plain
 * read_virtual_dqword; the non-VEX form of this instruction requires a
 * 16-byte aligned operand (presumably what the Aligned helper enforces).
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1892 void BX_CPU_C::PSHUFD_VdqWdqIb(bxInstruction_c
*i
)
1894 #if BX_SUPPORT_SSE >= 2
1895 BX_CPU_THIS_PTR
prepareSSE();
1897 BxPackedXmmRegister op
, result
;
1898 Bit8u order
= i
->Ib();
1900 /* op is a register or memory reference */
1902 op
= BX_READ_XMM_REG(i
->rm());
1905 /* pointer, segment address pair */
1906 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
1909 result
.xmm32u(0) = op
.xmm32u((order
>> 0) & 0x3);
1910 result
.xmm32u(1) = op
.xmm32u((order
>> 2) & 0x3);
1911 result
.xmm32u(2) = op
.xmm32u((order
>> 4) & 0x3);
1912 result
.xmm32u(3) = op
.xmm32u((order
>> 6) & 0x3);
1914 /* now write result back to destination */
1915 BX_WRITE_XMM_REG(i
->nnn(), result
);
1917 BX_INFO(("PSHUFD_VdqWdqIb: required SSE2, use --enable-sse option"));
/*
 * PSHUFHW_VqWqIb: permutes the four high 16-bit words of the source operand.
 * The low qword is copied unchanged. order = i->Ib() holds four 2-bit
 * selectors: result word 4+n (n = 0..3) is source word
 * 4 + ((order >> 2n) & 3). The result is written to XMM register i->nnn().
 * The memory operand is read with readVirtualDQwordAligned, the helper the
 * other 128-bit integer handlers in this file use, instead of the plain
 * read_virtual_dqword; the non-VEX form of this instruction requires a
 * 16-byte aligned operand (presumably what the Aligned helper enforces).
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1923 void BX_CPU_C::PSHUFHW_VqWqIb(bxInstruction_c
*i
)
1925 #if BX_SUPPORT_SSE >= 2
1926 BX_CPU_THIS_PTR
prepareSSE();
1928 BxPackedXmmRegister op
, result
;
1929 Bit8u order
= i
->Ib();
1931 /* op is a register or memory reference */
1933 op
= BX_READ_XMM_REG(i
->rm());
1936 /* pointer, segment address pair */
1937 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
1940 result
.xmm64u(0) = op
.xmm64u(0);
1941 result
.xmm16u(4) = op
.xmm16u(4 + ((order
>> 0) & 0x3));
1942 result
.xmm16u(5) = op
.xmm16u(4 + ((order
>> 2) & 0x3));
1943 result
.xmm16u(6) = op
.xmm16u(4 + ((order
>> 4) & 0x3));
1944 result
.xmm16u(7) = op
.xmm16u(4 + ((order
>> 6) & 0x3));
1946 /* now write result back to destination */
1947 BX_WRITE_XMM_REG(i
->nnn(), result
);
1949 BX_INFO(("PSHUFHW_VqWqIb: required SSE2, use --enable-sse option"));
/*
 * PSHUFLW_VqWqIb: permutes the four low 16-bit words of the source operand.
 * order = i->Ib() holds four 2-bit selectors: result word n (n = 0..3) is
 * source word ((order >> 2n) & 3). The high qword is copied unchanged. The
 * result is written to XMM register i->nnn().
 * The memory operand is read with readVirtualDQwordAligned, the helper the
 * other 128-bit integer handlers in this file use, instead of the plain
 * read_virtual_dqword; the non-VEX form of this instruction requires a
 * 16-byte aligned operand (presumably what the Aligned helper enforces).
 * The log message now says SSE2, matching the BX_SUPPORT_SSE >= 2 guard.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1955 void BX_CPU_C::PSHUFLW_VqWqIb(bxInstruction_c
*i
)
1957 #if BX_SUPPORT_SSE >= 2
1958 BX_CPU_THIS_PTR
prepareSSE();
1960 BxPackedXmmRegister op
, result
;
1961 Bit8u order
= i
->Ib();
1963 /* op is a register or memory reference */
1965 op
= BX_READ_XMM_REG(i
->rm());
1968 /* pointer, segment address pair */
1969 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
1972 result
.xmm16u(0) = op
.xmm16u((order
>> 0) & 0x3);
1973 result
.xmm16u(1) = op
.xmm16u((order
>> 2) & 0x3);
1974 result
.xmm16u(2) = op
.xmm16u((order
>> 4) & 0x3);
1975 result
.xmm16u(3) = op
.xmm16u((order
>> 6) & 0x3);
1976 result
.xmm64u(1) = op
.xmm64u(1);
1978 /* now write result back to destination */
1979 BX_WRITE_XMM_REG(i
->nnn(), result
);
1981 BX_INFO(("PSHUFLW_VqWqIb: required SSE2, use --enable-sse option"));
/*
 * PCMPEQB_VdqWdq: per-byte equality compare.
 * For each of the 16 byte lanes, op1's lane becomes 0xff when it equals
 * op2's lane, otherwise 0. op1 is then written to XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
1987 void BX_CPU_C::PCMPEQB_VdqWdq(bxInstruction_c
*i
)
1989 #if BX_SUPPORT_SSE >= 2
1990 BX_CPU_THIS_PTR
prepareSSE();
1992 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1994 /* op2 is a register or memory reference */
1996 op2
= BX_READ_XMM_REG(i
->rm());
1999 /* pointer, segment address pair */
2000 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2003 for(unsigned j
=0; j
<16; j
++) {
2004 op1
.xmmubyte(j
) = (op1
.xmmubyte(j
) == op2
.xmmubyte(j
)) ? 0xff : 0;
2007 /* now write result back to destination */
2008 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2010 BX_INFO(("PCMPEQB_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PCMPEQW_VdqWdq: per-word equality compare.
 * For each of the eight 16-bit lanes, op1's lane becomes 0xffff when it
 * equals op2's lane, otherwise 0. op1 is then written to XMM register
 * i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
2016 void BX_CPU_C::PCMPEQW_VdqWdq(bxInstruction_c
*i
)
2018 #if BX_SUPPORT_SSE >= 2
2019 BX_CPU_THIS_PTR
prepareSSE();
2021 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2023 /* op2 is a register or memory reference */
2025 op2
= BX_READ_XMM_REG(i
->rm());
2028 /* pointer, segment address pair */
2029 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2032 op1
.xmm16u(0) = (op1
.xmm16u(0) == op2
.xmm16u(0)) ? 0xffff : 0;
2033 op1
.xmm16u(1) = (op1
.xmm16u(1) == op2
.xmm16u(1)) ? 0xffff : 0;
2034 op1
.xmm16u(2) = (op1
.xmm16u(2) == op2
.xmm16u(2)) ? 0xffff : 0;
2035 op1
.xmm16u(3) = (op1
.xmm16u(3) == op2
.xmm16u(3)) ? 0xffff : 0;
2036 op1
.xmm16u(4) = (op1
.xmm16u(4) == op2
.xmm16u(4)) ? 0xffff : 0;
2037 op1
.xmm16u(5) = (op1
.xmm16u(5) == op2
.xmm16u(5)) ? 0xffff : 0;
2038 op1
.xmm16u(6) = (op1
.xmm16u(6) == op2
.xmm16u(6)) ? 0xffff : 0;
2039 op1
.xmm16u(7) = (op1
.xmm16u(7) == op2
.xmm16u(7)) ? 0xffff : 0;
2041 /* now write result back to destination */
2042 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2044 BX_INFO(("PCMPEQW_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PCMPEQD_VdqWdq: per-dword equality compare.
 * For each of the four 32-bit lanes, op1's lane becomes 0xffffffff when it
 * equals op2's lane, otherwise 0. op1 is then written to XMM register
 * i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
2050 void BX_CPU_C::PCMPEQD_VdqWdq(bxInstruction_c
*i
)
2052 #if BX_SUPPORT_SSE >= 2
2053 BX_CPU_THIS_PTR
prepareSSE();
2055 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2057 /* op2 is a register or memory reference */
2059 op2
= BX_READ_XMM_REG(i
->rm());
2062 /* pointer, segment address pair */
2063 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2066 op1
.xmm32u(0) = (op1
.xmm32u(0) == op2
.xmm32u(0)) ? 0xffffffff : 0;
2067 op1
.xmm32u(1) = (op1
.xmm32u(1) == op2
.xmm32u(1)) ? 0xffffffff : 0;
2068 op1
.xmm32u(2) = (op1
.xmm32u(2) == op2
.xmm32u(2)) ? 0xffffffff : 0;
2069 op1
.xmm32u(3) = (op1
.xmm32u(3) == op2
.xmm32u(3)) ? 0xffffffff : 0;
2071 /* now write result back to destination */
2072 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2074 BX_INFO(("PCMPEQD_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PINSRW_VdqEwIb: inserts a 16-bit word into XMM register i->nnn().
 * count = i->Ib() & 7 selects the word lane of op1 that receives op2; the
 * other lanes are written back unchanged. op2 is obtained from
 * BX_READ_16BIT_REG(i->rm()) or from read_virtual_word at
 * (i->seg(), RMAddr(i)).
 * The log message now carries this handler's own name (it previously said
 * PINSRW_VdqEdIb).
 * NOTE(review): the declaration of op2, the braces, the register-vs-memory
 * test and any #else/#endif are not visible in this listing; confirm them in
 * the full file.
 */
2080 void BX_CPU_C::PINSRW_VdqEwIb(bxInstruction_c
*i
)
2082 #if BX_SUPPORT_SSE >= 2
2083 BX_CPU_THIS_PTR
prepareSSE();
2085 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn());
2087 Bit8u count
= i
->Ib() & 0x7;
2089 /* op2 is a register or memory reference */
2091 op2
= BX_READ_16BIT_REG(i
->rm());
2094 /* pointer, segment address pair */
2095 read_virtual_word(i
->seg(), RMAddr(i
), &op2
);
2098 op1
.xmm16u(count
) = op2
;
2100 /* now write result back to destination */
2101 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2103 BX_INFO(("PINSRW_VdqEwIb: required SSE2, use --enable-sse option"));
/*
 * PEXTRW_GdUdqIb: extracts one 16-bit word of XMM register i->rm().
 * count = i->Ib() & 7 selects the word; it is widened to Bit32u and handed
 * to BX_WRITE_32BIT_REGZ for register i->nnn(). No memory form is handled
 * in the visible code.
 * NOTE(review): braces and any #else/#endif around the BX_INFO line are not
 * visible in this listing; confirm them in the full file.
 */
2109 void BX_CPU_C::PEXTRW_GdUdqIb(bxInstruction_c
*i
)
2111 #if BX_SUPPORT_SSE >= 2
2112 BX_CPU_THIS_PTR
prepareSSE();
2114 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
2115 Bit8u count
= i
->Ib() & 0x7;
2116 Bit32u result
= (Bit32u
) op
.xmm16u(count
);
2118 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
2120 BX_INFO(("PEXTRW_GdUdqIb: required SSE2, use --enable-sse option"));
/*
 * SHUFPS_VpsWpsIb: builds a result from selected 32-bit lanes of op1 and op2.
 * order = i->Ib() holds four 2-bit selectors. Result lanes 0 and 1 are
 * picked from op1 using bits 1:0 and 3:2; result lanes 2 and 3 are picked
 * from op2 using bits 5:4 and 7:6. The result is written to XMM register
 * i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
2126 void BX_CPU_C::SHUFPS_VpsWpsIb(bxInstruction_c
*i
)
2128 #if BX_SUPPORT_SSE >= 1
2129 BX_CPU_THIS_PTR
prepareSSE();
2131 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2132 Bit8u order
= i
->Ib();
2134 /* op2 is a register or memory reference */
2136 op2
= BX_READ_XMM_REG(i
->rm());
2139 /* pointer, segment address pair */
2140 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2143 result
.xmm32u(0) = op1
.xmm32u((order
>> 0) & 0x3);
2144 result
.xmm32u(1) = op1
.xmm32u((order
>> 2) & 0x3);
2145 result
.xmm32u(2) = op2
.xmm32u((order
>> 4) & 0x3);
2146 result
.xmm32u(3) = op2
.xmm32u((order
>> 6) & 0x3);
2148 /* now write result back to destination */
2149 BX_WRITE_XMM_REG(i
->nnn(), result
);
2151 BX_INFO(("SHUFPS_VpsWpsIb: required SSE, use --enable-sse option"));
/*
 * SHUFPD_VpdWpdIb: builds a result from one 64-bit lane of each operand.
 * order = i->Ib(): bit 0 selects which qword of op1 becomes result qword 0,
 * and bit 1 selects which qword of op2 becomes result qword 1. The result
 * is written to XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
2157 void BX_CPU_C::SHUFPD_VpdWpdIb(bxInstruction_c
*i
)
2159 #if BX_SUPPORT_SSE >= 2
2160 BX_CPU_THIS_PTR
prepareSSE();
2162 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2163 Bit8u order
= i
->Ib();
2165 /* op2 is a register or memory reference */
2167 op2
= BX_READ_XMM_REG(i
->rm());
2170 /* pointer, segment address pair */
2171 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2174 result
.xmm64u(0) = op1
.xmm64u((order
>> 0) & 0x1);
2175 result
.xmm64u(1) = op2
.xmm64u((order
>> 1) & 0x1);
2177 /* now write result back to destination */
2178 BX_WRITE_XMM_REG(i
->nnn(), result
);
2180 BX_INFO(("SHUFPD_VpdWpdIb: required SSE2, use --enable-sse option"));
/*
 * PSRLW_VdqWdq: logical right shift of the eight 16-bit lanes of op1.
 * The count is taken from op2: its low qword is tested against 15, and in
 * the visible shift path the low byte of op2 is used as the shift amount
 * for every word lane. op1 is then written to XMM register i->nnn().
 * NOTE(review): the statements executed when the count exceeds 15 are not
 * visible in this listing (presumably op1 is cleared) - confirm.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
2186 void BX_CPU_C::PSRLW_VdqWdq(bxInstruction_c
*i
)
2188 #if BX_SUPPORT_SSE >= 2
2189 BX_CPU_THIS_PTR
prepareSSE();
2191 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2193 /* op2 is a register or memory reference */
2195 op2
= BX_READ_XMM_REG(i
->rm());
2198 /* pointer, segment address pair */
2199 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2202 if(op2
.xmm64u(0) > 15) /* looking only to low 64 bits */
2209 Bit8u shift
= op2
.xmmubyte(0);
2211 op1
.xmm16u(0) >>= shift
;
2212 op1
.xmm16u(1) >>= shift
;
2213 op1
.xmm16u(2) >>= shift
;
2214 op1
.xmm16u(3) >>= shift
;
2215 op1
.xmm16u(4) >>= shift
;
2216 op1
.xmm16u(5) >>= shift
;
2217 op1
.xmm16u(6) >>= shift
;
2218 op1
.xmm16u(7) >>= shift
;
2221 /* now write result back to destination */
2222 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2224 BX_INFO(("PSRLW_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PSRLD_VdqWdq: logical right shift of the four 32-bit lanes of op1.
 * The count is taken from op2: its low qword is tested against 31, and in
 * the visible shift path the low byte of op2 is used as the shift amount
 * for every dword lane. op1 is then written to XMM register i->nnn().
 * NOTE(review): the statements executed when the count exceeds 31 are not
 * visible in this listing (presumably op1 is cleared) - confirm.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
2230 void BX_CPU_C::PSRLD_VdqWdq(bxInstruction_c
*i
)
2232 #if BX_SUPPORT_SSE >= 2
2233 BX_CPU_THIS_PTR
prepareSSE();
2235 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2237 /* op2 is a register or memory reference */
2239 op2
= BX_READ_XMM_REG(i
->rm());
2242 /* pointer, segment address pair */
2243 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2246 if(op2
.xmm64u(0) > 31) /* looking only to low 64 bits */
2253 Bit8u shift
= op2
.xmmubyte(0);
2255 op1
.xmm32u(0) >>= shift
;
2256 op1
.xmm32u(1) >>= shift
;
2257 op1
.xmm32u(2) >>= shift
;
2258 op1
.xmm32u(3) >>= shift
;
2261 /* now write result back to destination */
2262 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2264 BX_INFO(("PSRLD_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PSRLQ_VdqWdq: logical right shift of the two 64-bit lanes of op1.
 * The count is taken from op2: its low qword is tested against 63, and in
 * the visible shift path the low byte of op2 is used as the shift amount
 * for both qword lanes. op1 is then written to XMM register i->nnn().
 * NOTE(review): the statements executed when the count exceeds 63 are not
 * visible in this listing (presumably op1 is cleared) - confirm.
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
2270 void BX_CPU_C::PSRLQ_VdqWdq(bxInstruction_c
*i
)
2272 #if BX_SUPPORT_SSE >= 2
2273 BX_CPU_THIS_PTR
prepareSSE();
2275 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2277 /* op2 is a register or memory reference */
2279 op2
= BX_READ_XMM_REG(i
->rm());
2282 /* pointer, segment address pair */
2283 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2286 if(op2
.xmm64u(0) > 63) /* looking only to low 64 bits */
2293 Bit8u shift
= op2
.xmmubyte(0);
2295 op1
.xmm64u(0) >>= shift
;
2296 op1
.xmm64u(1) >>= shift
;
2299 /* now write result back to destination */
2300 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2302 BX_INFO(("PSRLQ_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PADDQ_VdqWdq: adds the two 64-bit lanes of op2 into op1.
 * Each qword of op1 is incremented by the matching qword of op2 using
 * unsigned 64-bit arithmetic (the xmm64u accessor), then op1 is written to
 * XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
2308 void BX_CPU_C::PADDQ_VdqWdq(bxInstruction_c
*i
)
2310 #if BX_SUPPORT_SSE >= 2
2311 BX_CPU_THIS_PTR
prepareSSE();
2313 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2315 /* op2 is a register or memory reference */
2317 op2
= BX_READ_XMM_REG(i
->rm());
2320 /* pointer, segment address pair */
2321 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2324 op1
.xmm64u(0) += op2
.xmm64u(0);
2325 op1
.xmm64u(1) += op2
.xmm64u(1);
2327 /* now write result back to destination */
2328 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2330 BX_INFO(("PADDQ_VdqWdq: required SSE2, use --enable-sse option"));
/*
 * PMULLW_VdqWdq: per-word multiply keeping the low 16 bits of each product.
 * For each of the eight 16-bit lanes, the lanes of op1 and op2 are widened
 * to Bit32u and multiplied, and the product masked with 0xffff is stored in
 * the matching result lane. The result is written to XMM register i->nnn().
 * NOTE(review): braces, the register-vs-memory test and any #else/#endif are
 * not visible in this listing; confirm the control flow in the full file.
 */
2336 void BX_CPU_C::PMULLW_VdqWdq(bxInstruction_c
*i
)
2338 #if BX_SUPPORT_SSE >= 2
2339 BX_CPU_THIS_PTR
prepareSSE();
2341 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2343 /* op2 is a register or memory reference */
2345 op2
= BX_READ_XMM_REG(i
->rm());
2348 /* pointer, segment address pair */
2349 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
/* full 32-bit products, one per lane */
2352 Bit32u product1
= Bit32u(op1
.xmm16u(0)) * Bit32u(op2
.xmm16u(0));
2353 Bit32u product2
= Bit32u(op1
.xmm16u(1)) * Bit32u(op2
.xmm16u(1));
2354 Bit32u product3
= Bit32u(op1
.xmm16u(2)) * Bit32u(op2
.xmm16u(2));
2355 Bit32u product4
= Bit32u(op1
.xmm16u(3)) * Bit32u(op2
.xmm16u(3));
2356 Bit32u product5
= Bit32u(op1
.xmm16u(4)) * Bit32u(op2
.xmm16u(4));
2357 Bit32u product6
= Bit32u(op1
.xmm16u(5)) * Bit32u(op2
.xmm16u(5));
2358 Bit32u product7
= Bit32u(op1
.xmm16u(6)) * Bit32u(op2
.xmm16u(6));
2359 Bit32u product8
= Bit32u(op1
.xmm16u(7)) * Bit32u(op2
.xmm16u(7));
/* keep only the low halves */
2361 result
.xmm16u(0) = product1
& 0xffff;
2362 result
.xmm16u(1) = product2
& 0xffff;
2363 result
.xmm16u(2) = product3
& 0xffff;
2364 result
.xmm16u(3) = product4
& 0xffff;
2365 result
.xmm16u(4) = product5
& 0xffff;
2366 result
.xmm16u(5) = product6
& 0xffff;
2367 result
.xmm16u(6) = product7
& 0xffff;
2368 result
.xmm16u(7) = product8
& 0xffff;
2370 /* now write result back to destination */
2371 BX_WRITE_XMM_REG(i
->nnn(), result
);
2373 BX_INFO(("PMULLW_VdqWdq: required SSE2, use --enable-sse option"));
2379 void BX_CPU_C::PSUBUSB_VdqWdq(bxInstruction_c
*i
)
2381 #if BX_SUPPORT_SSE >= 2
2382 BX_CPU_THIS_PTR
prepareSSE();
2384 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2386 /* op2 is a register or memory reference */
2388 op2
= BX_READ_XMM_REG(i
->rm());
2391 /* pointer, segment address pair */
2392 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2395 result
.xmm64u(0) = result
.xmm64u(1) = 0;
2397 for(unsigned j
=0; j
<16; j
++)
2399 if(op1
.xmmubyte(j
) > op2
.xmmubyte(j
))
2401 result
.xmmubyte(j
) = op1
.xmmubyte(j
) - op2
.xmmubyte(j
);
2405 /* now write result back to destination */
2406 BX_WRITE_XMM_REG(i
->nnn(), result
);
2408 BX_INFO(("PSUBUSB_VdqWdq: required SSE2, use --enable-sse option"));
2414 void BX_CPU_C::PSUBUSW_VdqWdq(bxInstruction_c
*i
)
2416 #if BX_SUPPORT_SSE >= 2
2417 BX_CPU_THIS_PTR
prepareSSE();
2419 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2421 /* op2 is a register or memory reference */
2423 op2
= BX_READ_XMM_REG(i
->rm());
2426 /* pointer, segment address pair */
2427 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2430 result
.xmm64u(0) = result
.xmm64u(1) = 0;
2432 for(unsigned j
=0; j
<8; j
++)
2434 if(op1
.xmm16u(j
) > op2
.xmm16u(j
))
2436 result
.xmm16u(j
) = op1
.xmm16u(j
) - op2
.xmm16u(j
);
2440 /* now write result back to destination */
2441 BX_WRITE_XMM_REG(i
->nnn(), result
);
2443 BX_INFO(("PSUBUSW_VdqWdq: required SSE2, use --enable-sse option"));
2449 void BX_CPU_C::PMINUB_VdqWdq(bxInstruction_c
*i
)
2451 #if BX_SUPPORT_SSE >= 2
2452 BX_CPU_THIS_PTR
prepareSSE();
2454 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2456 /* op2 is a register or memory reference */
2458 op2
= BX_READ_XMM_REG(i
->rm());
2461 /* pointer, segment address pair */
2462 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2465 for(unsigned j
=0; j
<16; j
++) {
2466 if(op2
.xmmubyte(j
) < op1
.xmmubyte(j
)) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
2469 /* now write result back to destination */
2470 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2472 BX_INFO(("PMINUB_VdqWdq: required SSE2, use --enable-sse option"));
2478 /* ANDPD: 66 0F 54 */
2479 /* PAND: 66 0F DB */
2480 void BX_CPU_C::ANDPS_VpsWps(bxInstruction_c
*i
)
2482 #if BX_SUPPORT_SSE >= 1
2483 BX_CPU_THIS_PTR
prepareSSE();
2485 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2487 /* op2 is a register or memory reference */
2489 op2
= BX_READ_XMM_REG(i
->rm());
2492 /* pointer, segment address pair */
2493 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2496 op1
.xmm64u(0) &= op2
.xmm64u(0);
2497 op1
.xmm64u(1) &= op2
.xmm64u(1);
2499 /* now write result back to destination */
2500 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2502 BX_INFO(("ANDPS_VpsWps: required SSE, use --enable-sse option"));
2508 void BX_CPU_C::PADDUSB_VdqWdq(bxInstruction_c
*i
)
2510 #if BX_SUPPORT_SSE >= 2
2511 BX_CPU_THIS_PTR
prepareSSE();
2513 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2515 /* op2 is a register or memory reference */
2517 op2
= BX_READ_XMM_REG(i
->rm());
2520 /* pointer, segment address pair */
2521 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2524 for(unsigned j
=0; j
<16; j
++) {
2525 result
.xmmubyte(j
) = SaturateWordSToByteU(Bit16s(op1
.xmmubyte(j
)) + Bit16s(op2
.xmmubyte(j
)));
2528 /* now write result back to destination */
2529 BX_WRITE_XMM_REG(i
->nnn(), result
);
2531 BX_INFO(("PADDUSB_VdqWdq: required SSE2, use --enable-sse option"));
2537 void BX_CPU_C::PADDUSW_VdqWdq(bxInstruction_c
*i
)
2539 #if BX_SUPPORT_SSE >= 2
2540 BX_CPU_THIS_PTR
prepareSSE();
2542 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2544 /* op2 is a register or memory reference */
2546 op2
= BX_READ_XMM_REG(i
->rm());
2549 /* pointer, segment address pair */
2550 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2553 result
.xmm16u(0) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(0)) + Bit32s(op2
.xmm16u(0)));
2554 result
.xmm16u(1) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(1)) + Bit32s(op2
.xmm16u(1)));
2555 result
.xmm16u(2) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(2)) + Bit32s(op2
.xmm16u(2)));
2556 result
.xmm16u(3) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(3)) + Bit32s(op2
.xmm16u(3)));
2557 result
.xmm16u(4) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(4)) + Bit32s(op2
.xmm16u(4)));
2558 result
.xmm16u(5) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(5)) + Bit32s(op2
.xmm16u(5)));
2559 result
.xmm16u(6) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(6)) + Bit32s(op2
.xmm16u(6)));
2560 result
.xmm16u(7) = SaturateDwordSToWordU(Bit32s(op1
.xmm16u(7)) + Bit32s(op2
.xmm16u(7)));
2562 /* now write result back to destination */
2563 BX_WRITE_XMM_REG(i
->nnn(), result
);
2565 BX_INFO(("PADDUSW_VdqWdq: required SSE2, use --enable-sse option"));
2571 void BX_CPU_C::PMAXUB_VdqWdq(bxInstruction_c
*i
)
2573 #if BX_SUPPORT_SSE >= 2
2574 BX_CPU_THIS_PTR
prepareSSE();
2576 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2578 /* op2 is a register or memory reference */
2580 op2
= BX_READ_XMM_REG(i
->rm());
2583 /* pointer, segment address pair */
2584 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2587 for(unsigned j
=0; j
<16; j
++) {
2588 if(op2
.xmmubyte(j
) > op1
.xmmubyte(j
)) op1
.xmmubyte(j
) = op2
.xmmubyte(j
);
2591 /* now write result back to destination */
2592 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2594 BX_INFO(("PMAXUB_VdqWdq: required SSE2, use --enable-sse option"));
2600 /* ANDNPD: 66 0F 55 */
2601 /* PANDN: 66 0F DF */
2602 void BX_CPU_C::ANDNPS_VpsWps(bxInstruction_c
*i
)
2604 #if BX_SUPPORT_SSE >= 1
2605 BX_CPU_THIS_PTR
prepareSSE();
2607 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2609 /* op2 is a register or memory reference */
2611 op2
= BX_READ_XMM_REG(i
->rm());
2614 /* pointer, segment address pair */
2615 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2618 op1
.xmm64u(0) = ~(op1
.xmm64u(0)) & op2
.xmm64u(0);
2619 op1
.xmm64u(1) = ~(op1
.xmm64u(1)) & op2
.xmm64u(1);
2621 /* now write result back to destination */
2622 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2624 BX_INFO(("ANDNPS_VpsWps: required SSE, use --enable-sse option"));
/* PAVGB (XMM form): for each of the 16 byte lanes stores
 * (op1 + op2 + 1) >> 1 into op1, then writes op1 back to the XMM
 * register selected by i->nnn().
 * NOTE(review): the preprocessor guard that pairs with the BX_INFO
 * fallback at the end is not visible in this listing - confirm which
 * feature test selects between the two paths. */
2630 void BX_CPU_C::PAVGB_VdqWdq(bxInstruction_c
*i
)
2633 BX_CPU_THIS_PTR
prepareSSE();
2635 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2637 /* op2 is a register or memory reference */
2639 op2
= BX_READ_XMM_REG(i
->rm());
2642 /* pointer, segment address pair */
2643 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
/* the +1 before the shift makes the halved sum round upwards */
2646 for(unsigned j
=0; j
<16; j
++) {
2647 op1
.xmmubyte(j
) = (op1
.xmmubyte(j
) + op2
.xmmubyte(j
) + 1) >> 1;
2650 /* now write result back to destination */
2651 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2653 BX_INFO(("PAVGB_VdqWdq: required SSE, use --enable-sse option"));
2659 void BX_CPU_C::PSRAW_VdqWdq(bxInstruction_c
*i
)
2661 #if BX_SUPPORT_SSE >= 2
2662 BX_CPU_THIS_PTR
prepareSSE();
2664 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2666 /* op2 is a register or memory reference */
2668 op2
= BX_READ_XMM_REG(i
->rm());
2671 /* pointer, segment address pair */
2672 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2675 if(op2
.xmm64u(0) == 0) return;
2677 if(op2
.xmm64u(0) > 15) /* looking only to low 64 bits */
2679 result
.xmm16u(0) = (op1
.xmm16u(0) & 0x8000) ? 0xffff : 0;
2680 result
.xmm16u(1) = (op1
.xmm16u(1) & 0x8000) ? 0xffff : 0;
2681 result
.xmm16u(2) = (op1
.xmm16u(2) & 0x8000) ? 0xffff : 0;
2682 result
.xmm16u(3) = (op1
.xmm16u(3) & 0x8000) ? 0xffff : 0;
2683 result
.xmm16u(4) = (op1
.xmm16u(4) & 0x8000) ? 0xffff : 0;
2684 result
.xmm16u(5) = (op1
.xmm16u(5) & 0x8000) ? 0xffff : 0;
2685 result
.xmm16u(6) = (op1
.xmm16u(6) & 0x8000) ? 0xffff : 0;
2686 result
.xmm16u(7) = (op1
.xmm16u(7) & 0x8000) ? 0xffff : 0;
2690 Bit8u shift
= op2
.xmmubyte(0);
2692 result
.xmm16u(0) = op1
.xmm16u(0) >> shift
;
2693 result
.xmm16u(1) = op1
.xmm16u(1) >> shift
;
2694 result
.xmm16u(2) = op1
.xmm16u(2) >> shift
;
2695 result
.xmm16u(3) = op1
.xmm16u(3) >> shift
;
2696 result
.xmm16u(4) = op1
.xmm16u(4) >> shift
;
2697 result
.xmm16u(5) = op1
.xmm16u(5) >> shift
;
2698 result
.xmm16u(6) = op1
.xmm16u(6) >> shift
;
2699 result
.xmm16u(7) = op1
.xmm16u(7) >> shift
;
2701 if(op1
.xmm16u(0) & 0x8000) result
.xmm16u(0) |= (0xffff << (16 - shift
));
2702 if(op1
.xmm16u(1) & 0x8000) result
.xmm16u(1) |= (0xffff << (16 - shift
));
2703 if(op1
.xmm16u(2) & 0x8000) result
.xmm16u(2) |= (0xffff << (16 - shift
));
2704 if(op1
.xmm16u(3) & 0x8000) result
.xmm16u(3) |= (0xffff << (16 - shift
));
2705 if(op1
.xmm16u(4) & 0x8000) result
.xmm16u(4) |= (0xffff << (16 - shift
));
2706 if(op1
.xmm16u(5) & 0x8000) result
.xmm16u(5) |= (0xffff << (16 - shift
));
2707 if(op1
.xmm16u(6) & 0x8000) result
.xmm16u(6) |= (0xffff << (16 - shift
));
2708 if(op1
.xmm16u(7) & 0x8000) result
.xmm16u(7) |= (0xffff << (16 - shift
));
2711 /* now write result back to destination */
2712 BX_WRITE_XMM_REG(i
->nnn(), result
);
2714 BX_INFO(("PSRAW_VdqWdq: required SSE2, use --enable-sse option"));
2720 void BX_CPU_C::PSRAD_VdqWdq(bxInstruction_c
*i
)
2722 #if BX_SUPPORT_SSE >= 2
2723 BX_CPU_THIS_PTR
prepareSSE();
2725 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2727 /* op2 is a register or memory reference */
2729 op2
= BX_READ_XMM_REG(i
->rm());
2732 /* pointer, segment address pair */
2733 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2736 if(op2
.xmm64u(0) == 0) return;
2738 if(op2
.xmm64u(0) > 31) /* looking only to low 64 bits */
2740 result
.xmm32u(0) = (op1
.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
2741 result
.xmm32u(1) = (op1
.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
2742 result
.xmm32u(2) = (op1
.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
2743 result
.xmm32u(3) = (op1
.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
2747 Bit8u shift
= op2
.xmmubyte(0);
2749 result
.xmm32u(0) = op1
.xmm32u(0) >> shift
;
2750 result
.xmm32u(1) = op1
.xmm32u(1) >> shift
;
2751 result
.xmm32u(2) = op1
.xmm32u(2) >> shift
;
2752 result
.xmm32u(3) = op1
.xmm32u(3) >> shift
;
2754 if(op1
.xmm32u(0) & 0x80000000) result
.xmm32u(0) |= (0xffffffff << (32-shift
));
2755 if(op1
.xmm32u(1) & 0x80000000) result
.xmm32u(1) |= (0xffffffff << (32-shift
));
2756 if(op1
.xmm32u(2) & 0x80000000) result
.xmm32u(2) |= (0xffffffff << (32-shift
));
2757 if(op1
.xmm32u(3) & 0x80000000) result
.xmm32u(3) |= (0xffffffff << (32-shift
));
2760 /* now write result back to destination */
2761 BX_WRITE_XMM_REG(i
->nnn(), result
);
2763 BX_INFO(("PSRAD_VdqWdq: required SSE2, use --enable-sse option"));
/* PAVGW (XMM form): for each of the eight 16-bit lanes stores
 * (op1 + op2 + 1) >> 1 into op1, then writes op1 back to the XMM
 * register selected by i->nnn().
 * NOTE(review): the preprocessor guard that pairs with the BX_INFO
 * fallback at the end is not visible in this listing - confirm which
 * feature test selects between the two paths. */
2769 void BX_CPU_C::PAVGW_VdqWdq(bxInstruction_c
*i
)
2772 BX_CPU_THIS_PTR
prepareSSE();
2774 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2776 /* op2 is a register or memory reference */
2778 op2
= BX_READ_XMM_REG(i
->rm());
2781 /* pointer, segment address pair */
2782 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
/* the +1 before the shift makes the halved sum round upwards */
2785 op1
.xmm16u(0) = (op1
.xmm16u(0) + op2
.xmm16u(0) + 1) >> 1;
2786 op1
.xmm16u(1) = (op1
.xmm16u(1) + op2
.xmm16u(1) + 1) >> 1;
2787 op1
.xmm16u(2) = (op1
.xmm16u(2) + op2
.xmm16u(2) + 1) >> 1;
2788 op1
.xmm16u(3) = (op1
.xmm16u(3) + op2
.xmm16u(3) + 1) >> 1;
2789 op1
.xmm16u(4) = (op1
.xmm16u(4) + op2
.xmm16u(4) + 1) >> 1;
2790 op1
.xmm16u(5) = (op1
.xmm16u(5) + op2
.xmm16u(5) + 1) >> 1;
2791 op1
.xmm16u(6) = (op1
.xmm16u(6) + op2
.xmm16u(6) + 1) >> 1;
2792 op1
.xmm16u(7) = (op1
.xmm16u(7) + op2
.xmm16u(7) + 1) >> 1;
2794 /* now write result back to destination */
2795 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2797 BX_INFO(("PAVGW_VdqWdq: required SSE, use --enable-sse option"));
2803 void BX_CPU_C::PMULHUW_VdqWdq(bxInstruction_c
*i
)
2805 #if BX_SUPPORT_SSE >= 2
2806 BX_CPU_THIS_PTR
prepareSSE();
2808 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2810 /* op2 is a register or memory reference */
2812 op2
= BX_READ_XMM_REG(i
->rm());
2815 /* pointer, segment address pair */
2816 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2819 Bit32u product1
= Bit32u(op1
.xmm16u(0)) * Bit32u(op2
.xmm16u(0));
2820 Bit32u product2
= Bit32u(op1
.xmm16u(1)) * Bit32u(op2
.xmm16u(1));
2821 Bit32u product3
= Bit32u(op1
.xmm16u(2)) * Bit32u(op2
.xmm16u(2));
2822 Bit32u product4
= Bit32u(op1
.xmm16u(3)) * Bit32u(op2
.xmm16u(3));
2823 Bit32u product5
= Bit32u(op1
.xmm16u(4)) * Bit32u(op2
.xmm16u(4));
2824 Bit32u product6
= Bit32u(op1
.xmm16u(5)) * Bit32u(op2
.xmm16u(5));
2825 Bit32u product7
= Bit32u(op1
.xmm16u(6)) * Bit32u(op2
.xmm16u(6));
2826 Bit32u product8
= Bit32u(op1
.xmm16u(7)) * Bit32u(op2
.xmm16u(7));
2828 result
.xmm16u(0) = (Bit16u
)(product1
>> 16);
2829 result
.xmm16u(1) = (Bit16u
)(product2
>> 16);
2830 result
.xmm16u(2) = (Bit16u
)(product3
>> 16);
2831 result
.xmm16u(3) = (Bit16u
)(product4
>> 16);
2832 result
.xmm16u(4) = (Bit16u
)(product5
>> 16);
2833 result
.xmm16u(5) = (Bit16u
)(product6
>> 16);
2834 result
.xmm16u(6) = (Bit16u
)(product7
>> 16);
2835 result
.xmm16u(7) = (Bit16u
)(product8
>> 16);
2837 /* now write result back to destination */
2838 BX_WRITE_XMM_REG(i
->nnn(), result
);
2840 BX_INFO(("PMULHUW_VdqWdq: required SSE2, use --enable-sse option"));
2846 void BX_CPU_C::PMULHW_VdqWdq(bxInstruction_c
*i
)
2848 #if BX_SUPPORT_SSE >= 2
2849 BX_CPU_THIS_PTR
prepareSSE();
2851 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2853 /* op2 is a register or memory reference */
2855 op2
= BX_READ_XMM_REG(i
->rm());
2858 /* pointer, segment address pair */
2859 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2862 Bit32s product1
= Bit32s(op1
.xmm16s(0)) * Bit32s(op2
.xmm16s(0));
2863 Bit32s product2
= Bit32s(op1
.xmm16s(1)) * Bit32s(op2
.xmm16s(1));
2864 Bit32s product3
= Bit32s(op1
.xmm16s(2)) * Bit32s(op2
.xmm16s(2));
2865 Bit32s product4
= Bit32s(op1
.xmm16s(3)) * Bit32s(op2
.xmm16s(3));
2866 Bit32s product5
= Bit32s(op1
.xmm16s(4)) * Bit32s(op2
.xmm16s(4));
2867 Bit32s product6
= Bit32s(op1
.xmm16s(5)) * Bit32s(op2
.xmm16s(5));
2868 Bit32s product7
= Bit32s(op1
.xmm16s(6)) * Bit32s(op2
.xmm16s(6));
2869 Bit32s product8
= Bit32s(op1
.xmm16s(7)) * Bit32s(op2
.xmm16s(7));
2871 result
.xmm16u(0) = (Bit16u
)(product1
>> 16);
2872 result
.xmm16u(1) = (Bit16u
)(product2
>> 16);
2873 result
.xmm16u(2) = (Bit16u
)(product3
>> 16);
2874 result
.xmm16u(3) = (Bit16u
)(product4
>> 16);
2875 result
.xmm16u(4) = (Bit16u
)(product5
>> 16);
2876 result
.xmm16u(5) = (Bit16u
)(product6
>> 16);
2877 result
.xmm16u(6) = (Bit16u
)(product7
>> 16);
2878 result
.xmm16u(7) = (Bit16u
)(product8
>> 16);
2880 /* now write result back to destination */
2881 BX_WRITE_XMM_REG(i
->nnn(), result
);
2883 BX_INFO(("PMULHW_VdqWdq: required SSE2, use --enable-sse option"));
2889 void BX_CPU_C::PSUBSB_VdqWdq(bxInstruction_c
*i
)
2891 #if BX_SUPPORT_SSE >= 2
2892 BX_CPU_THIS_PTR
prepareSSE();
2894 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2896 /* op2 is a register or memory reference */
2898 op2
= BX_READ_XMM_REG(i
->rm());
2901 /* pointer, segment address pair */
2902 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2905 for(unsigned j
=0; j
<16; j
++) {
2906 result
.xmmsbyte(j
) = SaturateWordSToByteS(Bit16s(op1
.xmmsbyte(j
)) - Bit16s(op2
.xmmsbyte(j
)));
2909 /* now write result back to destination */
2910 BX_WRITE_XMM_REG(i
->nnn(), result
);
2912 BX_INFO(("PSUBSB_VdqWdq: required SSE2, use --enable-sse option"));
2918 void BX_CPU_C::PSUBSW_VdqWdq(bxInstruction_c
*i
)
2920 #if BX_SUPPORT_SSE >= 2
2921 BX_CPU_THIS_PTR
prepareSSE();
2923 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
2925 /* op2 is a register or memory reference */
2927 op2
= BX_READ_XMM_REG(i
->rm());
2930 /* pointer, segment address pair */
2931 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2934 result
.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(0)) - Bit32s(op2
.xmm16s(0)));
2935 result
.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(1)) - Bit32s(op2
.xmm16s(1)));
2936 result
.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(2)) - Bit32s(op2
.xmm16s(2)));
2937 result
.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(3)) - Bit32s(op2
.xmm16s(3)));
2938 result
.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(4)) - Bit32s(op2
.xmm16s(4)));
2939 result
.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(5)) - Bit32s(op2
.xmm16s(5)));
2940 result
.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(6)) - Bit32s(op2
.xmm16s(6)));
2941 result
.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(7)) - Bit32s(op2
.xmm16s(7)));
2943 /* now write result back to destination */
2944 BX_WRITE_XMM_REG(i
->nnn(), result
);
2946 BX_INFO(("PSUBSW_VdqWdq: required SSE2, use --enable-sse option"));
2952 void BX_CPU_C::PMINSW_VdqWdq(bxInstruction_c
*i
)
2954 #if BX_SUPPORT_SSE >= 2
2955 BX_CPU_THIS_PTR
prepareSSE();
2957 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2959 /* op2 is a register or memory reference */
2961 op2
= BX_READ_XMM_REG(i
->rm());
2964 /* pointer, segment address pair */
2965 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
2968 if(op2
.xmm16s(0) < op1
.xmm16s(0)) op1
.xmm16s(0) = op2
.xmm16s(0);
2969 if(op2
.xmm16s(1) < op1
.xmm16s(1)) op1
.xmm16s(1) = op2
.xmm16s(1);
2970 if(op2
.xmm16s(2) < op1
.xmm16s(2)) op1
.xmm16s(2) = op2
.xmm16s(2);
2971 if(op2
.xmm16s(3) < op1
.xmm16s(3)) op1
.xmm16s(3) = op2
.xmm16s(3);
2972 if(op2
.xmm16s(4) < op1
.xmm16s(4)) op1
.xmm16s(4) = op2
.xmm16s(4);
2973 if(op2
.xmm16s(5) < op1
.xmm16s(5)) op1
.xmm16s(5) = op2
.xmm16s(5);
2974 if(op2
.xmm16s(6) < op1
.xmm16s(6)) op1
.xmm16s(6) = op2
.xmm16s(6);
2975 if(op2
.xmm16s(7) < op1
.xmm16s(7)) op1
.xmm16s(7) = op2
.xmm16s(7);
2977 /* now write result back to destination */
2978 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2980 BX_INFO(("PMINSW_VdqWdq: required SSE2, use --enable-sse option"));
2986 /* ORPD: 66 0F 56 */
2988 void BX_CPU_C::ORPS_VpsWps(bxInstruction_c
*i
)
2990 #if BX_SUPPORT_SSE >= 1
2991 BX_CPU_THIS_PTR
prepareSSE();
2993 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2995 /* op2 is a register or memory reference */
2997 op2
= BX_READ_XMM_REG(i
->rm());
3000 /* pointer, segment address pair */
3001 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3004 op1
.xmm64u(0) |= op2
.xmm64u(0);
3005 op1
.xmm64u(1) |= op2
.xmm64u(1);
3007 /* now write result back to destination */
3008 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3010 BX_INFO(("ORPS_VpsWps: required SSE, use --enable-sse option"));
3016 void BX_CPU_C::PADDSB_VdqWdq(bxInstruction_c
*i
)
3018 #if BX_SUPPORT_SSE >= 2
3019 BX_CPU_THIS_PTR
prepareSSE();
3021 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
3023 /* op2 is a register or memory reference */
3025 op2
= BX_READ_XMM_REG(i
->rm());
3028 /* pointer, segment address pair */
3029 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3032 for(unsigned j
=0; j
<16; j
++) {
3033 result
.xmmsbyte(j
) = SaturateWordSToByteS(Bit16s(op1
.xmmsbyte(j
)) + Bit16s(op2
.xmmsbyte(j
)));
3036 /* now write result back to destination */
3037 BX_WRITE_XMM_REG(i
->nnn(), result
);
3039 BX_INFO(("PADDSB_VdqWdq: required SSE2, use --enable-sse option"));
3045 void BX_CPU_C::PADDSW_VdqWdq(bxInstruction_c
*i
)
3047 #if BX_SUPPORT_SSE >= 2
3048 BX_CPU_THIS_PTR
prepareSSE();
3050 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
3052 /* op2 is a register or memory reference */
3054 op2
= BX_READ_XMM_REG(i
->rm());
3057 /* pointer, segment address pair */
3058 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3061 result
.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(0)) + Bit32s(op2
.xmm16s(0)));
3062 result
.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(1)) + Bit32s(op2
.xmm16s(1)));
3063 result
.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(2)) + Bit32s(op2
.xmm16s(2)));
3064 result
.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(3)) + Bit32s(op2
.xmm16s(3)));
3065 result
.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(4)) + Bit32s(op2
.xmm16s(4)));
3066 result
.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(5)) + Bit32s(op2
.xmm16s(5)));
3067 result
.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(6)) + Bit32s(op2
.xmm16s(6)));
3068 result
.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op1
.xmm16s(7)) + Bit32s(op2
.xmm16s(7)));
3070 /* now write result back to destination */
3071 BX_WRITE_XMM_REG(i
->nnn(), result
);
3073 BX_INFO(("PADDSW_VdqWdq: required SSE2, use --enable-sse option"));
3079 void BX_CPU_C::PMAXSW_VdqWdq(bxInstruction_c
*i
)
3081 #if BX_SUPPORT_SSE >= 2
3082 BX_CPU_THIS_PTR
prepareSSE();
3084 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3086 /* op2 is a register or memory reference */
3088 op2
= BX_READ_XMM_REG(i
->rm());
3091 /* pointer, segment address pair */
3092 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3095 if(op2
.xmm16s(0) > op1
.xmm16s(0)) op1
.xmm16s(0) = op2
.xmm16s(0);
3096 if(op2
.xmm16s(1) > op1
.xmm16s(1)) op1
.xmm16s(1) = op2
.xmm16s(1);
3097 if(op2
.xmm16s(2) > op1
.xmm16s(2)) op1
.xmm16s(2) = op2
.xmm16s(2);
3098 if(op2
.xmm16s(3) > op1
.xmm16s(3)) op1
.xmm16s(3) = op2
.xmm16s(3);
3099 if(op2
.xmm16s(4) > op1
.xmm16s(4)) op1
.xmm16s(4) = op2
.xmm16s(4);
3100 if(op2
.xmm16s(5) > op1
.xmm16s(5)) op1
.xmm16s(5) = op2
.xmm16s(5);
3101 if(op2
.xmm16s(6) > op1
.xmm16s(6)) op1
.xmm16s(6) = op2
.xmm16s(6);
3102 if(op2
.xmm16s(7) > op1
.xmm16s(7)) op1
.xmm16s(7) = op2
.xmm16s(7);
3104 /* now write result back to destination */
3105 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3107 BX_INFO(("PMAXSW_VdqWdq: required SSE2, use --enable-sse option"));
3113 /* XORPD: 66 0F 57 */
3114 /* PXOR: 66 0F EF */
3115 void BX_CPU_C::XORPS_VpsWps(bxInstruction_c
*i
)
3117 #if BX_SUPPORT_SSE >= 1
3118 BX_CPU_THIS_PTR
prepareSSE();
3120 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3122 /* op2 is a register or memory reference */
3124 op2
= BX_READ_XMM_REG(i
->rm());
3127 /* pointer, segment address pair */
3128 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3131 op1
.xmm64u(0) ^= op2
.xmm64u(0);
3132 op1
.xmm64u(1) ^= op2
.xmm64u(1);
3134 /* now write result back to destination */
3135 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3137 BX_INFO(("XORPS_VpsWps: required SSE, use --enable-sse option"));
3143 void BX_CPU_C::PSLLW_VdqWdq(bxInstruction_c
*i
)
3145 #if BX_SUPPORT_SSE >= 2
3146 BX_CPU_THIS_PTR
prepareSSE();
3148 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3150 /* op2 is a register or memory reference */
3152 op2
= BX_READ_XMM_REG(i
->rm());
3155 /* pointer, segment address pair */
3156 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3159 if(op2
.xmm64u(0) > 15) /* looking only to low 64 bits */
3166 Bit8u shift
= op2
.xmmubyte(0);
3168 op1
.xmm16u(0) <<= shift
;
3169 op1
.xmm16u(1) <<= shift
;
3170 op1
.xmm16u(2) <<= shift
;
3171 op1
.xmm16u(3) <<= shift
;
3172 op1
.xmm16u(4) <<= shift
;
3173 op1
.xmm16u(5) <<= shift
;
3174 op1
.xmm16u(6) <<= shift
;
3175 op1
.xmm16u(7) <<= shift
;
3178 /* now write result back to destination */
3179 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3181 BX_INFO(("PSLLW_VdqWdq: required SSE2, use --enable-sse option"));
3187 void BX_CPU_C::PSLLD_VdqWdq(bxInstruction_c
*i
)
3189 #if BX_SUPPORT_SSE >= 2
3190 BX_CPU_THIS_PTR
prepareSSE();
3192 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3194 /* op2 is a register or memory reference */
3196 op2
= BX_READ_XMM_REG(i
->rm());
3199 /* pointer, segment address pair */
3200 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3203 if(op2
.xmm64u(0) > 31) /* looking only to low 64 bits */
3210 Bit8u shift
= op2
.xmmubyte(0);
3212 op1
.xmm32u(0) <<= shift
;
3213 op1
.xmm32u(1) <<= shift
;
3214 op1
.xmm32u(2) <<= shift
;
3215 op1
.xmm32u(3) <<= shift
;
3218 /* now write result back to destination */
3219 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3221 BX_INFO(("PSLLD_VdqWdq: required SSE2, use --enable-sse option"));
3227 void BX_CPU_C::PSLLQ_VdqWdq(bxInstruction_c
*i
)
3229 #if BX_SUPPORT_SSE >= 2
3230 BX_CPU_THIS_PTR
prepareSSE();
3232 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3234 /* op2 is a register or memory reference */
3236 op2
= BX_READ_XMM_REG(i
->rm());
3239 /* pointer, segment address pair */
3240 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3243 if(op2
.xmm64u(0) > 63) /* looking only to low 64 bits */
3250 Bit8u shift
= op2
.xmmubyte(0);
3252 op1
.xmm64u(0) <<= shift
;
3253 op1
.xmm64u(1) <<= shift
;
3256 /* now write result back to destination */
3257 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3259 BX_INFO(("PSLLQ_VdqWdq: required SSE2, use --enable-sse option"));
3265 void BX_CPU_C::PMULUDQ_VdqWdq(bxInstruction_c
*i
)
3267 #if BX_SUPPORT_SSE >= 2
3268 BX_CPU_THIS_PTR
prepareSSE();
3270 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
3272 /* op2 is a register or memory reference */
3274 op2
= BX_READ_XMM_REG(i
->rm());
3277 /* pointer, segment address pair */
3278 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3281 result
.xmm64u(0) = Bit64u(op1
.xmm32u(0)) * Bit64u(op2
.xmm32u(0));
3282 result
.xmm64u(1) = Bit64u(op1
.xmm32u(2)) * Bit64u(op2
.xmm32u(2));
3284 /* now write result back to destination */
3285 BX_WRITE_XMM_REG(i
->nnn(), result
);
3287 BX_INFO(("PMULUDQ_VdqWdq: required SSE2, use --enable-sse option"));
3293 void BX_CPU_C::PMADDWD_VdqWdq(bxInstruction_c
*i
)
3295 #if BX_SUPPORT_SSE >= 2
3296 BX_CPU_THIS_PTR
prepareSSE();
3298 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
3300 /* op2 is a register or memory reference */
3302 op2
= BX_READ_XMM_REG(i
->rm());
3305 /* pointer, segment address pair */
3306 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3309 for(unsigned j
=0; j
<4; j
++)
3311 if(op1
.xmm32u(j
) == 0x80008000 && op2
.xmm32u(j
) == 0x80008000) {
3312 result
.xmm32u(j
) = 0x80000000;
3316 Bit32s(op1
.xmm16s(2*j
+0)) * Bit32s(op2
.xmm16s(2*j
+0)) +
3317 Bit32s(op1
.xmm16s(2*j
+1)) * Bit32s(op2
.xmm16s(2*j
+1));
3321 /* now write result back to destination */
3322 BX_WRITE_XMM_REG(i
->nnn(), result
);
3324 BX_INFO(("PMADDWD_VdqWdq: required SSE2, use --enable-sse option"));
3330 void BX_CPU_C::PSADBW_VdqWdq(bxInstruction_c
*i
)
3332 #if BX_SUPPORT_SSE >= 2
3333 BX_CPU_THIS_PTR
prepareSSE();
3335 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3336 Bit16u temp1
= 0, temp2
= 0;
3338 /* op2 is a register or memory reference */
3340 op2
= BX_READ_XMM_REG(i
->rm());
3343 /* pointer, segment address pair */
3344 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3347 temp1
+= abs(op1
.xmmubyte(0x0) - op2
.xmmubyte(0x0));
3348 temp1
+= abs(op1
.xmmubyte(0x1) - op2
.xmmubyte(0x1));
3349 temp1
+= abs(op1
.xmmubyte(0x2) - op2
.xmmubyte(0x2));
3350 temp1
+= abs(op1
.xmmubyte(0x3) - op2
.xmmubyte(0x3));
3351 temp1
+= abs(op1
.xmmubyte(0x4) - op2
.xmmubyte(0x4));
3352 temp1
+= abs(op1
.xmmubyte(0x5) - op2
.xmmubyte(0x5));
3353 temp1
+= abs(op1
.xmmubyte(0x6) - op2
.xmmubyte(0x6));
3354 temp1
+= abs(op1
.xmmubyte(0x7) - op2
.xmmubyte(0x7));
3356 temp2
+= abs(op1
.xmmubyte(0x8) - op2
.xmmubyte(0x8));
3357 temp2
+= abs(op1
.xmmubyte(0x9) - op2
.xmmubyte(0x9));
3358 temp2
+= abs(op1
.xmmubyte(0xA) - op2
.xmmubyte(0xA));
3359 temp2
+= abs(op1
.xmmubyte(0xB) - op2
.xmmubyte(0xB));
3360 temp2
+= abs(op1
.xmmubyte(0xC) - op2
.xmmubyte(0xC));
3361 temp2
+= abs(op1
.xmmubyte(0xD) - op2
.xmmubyte(0xD));
3362 temp2
+= abs(op1
.xmmubyte(0xE) - op2
.xmmubyte(0xE));
3363 temp2
+= abs(op1
.xmmubyte(0xF) - op2
.xmmubyte(0xF));
3365 op1
.xmm64u(0) = Bit64u(temp1
);
3366 op1
.xmm64u(1) = Bit64u(temp2
);
3368 /* now write result back to destination */
3369 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3371 BX_INFO(("PSADBW_VdqWdq: required SSE2, use --enable-sse option"));
3377 void BX_CPU_C::PSUBB_VdqWdq(bxInstruction_c
*i
)
3379 #if BX_SUPPORT_SSE >= 2
3380 BX_CPU_THIS_PTR
prepareSSE();
3382 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3384 /* op2 is a register or memory reference */
3386 op2
= BX_READ_XMM_REG(i
->rm());
3389 /* pointer, segment address pair */
3390 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3393 for(unsigned j
=0; j
<16; j
++) {
3394 op1
.xmmubyte(j
) -= op2
.xmmubyte(j
);
3397 /* now write result back to destination */
3398 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3400 BX_INFO(("PSUBB_VdqWdq: required SSE2, use --enable-sse option"));
3406 void BX_CPU_C::PSUBW_VdqWdq(bxInstruction_c
*i
)
3408 #if BX_SUPPORT_SSE >= 2
3409 BX_CPU_THIS_PTR
prepareSSE();
3411 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3413 /* op2 is a register or memory reference */
3415 op2
= BX_READ_XMM_REG(i
->rm());
3418 /* pointer, segment address pair */
3419 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3422 op1
.xmm16u(0) -= op2
.xmm16u(0);
3423 op1
.xmm16u(1) -= op2
.xmm16u(1);
3424 op1
.xmm16u(2) -= op2
.xmm16u(2);
3425 op1
.xmm16u(3) -= op2
.xmm16u(3);
3426 op1
.xmm16u(4) -= op2
.xmm16u(4);
3427 op1
.xmm16u(5) -= op2
.xmm16u(5);
3428 op1
.xmm16u(6) -= op2
.xmm16u(6);
3429 op1
.xmm16u(7) -= op2
.xmm16u(7);
3431 /* now write result back to destination */
3432 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3434 BX_INFO(("PSUBW_VdqWdq: required SSE2, use --enable-sse option"));
3440 void BX_CPU_C::PSUBD_VdqWdq(bxInstruction_c
*i
)
3442 #if BX_SUPPORT_SSE >= 2
3443 BX_CPU_THIS_PTR
prepareSSE();
3445 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3447 /* op2 is a register or memory reference */
3449 op2
= BX_READ_XMM_REG(i
->rm());
3452 /* pointer, segment address pair */
3453 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3456 op1
.xmm32u(0) -= op2
.xmm32u(0);
3457 op1
.xmm32u(1) -= op2
.xmm32u(1);
3458 op1
.xmm32u(2) -= op2
.xmm32u(2);
3459 op1
.xmm32u(3) -= op2
.xmm32u(3);
3461 /* now write result back to destination */
3462 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3464 BX_INFO(("PSUBD_VdqWdq: required SSE2, use --enable-sse option"));
3470 void BX_CPU_C::PSUBQ_VdqWdq(bxInstruction_c
*i
)
3472 #if BX_SUPPORT_SSE >= 2
3473 BX_CPU_THIS_PTR
prepareSSE();
3475 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3477 /* op2 is a register or memory reference */
3479 op2
= BX_READ_XMM_REG(i
->rm());
3482 /* pointer, segment address pair */
3483 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3486 op1
.xmm64u(0) -= op2
.xmm64u(0);
3487 op1
.xmm64u(1) -= op2
.xmm64u(1);
3489 /* now write result back to destination */
3490 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3492 BX_INFO(("PSUBQ_VdqWdq: required SSE2, use --enable-sse option"));
3498 void BX_CPU_C::PADDB_VdqWdq(bxInstruction_c
*i
)
3500 #if BX_SUPPORT_SSE >= 2
3501 BX_CPU_THIS_PTR
prepareSSE();
3503 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3505 /* op2 is a register or memory reference */
3507 op2
= BX_READ_XMM_REG(i
->rm());
3510 /* pointer, segment address pair */
3511 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3514 for(unsigned j
=0; j
<16; j
++) {
3515 op1
.xmmubyte(j
) += op2
.xmmubyte(j
);
3518 /* now write result back to destination */
3519 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3521 BX_INFO(("PADDB_VdqWdq: required SSE2, use --enable-sse option"));
3527 void BX_CPU_C::PADDW_VdqWdq(bxInstruction_c
*i
)
3529 #if BX_SUPPORT_SSE >= 2
3530 BX_CPU_THIS_PTR
prepareSSE();
3532 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3534 /* op2 is a register or memory reference */
3536 op2
= BX_READ_XMM_REG(i
->rm());
3539 /* pointer, segment address pair */
3540 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3543 op1
.xmm16u(0) += op2
.xmm16u(0);
3544 op1
.xmm16u(1) += op2
.xmm16u(1);
3545 op1
.xmm16u(2) += op2
.xmm16u(2);
3546 op1
.xmm16u(3) += op2
.xmm16u(3);
3547 op1
.xmm16u(4) += op2
.xmm16u(4);
3548 op1
.xmm16u(5) += op2
.xmm16u(5);
3549 op1
.xmm16u(6) += op2
.xmm16u(6);
3550 op1
.xmm16u(7) += op2
.xmm16u(7);
3552 /* now write result back to destination */
3553 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3555 BX_INFO(("PADDW_VdqWdq: required SSE2, use --enable-sse option"));
3561 void BX_CPU_C::PADDD_VdqWdq(bxInstruction_c
*i
)
3563 #if BX_SUPPORT_SSE >= 2
3564 BX_CPU_THIS_PTR
prepareSSE();
3566 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3568 /* op2 is a register or memory reference */
3570 op2
= BX_READ_XMM_REG(i
->rm());
3573 /* pointer, segment address pair */
3574 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
3577 op1
.xmm32u(0) += op2
.xmm32u(0);
3578 op1
.xmm32u(1) += op2
.xmm32u(1);
3579 op1
.xmm32u(2) += op2
.xmm32u(2);
3580 op1
.xmm32u(3) += op2
.xmm32u(3);
3582 /* now write result back to destination */
3583 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3585 BX_INFO(("PADDD_VdqWdq: required SSE2, use --enable-sse option"));
3590 /* 66 0F 71 Grp12 010 */
3591 void BX_CPU_C::PSRLW_UdqIb(bxInstruction_c
*i
)
3593 #if BX_SUPPORT_SSE >= 2
3594 BX_CPU_THIS_PTR
prepareSSE();
3596 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3597 Bit8u shift
= i
->Ib();
3604 op
.xmm16u(0) >>= shift
;
3605 op
.xmm16u(1) >>= shift
;
3606 op
.xmm16u(2) >>= shift
;
3607 op
.xmm16u(3) >>= shift
;
3608 op
.xmm16u(4) >>= shift
;
3609 op
.xmm16u(5) >>= shift
;
3610 op
.xmm16u(6) >>= shift
;
3611 op
.xmm16u(7) >>= shift
;
3614 /* now write result back to destination */
3615 BX_WRITE_XMM_REG(i
->rm(), op
);
3617 BX_INFO(("PSRLW_UdqIb: required SSE2, use --enable-sse option"));
3622 /* 0F 71 Grp12 100 */
3623 void BX_CPU_C::PSRAW_UdqIb(bxInstruction_c
*i
)
3625 #if BX_SUPPORT_SSE >= 2
3626 BX_CPU_THIS_PTR
prepareSSE();
3628 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm()), result
;
3629 Bit8u shift
= i
->Ib();
3631 if(shift
== 0) return;
3634 result
.xmm16u(0) = (op
.xmm16u(0) & 0x8000) ? 0xffff : 0;
3635 result
.xmm16u(1) = (op
.xmm16u(1) & 0x8000) ? 0xffff : 0;
3636 result
.xmm16u(2) = (op
.xmm16u(2) & 0x8000) ? 0xffff : 0;
3637 result
.xmm16u(3) = (op
.xmm16u(3) & 0x8000) ? 0xffff : 0;
3638 result
.xmm16u(4) = (op
.xmm16u(4) & 0x8000) ? 0xffff : 0;
3639 result
.xmm16u(5) = (op
.xmm16u(5) & 0x8000) ? 0xffff : 0;
3640 result
.xmm16u(6) = (op
.xmm16u(6) & 0x8000) ? 0xffff : 0;
3641 result
.xmm16u(7) = (op
.xmm16u(7) & 0x8000) ? 0xffff : 0;
3644 result
.xmm16u(0) = op
.xmm16u(0) >> shift
;
3645 result
.xmm16u(1) = op
.xmm16u(1) >> shift
;
3646 result
.xmm16u(2) = op
.xmm16u(2) >> shift
;
3647 result
.xmm16u(3) = op
.xmm16u(3) >> shift
;
3648 result
.xmm16u(4) = op
.xmm16u(4) >> shift
;
3649 result
.xmm16u(5) = op
.xmm16u(5) >> shift
;
3650 result
.xmm16u(6) = op
.xmm16u(6) >> shift
;
3651 result
.xmm16u(7) = op
.xmm16u(7) >> shift
;
3653 if(op
.xmm16u(0) & 0x8000) result
.xmm16u(0) |= (0xffff << (16 - shift
));
3654 if(op
.xmm16u(1) & 0x8000) result
.xmm16u(1) |= (0xffff << (16 - shift
));
3655 if(op
.xmm16u(2) & 0x8000) result
.xmm16u(2) |= (0xffff << (16 - shift
));
3656 if(op
.xmm16u(3) & 0x8000) result
.xmm16u(3) |= (0xffff << (16 - shift
));
3657 if(op
.xmm16u(4) & 0x8000) result
.xmm16u(4) |= (0xffff << (16 - shift
));
3658 if(op
.xmm16u(5) & 0x8000) result
.xmm16u(5) |= (0xffff << (16 - shift
));
3659 if(op
.xmm16u(6) & 0x8000) result
.xmm16u(6) |= (0xffff << (16 - shift
));
3660 if(op
.xmm16u(7) & 0x8000) result
.xmm16u(7) |= (0xffff << (16 - shift
));
3663 /* now write result back to destination */
3664 BX_WRITE_XMM_REG(i
->rm(), result
);
3666 BX_INFO(("PSRAW_UdqIb: required SSE2, use --enable-sse option"));
3671 /* 66 0F 71 Grp12 110 */
3672 void BX_CPU_C::PSLLW_UdqIb(bxInstruction_c
*i
)
3674 #if BX_SUPPORT_SSE >= 2
3675 BX_CPU_THIS_PTR
prepareSSE();
3677 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3678 Bit8u shift
= i
->Ib();
3685 op
.xmm16u(0) <<= shift
;
3686 op
.xmm16u(1) <<= shift
;
3687 op
.xmm16u(2) <<= shift
;
3688 op
.xmm16u(3) <<= shift
;
3689 op
.xmm16u(4) <<= shift
;
3690 op
.xmm16u(5) <<= shift
;
3691 op
.xmm16u(6) <<= shift
;
3692 op
.xmm16u(7) <<= shift
;
3695 /* now write result back to destination */
3696 BX_WRITE_XMM_REG(i
->rm(), op
);
3698 BX_INFO(("PSLLW_UdqIb: required SSE2, use --enable-sse option"));
3703 /* 66 0F 72 Grp13 010 */
3704 void BX_CPU_C::PSRLD_UdqIb(bxInstruction_c
*i
)
3706 #if BX_SUPPORT_SSE >= 2
3707 BX_CPU_THIS_PTR
prepareSSE();
3709 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3710 Bit8u shift
= i
->Ib();
3717 op
.xmm32u(0) >>= shift
;
3718 op
.xmm32u(1) >>= shift
;
3719 op
.xmm32u(2) >>= shift
;
3720 op
.xmm32u(3) >>= shift
;
3723 /* now write result back to destination */
3724 BX_WRITE_XMM_REG(i
->rm(), op
);
3726 BX_INFO(("PSRLD_UdqIb: required SSE2, use --enable-sse option"));
3731 /* 0F 72 Grp13 100 */
3732 void BX_CPU_C::PSRAD_UdqIb(bxInstruction_c
*i
)
3734 #if BX_SUPPORT_SSE >= 2
3735 BX_CPU_THIS_PTR
prepareSSE();
3737 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm()), result
;
3738 Bit8u shift
= i
->Ib();
3740 if(shift
== 0) return;
3743 result
.xmm32u(0) = (op
.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
3744 result
.xmm32u(1) = (op
.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
3745 result
.xmm32u(2) = (op
.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
3746 result
.xmm32u(3) = (op
.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
3749 result
.xmm32u(0) = op
.xmm32u(0) >> shift
;
3750 result
.xmm32u(1) = op
.xmm32u(1) >> shift
;
3751 result
.xmm32u(2) = op
.xmm32u(2) >> shift
;
3752 result
.xmm32u(3) = op
.xmm32u(3) >> shift
;
3754 if(op
.xmm32u(0) & 0x80000000) result
.xmm32u(0) |= (0xffffffff << (32-shift
));
3755 if(op
.xmm32u(1) & 0x80000000) result
.xmm32u(1) |= (0xffffffff << (32-shift
));
3756 if(op
.xmm32u(2) & 0x80000000) result
.xmm32u(2) |= (0xffffffff << (32-shift
));
3757 if(op
.xmm32u(3) & 0x80000000) result
.xmm32u(3) |= (0xffffffff << (32-shift
));
3760 /* now write result back to destination */
3761 BX_WRITE_XMM_REG(i
->rm(), result
);
3763 BX_INFO(("PSRAD_UdqIb: required SSE2, use --enable-sse option"));
3768 /* 66 0F 72 Grp13 110 */
3769 void BX_CPU_C::PSLLD_UdqIb(bxInstruction_c
*i
)
3771 #if BX_SUPPORT_SSE >= 2
3772 BX_CPU_THIS_PTR
prepareSSE();
3774 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3775 Bit8u shift
= i
->Ib();
3782 op
.xmm32u(0) <<= shift
;
3783 op
.xmm32u(1) <<= shift
;
3784 op
.xmm32u(2) <<= shift
;
3785 op
.xmm32u(3) <<= shift
;
3788 /* now write result back to destination */
3789 BX_WRITE_XMM_REG(i
->rm(), op
);
3791 BX_INFO(("PSLLD_UdqIb: required SSE2, use --enable-sse option"));
3796 /* 66 0F 73 Grp14 010 */
3797 void BX_CPU_C::PSRLQ_UdqIb(bxInstruction_c
*i
)
3799 #if BX_SUPPORT_SSE >= 2
3800 BX_CPU_THIS_PTR
prepareSSE();
3802 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3803 Bit8u shift
= i
->Ib();
3810 op
.xmm64u(0) >>= shift
;
3811 op
.xmm64u(1) >>= shift
;
3814 /* now write result back to destination */
3815 BX_WRITE_XMM_REG(i
->rm(), op
);
3817 BX_INFO(("PSRLQ_UdqIb: required SSE2, use --enable-sse option"));
3822 /* 66 0F 73 Grp14 011 */
3823 void BX_CPU_C::PSRLDQ_UdqIb(bxInstruction_c
*i
)
3825 #if BX_SUPPORT_SSE >= 2
3826 BX_CPU_THIS_PTR
prepareSSE();
3828 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm()), result
;
3829 Bit8u shift
= i
->Ib();
3831 result
.xmm64u(0) = result
.xmm64u(1) = 0;
3833 for(unsigned j
=shift
; j
<16; j
++) {
3834 result
.xmmubyte(j
-shift
) = op
.xmmubyte(j
);
3837 /* now write result back to destination */
3838 BX_WRITE_XMM_REG(i
->rm(), result
);
3840 BX_INFO(("PSRLDQ_UdqIb: required SSE2, use --enable-sse option"));
3845 /* 66 0F 73 Grp14 110 */
3846 void BX_CPU_C::PSLLQ_UdqIb(bxInstruction_c
*i
)
3848 #if BX_SUPPORT_SSE >= 2
3849 BX_CPU_THIS_PTR
prepareSSE();
3851 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
3852 Bit8u shift
= i
->Ib();
3859 op
.xmm64u(0) <<= shift
;
3860 op
.xmm64u(1) <<= shift
;
3863 /* now write result back to destination */
3864 BX_WRITE_XMM_REG(i
->rm(), op
);
3866 BX_INFO(("PSLLQ_UdqIb: required SSE2, use --enable-sse option"));
3871 /* 66 0F 73 Grp14 111 */
3872 void BX_CPU_C::PSLLDQ_UdqIb(bxInstruction_c
*i
)
3874 #if BX_SUPPORT_SSE >= 2
3875 BX_CPU_THIS_PTR
prepareSSE();
3877 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm()), result
;
3878 Bit8u shift
= i
->Ib();
3880 result
.xmm64u(0) = result
.xmm64u(1) = 0;
3882 for(unsigned j
=shift
; j
<16; j
++) {
3883 result
.xmmubyte(j
) = op
.xmmubyte(j
-shift
);
3886 /* now write result back to destination */
3887 BX_WRITE_XMM_REG(i
->rm(), result
);
3889 BX_INFO(("PSLLDQ_UdqIb: required SSE2, use --enable-sse option"));