- added instructions how to update the online documentation
[bochs-mirror.git] / cpu / sse.cc
blob470c147325f2be7b5d6d2170ec3419bd43d398a6
1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse.cc,v 1.63 2008/09/19 19:18:57 sshwarts Exp $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 // Copyright (c) 2003 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 /////////////////////////////////////////////////////////////////////////
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
29 /* ********************************************** */
30 /* SSE Integer Operations (128bit MMX extensions) */
31 /* ********************************************** */
33 // for 3-byte opcodes
34 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
36 /* 66 0F 38 00 */
37 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSHUFB_VdqWdq(bxInstruction_c *i)
39 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
40 BX_CPU_THIS_PTR prepareSSE();
42 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
44 /* op2 is a register or memory reference */
45 if (i->modC0()) {
46 op2 = BX_READ_XMM_REG(i->rm());
48 else {
49 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
50 /* pointer, segment address pair */
51 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
54 for(unsigned j=0; j<16; j++)
56 unsigned mask = op2.xmmubyte(j);
57 if (mask & 0x80)
58 result.xmmubyte(j) = 0;
59 else
60 result.xmmubyte(j) = op1.xmmubyte(mask & 0xf);
63 BX_WRITE_XMM_REG(i->nnn(), result);
64 #else
65 BX_INFO(("PSHUFB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
66 exception(BX_UD_EXCEPTION, 0, 0);
67 #endif
70 /* 66 0F 38 01 */
71 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDW_VdqWdq(bxInstruction_c *i)
73 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
74 BX_CPU_THIS_PTR prepareSSE();
76 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
78 /* op2 is a register or memory reference */
79 if (i->modC0()) {
80 op2 = BX_READ_XMM_REG(i->rm());
82 else {
83 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
84 /* pointer, segment address pair */
85 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
88 result.xmm16u(0) = op1.xmm16u(0) + op1.xmm16u(1);
89 result.xmm16u(1) = op1.xmm16u(2) + op1.xmm16u(3);
90 result.xmm16u(2) = op1.xmm16u(4) + op1.xmm16u(5);
91 result.xmm16u(3) = op1.xmm16u(6) + op1.xmm16u(7);
93 result.xmm16u(4) = op2.xmm16u(0) + op2.xmm16u(1);
94 result.xmm16u(5) = op2.xmm16u(2) + op2.xmm16u(3);
95 result.xmm16u(6) = op2.xmm16u(4) + op2.xmm16u(5);
96 result.xmm16u(7) = op2.xmm16u(6) + op2.xmm16u(7);
98 BX_WRITE_XMM_REG(i->nnn(), result);
99 #else
100 BX_INFO(("PHADDW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
101 exception(BX_UD_EXCEPTION, 0, 0);
102 #endif
105 /* 66 0F 38 02 */
106 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDD_VdqWdq(bxInstruction_c *i)
108 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
109 BX_CPU_THIS_PTR prepareSSE();
111 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
113 /* op2 is a register or memory reference */
114 if (i->modC0()) {
115 op2 = BX_READ_XMM_REG(i->rm());
117 else {
118 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
119 /* pointer, segment address pair */
120 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
123 result.xmm32u(0) = op1.xmm32u(0) + op1.xmm32u(1);
124 result.xmm32u(1) = op1.xmm32u(2) + op1.xmm32u(3);
125 result.xmm32u(2) = op2.xmm32u(0) + op2.xmm32u(1);
126 result.xmm32u(3) = op2.xmm32u(2) + op2.xmm32u(3);
128 BX_WRITE_XMM_REG(i->nnn(), result);
129 #else
130 BX_INFO(("PHADDD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
131 exception(BX_UD_EXCEPTION, 0, 0);
132 #endif
135 /* 66 0F 38 03 */
136 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDSW_VdqWdq(bxInstruction_c *i)
138 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
139 BX_CPU_THIS_PTR prepareSSE();
141 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
143 /* op2 is a register or memory reference */
144 if (i->modC0()) {
145 op2 = BX_READ_XMM_REG(i->rm());
147 else {
148 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
149 /* pointer, segment address pair */
150 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
153 result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) + Bit32s(op1.xmm16s(1)));
154 result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) + Bit32s(op1.xmm16s(3)));
155 result.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) + Bit32s(op1.xmm16s(5)));
156 result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) + Bit32s(op1.xmm16s(7)));
158 result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) + Bit32s(op2.xmm16s(1)));
159 result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) + Bit32s(op2.xmm16s(3)));
160 result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) + Bit32s(op2.xmm16s(5)));
161 result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) + Bit32s(op2.xmm16s(7)));
163 /* now write result back to destination */
164 BX_WRITE_XMM_REG(i->nnn(), result);
165 #else
166 BX_INFO(("PHADDSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
167 exception(BX_UD_EXCEPTION, 0, 0);
168 #endif
171 /* 66 0F 38 04 */
172 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMADDUBSW_VdqWdq(bxInstruction_c *i)
174 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
175 BX_CPU_THIS_PTR prepareSSE();
177 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
179 /* op2 is a register or memory reference */
180 if (i->modC0()) {
181 op2 = BX_READ_XMM_REG(i->rm());
183 else {
184 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
185 /* pointer, segment address pair */
186 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
189 for(unsigned j=0; j<8; j++)
191 Bit32s temp = Bit32s(op1.xmmubyte(j*2+0))*Bit32s(op2.xmmsbyte(j*2+0)) +
192 Bit32s(op1.xmmubyte(j*2+1))*Bit32s(op2.xmmsbyte(j*2+1));
194 result.xmm16s(j) = SaturateDwordSToWordS(temp);
197 /* now write result back to destination */
198 BX_WRITE_XMM_REG(i->nnn(), result);
199 #else
200 BX_INFO(("PMADDUBSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
201 exception(BX_UD_EXCEPTION, 0, 0);
202 #endif
205 /* 66 0F 38 05 */
206 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBSW_VdqWdq(bxInstruction_c *i)
208 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
209 BX_CPU_THIS_PTR prepareSSE();
211 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
213 /* op2 is a register or memory reference */
214 if (i->modC0()) {
215 op2 = BX_READ_XMM_REG(i->rm());
217 else {
218 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
219 /* pointer, segment address pair */
220 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
223 result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) - Bit32s(op1.xmm16s(1)));
224 result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) - Bit32s(op1.xmm16s(3)));
225 result.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) - Bit32s(op1.xmm16s(5)));
226 result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) - Bit32s(op1.xmm16s(7)));
228 result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) - Bit32s(op2.xmm16s(1)));
229 result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) - Bit32s(op2.xmm16s(3)));
230 result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) - Bit32s(op2.xmm16s(5)));
231 result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) - Bit32s(op2.xmm16s(7)));
233 /* now write result back to destination */
234 BX_WRITE_XMM_REG(i->nnn(), result);
235 #else
236 BX_INFO(("PHSUBSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
237 exception(BX_UD_EXCEPTION, 0, 0);
238 #endif
241 /* 66 0F 38 05 */
242 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBW_VdqWdq(bxInstruction_c *i)
244 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
245 BX_CPU_THIS_PTR prepareSSE();
247 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
249 /* op2 is a register or memory reference */
250 if (i->modC0()) {
251 op2 = BX_READ_XMM_REG(i->rm());
253 else {
254 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
255 /* pointer, segment address pair */
256 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
259 result.xmm16u(0) = op1.xmm16u(0) - op1.xmm16u(1);
260 result.xmm16u(1) = op1.xmm16u(2) - op1.xmm16u(3);
261 result.xmm16u(2) = op1.xmm16u(4) - op1.xmm16u(5);
262 result.xmm16u(3) = op1.xmm16u(6) - op1.xmm16u(7);
264 result.xmm16u(4) = op2.xmm16u(0) - op2.xmm16u(1);
265 result.xmm16u(5) = op2.xmm16u(2) - op2.xmm16u(3);
266 result.xmm16u(6) = op2.xmm16u(4) - op2.xmm16u(5);
267 result.xmm16u(7) = op2.xmm16u(6) - op2.xmm16u(7);
269 BX_WRITE_XMM_REG(i->nnn(), result);
270 #else
271 BX_INFO(("PHSUBW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
272 exception(BX_UD_EXCEPTION, 0, 0);
273 #endif
276 /* 66 0F 38 06 */
277 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBD_VdqWdq(bxInstruction_c *i)
279 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
280 BX_CPU_THIS_PTR prepareSSE();
282 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
284 /* op2 is a register or memory reference */
285 if (i->modC0()) {
286 op2 = BX_READ_XMM_REG(i->rm());
288 else {
289 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
290 /* pointer, segment address pair */
291 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
294 result.xmm32u(0) = op1.xmm32u(0) - op1.xmm32u(1);
295 result.xmm32u(1) = op1.xmm32u(2) - op1.xmm32u(3);
296 result.xmm32u(2) = op2.xmm32u(0) - op2.xmm32u(1);
297 result.xmm32u(3) = op2.xmm32u(2) - op2.xmm32u(3);
299 BX_WRITE_XMM_REG(i->nnn(), result);
300 #else
301 BX_INFO(("PHSUBD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
302 exception(BX_UD_EXCEPTION, 0, 0);
303 #endif
306 /* 66 0F 38 08 */
307 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSIGNB_VdqWdq(bxInstruction_c *i)
309 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
310 BX_CPU_THIS_PTR prepareSSE();
312 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
314 /* op2 is a register or memory reference */
315 if (i->modC0()) {
316 op2 = BX_READ_XMM_REG(i->rm());
318 else {
319 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
320 /* pointer, segment address pair */
321 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
324 for(unsigned j=0; j<16; j++) {
325 int sign = (op2.xmmsbyte(j) > 0) - (op2.xmmsbyte(j) < 0);
326 op1.xmmsbyte(j) *= sign;
329 BX_WRITE_XMM_REG(i->nnn(), op1);
330 #else
331 BX_INFO(("PSIGNB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
332 exception(BX_UD_EXCEPTION, 0, 0);
333 #endif
336 /* 66 0F 38 09 */
337 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSIGNW_VdqWdq(bxInstruction_c *i)
339 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
340 BX_CPU_THIS_PTR prepareSSE();
342 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
344 /* op2 is a register or memory reference */
345 if (i->modC0()) {
346 op2 = BX_READ_XMM_REG(i->rm());
348 else {
349 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
350 /* pointer, segment address pair */
351 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
354 for(unsigned j=0; j<8; j++) {
355 int sign = (op2.xmm16s(j) > 0) - (op2.xmm16s(j) < 0);
356 op1.xmm16s(j) *= sign;
359 BX_WRITE_XMM_REG(i->nnn(), op1);
360 #else
361 BX_INFO(("PSIGNW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
362 exception(BX_UD_EXCEPTION, 0, 0);
363 #endif
366 /* 66 0F 38 0A */
367 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSIGND_VdqWdq(bxInstruction_c *i)
369 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
370 BX_CPU_THIS_PTR prepareSSE();
372 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
374 /* op2 is a register or memory reference */
375 if (i->modC0()) {
376 op2 = BX_READ_XMM_REG(i->rm());
378 else {
379 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
380 /* pointer, segment address pair */
381 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
384 for(unsigned j=0; j<4; j++) {
385 int sign = (op2.xmm32s(j) > 0) - (op2.xmm32s(j) < 0);
386 op1.xmm32s(j) *= sign;
389 BX_WRITE_XMM_REG(i->nnn(), op1);
390 #else
391 BX_INFO(("PSIGND_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
392 exception(BX_UD_EXCEPTION, 0, 0);
393 #endif
396 /* 66 0F 38 0B */
397 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULHRSW_VdqWdq(bxInstruction_c *i)
399 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
400 BX_CPU_THIS_PTR prepareSSE();
402 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
404 /* op2 is a register or memory reference */
405 if (i->modC0()) {
406 op2 = BX_READ_XMM_REG(i->rm());
408 else {
409 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
410 /* pointer, segment address pair */
411 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
414 op1.xmm16u(0) = (((op1.xmm16s(0) * op2.xmm16s(0)) >> 14) + 1) >> 1;
415 op1.xmm16u(1) = (((op1.xmm16s(1) * op2.xmm16s(1)) >> 14) + 1) >> 1;
416 op1.xmm16u(2) = (((op1.xmm16s(2) * op2.xmm16s(2)) >> 14) + 1) >> 1;
417 op1.xmm16u(3) = (((op1.xmm16s(3) * op2.xmm16s(3)) >> 14) + 1) >> 1;
418 op1.xmm16u(4) = (((op1.xmm16s(4) * op2.xmm16s(4)) >> 14) + 1) >> 1;
419 op1.xmm16u(5) = (((op1.xmm16s(5) * op2.xmm16s(5)) >> 14) + 1) >> 1;
420 op1.xmm16u(6) = (((op1.xmm16s(6) * op2.xmm16s(6)) >> 14) + 1) >> 1;
421 op1.xmm16u(7) = (((op1.xmm16s(7) * op2.xmm16s(7)) >> 14) + 1) >> 1;
423 /* now write result back to destination */
424 BX_WRITE_XMM_REG(i->nnn(), op1);
425 #else
426 BX_INFO(("PMULHRSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
427 exception(BX_UD_EXCEPTION, 0, 0);
428 #endif
431 /* 66 0F 38 1C */
432 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PABSB_VdqWdq(bxInstruction_c *i)
434 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
435 BX_CPU_THIS_PTR prepareSSE();
437 BxPackedXmmRegister op;
439 if (i->modC0()) {
440 op = BX_READ_XMM_REG(i->rm());
442 else {
443 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
444 /* pointer, segment address pair */
445 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
448 if(op.xmmsbyte(0x0) < 0) op.xmmubyte(0x0) = -op.xmmsbyte(0x0);
449 if(op.xmmsbyte(0x1) < 0) op.xmmubyte(0x1) = -op.xmmsbyte(0x1);
450 if(op.xmmsbyte(0x2) < 0) op.xmmubyte(0x2) = -op.xmmsbyte(0x2);
451 if(op.xmmsbyte(0x3) < 0) op.xmmubyte(0x3) = -op.xmmsbyte(0x3);
452 if(op.xmmsbyte(0x4) < 0) op.xmmubyte(0x4) = -op.xmmsbyte(0x4);
453 if(op.xmmsbyte(0x5) < 0) op.xmmubyte(0x5) = -op.xmmsbyte(0x5);
454 if(op.xmmsbyte(0x6) < 0) op.xmmubyte(0x6) = -op.xmmsbyte(0x6);
455 if(op.xmmsbyte(0x7) < 0) op.xmmubyte(0x7) = -op.xmmsbyte(0x7);
456 if(op.xmmsbyte(0x8) < 0) op.xmmubyte(0x8) = -op.xmmsbyte(0x8);
457 if(op.xmmsbyte(0x9) < 0) op.xmmubyte(0x9) = -op.xmmsbyte(0x9);
458 if(op.xmmsbyte(0xa) < 0) op.xmmubyte(0xa) = -op.xmmsbyte(0xa);
459 if(op.xmmsbyte(0xb) < 0) op.xmmubyte(0xb) = -op.xmmsbyte(0xb);
460 if(op.xmmsbyte(0xc) < 0) op.xmmubyte(0xc) = -op.xmmsbyte(0xc);
461 if(op.xmmsbyte(0xd) < 0) op.xmmubyte(0xd) = -op.xmmsbyte(0xd);
462 if(op.xmmsbyte(0xe) < 0) op.xmmubyte(0xe) = -op.xmmsbyte(0xe);
463 if(op.xmmsbyte(0xf) < 0) op.xmmubyte(0xf) = -op.xmmsbyte(0xf);
465 /* now write result back to destination */
466 BX_WRITE_XMM_REG(i->nnn(), op);
467 #else
468 BX_INFO(("PABSB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
469 exception(BX_UD_EXCEPTION, 0, 0);
470 #endif
473 /* 66 0F 38 1D */
474 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PABSW_VdqWdq(bxInstruction_c *i)
476 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
477 BX_CPU_THIS_PTR prepareSSE();
479 BxPackedXmmRegister op;
481 if (i->modC0()) {
482 op = BX_READ_XMM_REG(i->rm());
484 else {
485 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
486 /* pointer, segment address pair */
487 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
490 if(op.xmm16s(0) < 0) op.xmm16u(0) = -op.xmm16s(0);
491 if(op.xmm16s(1) < 0) op.xmm16u(1) = -op.xmm16s(1);
492 if(op.xmm16s(2) < 0) op.xmm16u(2) = -op.xmm16s(2);
493 if(op.xmm16s(3) < 0) op.xmm16u(3) = -op.xmm16s(3);
494 if(op.xmm16s(4) < 0) op.xmm16u(4) = -op.xmm16s(4);
495 if(op.xmm16s(5) < 0) op.xmm16u(5) = -op.xmm16s(5);
496 if(op.xmm16s(6) < 0) op.xmm16u(6) = -op.xmm16s(6);
497 if(op.xmm16s(7) < 0) op.xmm16u(7) = -op.xmm16s(7);
499 /* now write result back to destination */
500 BX_WRITE_XMM_REG(i->nnn(), op);
501 #else
502 BX_INFO(("PABSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
503 exception(BX_UD_EXCEPTION, 0, 0);
504 #endif
507 /* 66 0F 38 1E */
508 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PABSD_VdqWdq(bxInstruction_c *i)
510 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
511 BX_CPU_THIS_PTR prepareSSE();
513 BxPackedXmmRegister op;
515 if (i->modC0()) {
516 op = BX_READ_XMM_REG(i->rm());
518 else {
519 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
520 /* pointer, segment address pair */
521 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
524 if(op.xmm32s(0) < 0) op.xmm32u(0) = -op.xmm32s(0);
525 if(op.xmm32s(1) < 0) op.xmm32u(1) = -op.xmm32s(1);
526 if(op.xmm32s(2) < 0) op.xmm32u(2) = -op.xmm32s(2);
527 if(op.xmm32s(3) < 0) op.xmm32u(3) = -op.xmm32s(3);
529 /* now write result back to destination */
530 BX_WRITE_XMM_REG(i->nnn(), op);
531 #else
532 BX_INFO(("PABSD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
533 exception(BX_UD_EXCEPTION, 0, 0);
534 #endif
537 /* 66 0F 38 10 */
538 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PBLENDVB_VdqWdq(bxInstruction_c *i)
540 #if BX_SUPPORT_SSE >= 4
541 BX_CPU_THIS_PTR prepareSSE();
543 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2,
544 mask = BX_READ_XMM_REG(0); // XMM0
546 /* op2 is a register or memory reference */
547 if (i->modC0()) {
548 op2 = BX_READ_XMM_REG(i->rm());
550 else {
551 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
552 /* pointer, segment address pair */
553 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
556 for(unsigned j=0; j<16; j++)
557 if (mask.xmmubyte(j) & 0x80) op1.xmmubyte(j) = op2.xmmubyte(j);
559 /* now write result back to destination */
560 BX_WRITE_XMM_REG(i->nnn(), op1);
561 #else
562 BX_INFO(("PBLENDVB_VdqWdq: required SSE4, use --enable-sse option"));
563 exception(BX_UD_EXCEPTION, 0, 0);
564 #endif
567 /* 66 0F 38 14 */
568 void BX_CPP_AttrRegparmN(1) BX_CPU_C::BLENDVPS_VpsWps(bxInstruction_c *i)
570 #if BX_SUPPORT_SSE >= 4
571 BX_CPU_THIS_PTR prepareSSE();
573 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2,
574 mask = BX_READ_XMM_REG(0); // XMM0
576 /* op2 is a register or memory reference */
577 if (i->modC0()) {
578 op2 = BX_READ_XMM_REG(i->rm());
580 else {
581 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
582 /* pointer, segment address pair */
583 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
586 if (mask.xmm32u(0) & 0x80000000) op1.xmm32u(0) = op2.xmm32u(0);
587 if (mask.xmm32u(1) & 0x80000000) op1.xmm32u(1) = op2.xmm32u(1);
588 if (mask.xmm32u(2) & 0x80000000) op1.xmm32u(2) = op2.xmm32u(2);
589 if (mask.xmm32u(3) & 0x80000000) op1.xmm32u(3) = op2.xmm32u(3);
591 /* now write result back to destination */
592 BX_WRITE_XMM_REG(i->nnn(), op1);
593 #else
594 BX_INFO(("BLENDVPS_VpsWps: required SSE4, use --enable-sse option"));
595 exception(BX_UD_EXCEPTION, 0, 0);
596 #endif
599 /* 66 0F 38 15 */
600 void BX_CPP_AttrRegparmN(1) BX_CPU_C::BLENDVPD_VpdWpd(bxInstruction_c *i)
602 #if BX_SUPPORT_SSE >= 4
603 BX_CPU_THIS_PTR prepareSSE();
605 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2,
606 mask = BX_READ_XMM_REG(0); // XMM0
608 /* op2 is a register or memory reference */
609 if (i->modC0()) {
610 op2 = BX_READ_XMM_REG(i->rm());
612 else {
613 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
614 /* pointer, segment address pair */
615 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
618 if (mask.xmm32u(1) & 0x80000000) op1.xmm64u(0) = op2.xmm64u(0);
619 if (mask.xmm32u(3) & 0x80000000) op1.xmm64u(1) = op2.xmm64u(1);
621 /* now write result back to destination */
622 BX_WRITE_XMM_REG(i->nnn(), op1);
623 #else
624 BX_INFO(("BLENDVPD_VpdWpd: required SSE4, use --enable-sse option"));
625 exception(BX_UD_EXCEPTION, 0, 0);
626 #endif
629 /* 66 0F 38 17 */
630 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PTEST_VdqWdq(bxInstruction_c *i)
632 #if BX_SUPPORT_SSE >= 4
633 BX_CPU_THIS_PTR prepareSSE();
635 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
636 unsigned result = 0;
638 /* op2 is a register or memory reference */
639 if (i->modC0()) {
640 op2 = BX_READ_XMM_REG(i->rm());
642 else {
643 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
644 /* pointer, segment address pair */
645 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
648 if ((op2.xmm64u(0) & op1.xmm64u(0)) == 0 &&
649 (op2.xmm64u(1) & op1.xmm64u(1)) == 0) result |= EFlagsZFMask;
651 if ((op2.xmm64u(0) & ~op1.xmm64u(0)) == 0 &&
652 (op2.xmm64u(1) & ~op1.xmm64u(1)) == 0) result |= EFlagsCFMask;
654 setEFlagsOSZAPC(result);
656 #else
657 BX_INFO(("PTEST_VdqWdq: required SSE4, use --enable-sse option"));
658 exception(BX_UD_EXCEPTION, 0, 0);
659 #endif
662 /* 66 0F 38 28 */
663 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULDQ_VdqWdq(bxInstruction_c *i)
665 #if BX_SUPPORT_SSE >= 4
666 BX_CPU_THIS_PTR prepareSSE();
668 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
670 /* op2 is a register or memory reference */
671 if (i->modC0()) {
672 op2 = BX_READ_XMM_REG(i->rm());
674 else {
675 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
676 /* pointer, segment address pair */
677 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
680 result.xmm64s(0) = Bit64s(op1.xmm32s(0)) * Bit64s(op2.xmm32s(0));
681 result.xmm64s(1) = Bit64s(op1.xmm32s(2)) * Bit64s(op2.xmm32s(2));
683 /* now write result back to destination */
684 BX_WRITE_XMM_REG(i->nnn(), result);
685 #else
686 BX_INFO(("PMULDQ_VdqWdq: required SSE4, use --enable-sse option"));
687 exception(BX_UD_EXCEPTION, 0, 0);
688 #endif
691 /* 66 0F 38 29 */
692 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQQ_VdqWdq(bxInstruction_c *i)
694 #if BX_SUPPORT_SSE >= 4
695 BX_CPU_THIS_PTR prepareSSE();
697 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
699 /* op2 is a register or memory reference */
700 if (i->modC0()) {
701 op2 = BX_READ_XMM_REG(i->rm());
703 else {
704 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
705 /* pointer, segment address pair */
706 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
709 op1.xmm64u(0) = (op1.xmm64u(0) == op2.xmm64u(0)) ?
710 BX_CONST64(0xffffffffffffffff) : 0;
712 op1.xmm64u(1) = (op1.xmm64u(1) == op2.xmm64u(1)) ?
713 BX_CONST64(0xffffffffffffffff) : 0;
715 /* now write result back to destination */
716 BX_WRITE_XMM_REG(i->nnn(), op1);
717 #else
718 BX_INFO(("PCMPEQQ_VdqWdq: required SSE4, use --enable-sse option"));
719 exception(BX_UD_EXCEPTION, 0, 0);
720 #endif
723 /* 66 0F 38 2B */
724 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSDW_VdqWdq(bxInstruction_c *i)
726 #if BX_SUPPORT_SSE >= 4
727 BX_CPU_THIS_PTR prepareSSE();
729 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
731 /* op2 is a register or memory reference */
732 if (i->modC0()) {
733 op2 = BX_READ_XMM_REG(i->rm());
735 else {
736 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
737 /* pointer, segment address pair */
738 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
741 result.xmm16u(0) = SaturateDwordSToWordU(op1.xmm32s(0));
742 result.xmm16u(1) = SaturateDwordSToWordU(op1.xmm32s(1));
743 result.xmm16u(2) = SaturateDwordSToWordU(op1.xmm32s(2));
744 result.xmm16u(3) = SaturateDwordSToWordU(op1.xmm32s(3));
745 result.xmm16u(4) = SaturateDwordSToWordU(op2.xmm32s(0));
746 result.xmm16u(5) = SaturateDwordSToWordU(op2.xmm32s(1));
747 result.xmm16u(6) = SaturateDwordSToWordU(op2.xmm32s(2));
748 result.xmm16u(7) = SaturateDwordSToWordU(op2.xmm32s(3));
750 /* now write result back to destination */
751 BX_WRITE_XMM_REG(i->nnn(), result);
752 #else
753 BX_INFO(("PACKUSDW_VdqWdq: required SSE4, use --enable-sse option"));
754 exception(BX_UD_EXCEPTION, 0, 0);
755 #endif
758 /* 66 0F 38 37 */
759 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTQ_VdqWdq(bxInstruction_c *i)
761 #if (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
762 BX_CPU_THIS_PTR prepareSSE();
764 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
766 /* op2 is a register or memory reference */
767 if (i->modC0()) {
768 op2 = BX_READ_XMM_REG(i->rm());
770 else {
771 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
772 /* pointer, segment address pair */
773 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
776 op1.xmm64u(0) = (op1.xmm64u(0) > op2.xmm64u(0)) ?
777 BX_CONST64(0xffffffffffffffff) : 0;
779 op1.xmm64u(1) = (op1.xmm64u(1) > op2.xmm64u(1)) ?
780 BX_CONST64(0xffffffffffffffff) : 0;
782 /* now write result back to destination */
783 BX_WRITE_XMM_REG(i->nnn(), op1);
784 #else
785 BX_INFO(("PCMPGTQ_VdqWdq: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
786 exception(BX_UD_EXCEPTION, 0, 0);
787 #endif
790 /* 66 0F 38 38 */
791 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSB_VdqWdq(bxInstruction_c *i)
793 #if BX_SUPPORT_SSE >= 4
794 BX_CPU_THIS_PTR prepareSSE();
796 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
798 /* op2 is a register or memory reference */
799 if (i->modC0()) {
800 op2 = BX_READ_XMM_REG(i->rm());
802 else {
803 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
804 /* pointer, segment address pair */
805 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
808 for(unsigned j=0; j<16; j++) {
809 if(op2.xmmsbyte(j) < op1.xmmsbyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
812 /* now write result back to destination */
813 BX_WRITE_XMM_REG(i->nnn(), op1);
814 #else
815 BX_INFO(("PMINSB_VdqWdq: required SSE4, use --enable-sse option"));
816 exception(BX_UD_EXCEPTION, 0, 0);
817 #endif
820 /* 66 0F 38 39 */
821 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSD_VdqWdq(bxInstruction_c *i)
823 #if BX_SUPPORT_SSE >= 4
824 BX_CPU_THIS_PTR prepareSSE();
826 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
828 /* op2 is a register or memory reference */
829 if (i->modC0()) {
830 op2 = BX_READ_XMM_REG(i->rm());
832 else {
833 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
834 /* pointer, segment address pair */
835 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
838 if(op2.xmm32s(0) < op1.xmm32s(0)) op1.xmm32u(0) = op2.xmm32u(0);
839 if(op2.xmm32s(1) < op1.xmm32s(1)) op1.xmm32u(1) = op2.xmm32u(1);
840 if(op2.xmm32s(2) < op1.xmm32s(2)) op1.xmm32u(2) = op2.xmm32u(2);
841 if(op2.xmm32s(3) < op1.xmm32s(3)) op1.xmm32u(3) = op2.xmm32u(3);
843 /* now write result back to destination */
844 BX_WRITE_XMM_REG(i->nnn(), op1);
845 #else
846 BX_INFO(("PMINSD_VdqWdq: required SSE4, use --enable-sse option"));
847 exception(BX_UD_EXCEPTION, 0, 0);
848 #endif
851 /* 66 0F 38 3A */
852 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUW_VdqWdq(bxInstruction_c *i)
854 #if BX_SUPPORT_SSE >= 4
855 BX_CPU_THIS_PTR prepareSSE();
857 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
859 /* op2 is a register or memory reference */
860 if (i->modC0()) {
861 op2 = BX_READ_XMM_REG(i->rm());
863 else {
864 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
865 /* pointer, segment address pair */
866 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
869 if(op2.xmm16u(0) < op1.xmm16u(0)) op1.xmm16u(0) = op2.xmm16u(0);
870 if(op2.xmm16u(1) < op1.xmm16u(1)) op1.xmm16u(1) = op2.xmm16u(1);
871 if(op2.xmm16u(2) < op1.xmm16u(2)) op1.xmm16u(2) = op2.xmm16u(2);
872 if(op2.xmm16u(3) < op1.xmm16u(3)) op1.xmm16u(3) = op2.xmm16u(3);
873 if(op2.xmm16u(4) < op1.xmm16u(4)) op1.xmm16u(4) = op2.xmm16u(4);
874 if(op2.xmm16u(5) < op1.xmm16u(5)) op1.xmm16u(5) = op2.xmm16u(5);
875 if(op2.xmm16u(6) < op1.xmm16u(6)) op1.xmm16u(6) = op2.xmm16u(6);
876 if(op2.xmm16u(7) < op1.xmm16u(7)) op1.xmm16u(7) = op2.xmm16u(7);
878 /* now write result back to destination */
879 BX_WRITE_XMM_REG(i->nnn(), op1);
880 #else
881 BX_INFO(("PMINUW_VdqWdq: required SSE4, use --enable-sse option"));
882 exception(BX_UD_EXCEPTION, 0, 0);
883 #endif
886 /* 66 0F 38 3B */
887 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUD_VdqWdq(bxInstruction_c *i)
889 #if BX_SUPPORT_SSE >= 4
890 BX_CPU_THIS_PTR prepareSSE();
892 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
894 /* op2 is a register or memory reference */
895 if (i->modC0()) {
896 op2 = BX_READ_XMM_REG(i->rm());
898 else {
899 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
900 /* pointer, segment address pair */
901 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
904 if(op2.xmm32u(0) < op1.xmm32u(0)) op1.xmm32u(0) = op2.xmm32u(0);
905 if(op2.xmm32u(1) < op1.xmm32u(1)) op1.xmm32u(1) = op2.xmm32u(1);
906 if(op2.xmm32u(2) < op1.xmm32u(2)) op1.xmm32u(2) = op2.xmm32u(2);
907 if(op2.xmm32u(3) < op1.xmm32u(3)) op1.xmm32u(3) = op2.xmm32u(3);
909 /* now write result back to destination */
910 BX_WRITE_XMM_REG(i->nnn(), op1);
911 #else
912 BX_INFO(("PMINUD_VdqWdq: required SSE4, use --enable-sse option"));
913 exception(BX_UD_EXCEPTION, 0, 0);
914 #endif
917 /* 66 0F 38 3C */
918 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSB_VdqWdq(bxInstruction_c *i)
920 #if BX_SUPPORT_SSE >= 4
921 BX_CPU_THIS_PTR prepareSSE();
923 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
925 /* op2 is a register or memory reference */
926 if (i->modC0()) {
927 op2 = BX_READ_XMM_REG(i->rm());
929 else {
930 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
931 /* pointer, segment address pair */
932 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
935 for(unsigned j=0; j<16; j++) {
936 if(op2.xmmsbyte(j) > op1.xmmsbyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
939 /* now write result back to destination */
940 BX_WRITE_XMM_REG(i->nnn(), op1);
941 #else
942 BX_INFO(("PMAXSB_VdqWdq: required SSE4, use --enable-sse option"));
943 exception(BX_UD_EXCEPTION, 0, 0);
944 #endif
947 /* 66 0F 38 3D */
948 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSD_VdqWdq(bxInstruction_c *i)
950 #if BX_SUPPORT_SSE >= 4
951 BX_CPU_THIS_PTR prepareSSE();
953 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
955 /* op2 is a register or memory reference */
956 if (i->modC0()) {
957 op2 = BX_READ_XMM_REG(i->rm());
959 else {
960 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
961 /* pointer, segment address pair */
962 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
965 if(op2.xmm32s(0) > op1.xmm32s(0)) op1.xmm32u(0) = op2.xmm32u(0);
966 if(op2.xmm32s(1) > op1.xmm32s(1)) op1.xmm32u(1) = op2.xmm32u(1);
967 if(op2.xmm32s(2) > op1.xmm32s(2)) op1.xmm32u(2) = op2.xmm32u(2);
968 if(op2.xmm32s(3) > op1.xmm32s(3)) op1.xmm32u(3) = op2.xmm32u(3);
970 /* now write result back to destination */
971 BX_WRITE_XMM_REG(i->nnn(), op1);
972 #else
973 BX_INFO(("PMAXSD_VdqWdq: required SSE4, use --enable-sse option"));
974 exception(BX_UD_EXCEPTION, 0, 0);
975 #endif
978 /* 66 0F 38 3E */
979 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUW_VdqWdq(bxInstruction_c *i)
981 #if BX_SUPPORT_SSE >= 4
982 BX_CPU_THIS_PTR prepareSSE();
984 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
986 /* op2 is a register or memory reference */
987 if (i->modC0()) {
988 op2 = BX_READ_XMM_REG(i->rm());
990 else {
991 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
992 /* pointer, segment address pair */
993 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
996 if(op2.xmm16u(0) > op1.xmm16u(0)) op1.xmm16u(0) = op2.xmm16u(0);
997 if(op2.xmm16u(1) > op1.xmm16u(1)) op1.xmm16u(1) = op2.xmm16u(1);
998 if(op2.xmm16u(2) > op1.xmm16u(2)) op1.xmm16u(2) = op2.xmm16u(2);
999 if(op2.xmm16u(3) > op1.xmm16u(3)) op1.xmm16u(3) = op2.xmm16u(3);
1000 if(op2.xmm16u(4) > op1.xmm16u(4)) op1.xmm16u(4) = op2.xmm16u(4);
1001 if(op2.xmm16u(5) > op1.xmm16u(5)) op1.xmm16u(5) = op2.xmm16u(5);
1002 if(op2.xmm16u(6) > op1.xmm16u(6)) op1.xmm16u(6) = op2.xmm16u(6);
1003 if(op2.xmm16u(7) > op1.xmm16u(7)) op1.xmm16u(7) = op2.xmm16u(7);
1005 /* now write result back to destination */
1006 BX_WRITE_XMM_REG(i->nnn(), op1);
1007 #else
1008 BX_INFO(("PMAXUW_VdqWdq: required SSE4, use --enable-sse option"));
1009 exception(BX_UD_EXCEPTION, 0, 0);
1010 #endif
1013 /* 66 0F 38 3F */
1014 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUD_VdqWdq(bxInstruction_c *i)
1016 #if BX_SUPPORT_SSE >= 4
1017 BX_CPU_THIS_PTR prepareSSE();
1019 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1021 /* op2 is a register or memory reference */
1022 if (i->modC0()) {
1023 op2 = BX_READ_XMM_REG(i->rm());
1025 else {
1026 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1027 /* pointer, segment address pair */
1028 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1031 if(op2.xmm32u(0) > op1.xmm32u(0)) op1.xmm32u(0) = op2.xmm32u(0);
1032 if(op2.xmm32u(1) > op1.xmm32u(1)) op1.xmm32u(1) = op2.xmm32u(1);
1033 if(op2.xmm32u(2) > op1.xmm32u(2)) op1.xmm32u(2) = op2.xmm32u(2);
1034 if(op2.xmm32u(3) > op1.xmm32u(3)) op1.xmm32u(3) = op2.xmm32u(3);
1036 /* now write result back to destination */
1037 BX_WRITE_XMM_REG(i->nnn(), op1);
1038 #else
1039 BX_INFO(("PMAXUD_VdqWdq: required SSE4, use --enable-sse option"));
1040 exception(BX_UD_EXCEPTION, 0, 0);
1041 #endif
1044 /* 66 0F 38 40 */
1045 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULLD_VdqWdq(bxInstruction_c *i)
1047 #if BX_SUPPORT_SSE >= 4
1048 BX_CPU_THIS_PTR prepareSSE();
1050 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1052 /* op2 is a register or memory reference */
1053 if (i->modC0()) {
1054 op2 = BX_READ_XMM_REG(i->rm());
1056 else {
1057 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1058 /* pointer, segment address pair */
1059 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1062 Bit64s product1 = Bit64s(op1.xmm32s(0)) * Bit64s(op2.xmm32s(0));
1063 Bit64s product2 = Bit64s(op1.xmm32s(1)) * Bit64s(op2.xmm32s(1));
1064 Bit64s product3 = Bit64s(op1.xmm32s(2)) * Bit64s(op2.xmm32s(2));
1065 Bit64s product4 = Bit64s(op1.xmm32s(3)) * Bit64s(op2.xmm32s(3));
1067 op1.xmm32u(0) = (Bit32u)(product1 & 0xFFFFFFFF);
1068 op1.xmm32u(1) = (Bit32u)(product2 & 0xFFFFFFFF);
1069 op1.xmm32u(2) = (Bit32u)(product3 & 0xFFFFFFFF);
1070 op1.xmm32u(3) = (Bit32u)(product4 & 0xFFFFFFFF);
1072 /* now write result back to destination */
1073 BX_WRITE_XMM_REG(i->nnn(), op1);
1074 #else
1075 BX_INFO(("PMULLD_VdqWdq: required SSE4, use --enable-sse option"));
1076 exception(BX_UD_EXCEPTION, 0, 0);
1077 #endif
1080 /* 66 0F 38 41 */
1081 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHMINPOSUW_VdqWdq(bxInstruction_c *i)
1083 #if BX_SUPPORT_SSE >= 4
1084 BX_CPU_THIS_PTR prepareSSE();
1086 BxPackedXmmRegister op, result;
1088 /* op2 is a register or memory reference */
1089 if (i->modC0()) {
1090 op = BX_READ_XMM_REG(i->rm());
1092 else {
1093 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1094 /* pointer, segment address pair */
1095 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
1098 unsigned min = 0;
1100 for (unsigned j=1; j < 8; j++) {
1101 if (op.xmm16u(j) < op.xmm16u(min)) min = j;
1104 result.xmm16u(0) = op.xmm16u(min);
1105 result.xmm16u(1) = min;
1106 result.xmm32u(1) = 0;
1107 result.xmm64u(1) = 0;
1109 /* now write result back to destination */
1110 BX_WRITE_XMM_REG(i->nnn(), result);
1111 #else
1112 BX_INFO(("PHMINPOSUW_VdqWdq: required SSE4, use --enable-sse option"));
1113 exception(BX_UD_EXCEPTION, 0, 0);
1114 #endif
1117 /* 66 0F 3A 0C */
1118 void BX_CPP_AttrRegparmN(1) BX_CPU_C::BLENDPS_VpsWpsIb(bxInstruction_c *i)
1120 #if BX_SUPPORT_SSE >= 4
1121 BX_CPU_THIS_PTR prepareSSE();
1123 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1124 Bit8u mask = i->Ib();
1126 /* op2 is a register or memory reference */
1127 if (i->modC0()) {
1128 op2 = BX_READ_XMM_REG(i->rm());
1130 else {
1131 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1132 /* pointer, segment address pair */
1133 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1136 if (mask & 0x1) op1.xmm32u(0) = op2.xmm32u(0);
1137 if (mask & 0x2) op1.xmm32u(1) = op2.xmm32u(1);
1138 if (mask & 0x4) op1.xmm32u(2) = op2.xmm32u(2);
1139 if (mask & 0x8) op1.xmm32u(3) = op2.xmm32u(3);
1141 /* now write result back to destination */
1142 BX_WRITE_XMM_REG(i->nnn(), op1);
1143 #else
1144 BX_INFO(("BLENDPS_VpsWpsIb: required SSE4, use --enable-sse option"));
1145 exception(BX_UD_EXCEPTION, 0, 0);
1146 #endif
1149 /* 66 0F 3A 0D */
1150 void BX_CPP_AttrRegparmN(1) BX_CPU_C::BLENDPD_VpdWpdIb(bxInstruction_c *i)
1152 #if BX_SUPPORT_SSE >= 4
1153 BX_CPU_THIS_PTR prepareSSE();
1155 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1156 Bit8u mask = i->Ib();
1158 /* op2 is a register or memory reference */
1159 if (i->modC0()) {
1160 op2 = BX_READ_XMM_REG(i->rm());
1162 else {
1163 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1164 /* pointer, segment address pair */
1165 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1168 if (mask & 0x1) op1.xmm64u(0) = op2.xmm64u(0);
1169 if (mask & 0x2) op1.xmm64u(1) = op2.xmm64u(1);
1171 /* now write result back to destination */
1172 BX_WRITE_XMM_REG(i->nnn(), op1);
1173 #else
1174 BX_INFO(("BLENDPD_VpdWpdIb: required SSE4, use --enable-sse option"));
1175 exception(BX_UD_EXCEPTION, 0, 0);
1176 #endif
1179 /* 66 0F 3A 0E */
1180 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PBLENDW_VdqWdqIb(bxInstruction_c *i)
1182 #if BX_SUPPORT_SSE >= 4
1183 BX_CPU_THIS_PTR prepareSSE();
1185 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1186 Bit8u mask = i->Ib();
1188 /* op2 is a register or memory reference */
1189 if (i->modC0()) {
1190 op2 = BX_READ_XMM_REG(i->rm());
1192 else {
1193 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1194 /* pointer, segment address pair */
1195 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1198 if (mask & 0x01) op1.xmm16u(0) = op2.xmm16u(0);
1199 if (mask & 0x02) op1.xmm16u(1) = op2.xmm16u(1);
1200 if (mask & 0x04) op1.xmm16u(2) = op2.xmm16u(2);
1201 if (mask & 0x08) op1.xmm16u(3) = op2.xmm16u(3);
1202 if (mask & 0x10) op1.xmm16u(4) = op2.xmm16u(4);
1203 if (mask & 0x20) op1.xmm16u(5) = op2.xmm16u(5);
1204 if (mask & 0x40) op1.xmm16u(6) = op2.xmm16u(6);
1205 if (mask & 0x80) op1.xmm16u(7) = op2.xmm16u(7);
1207 /* now write result back to destination */
1208 BX_WRITE_XMM_REG(i->nnn(), op1);
1209 #else
1210 BX_INFO(("PBLENDW_VdqWdqIb: required SSE4, use --enable-sse option"));
1211 exception(BX_UD_EXCEPTION, 0, 0);
1212 #endif
1215 /* 66 0F 3A 14 */
1216 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_HbdUdqIb(bxInstruction_c *i)
1218 #if BX_SUPPORT_SSE >= 4
1219 BX_CPU_THIS_PTR prepareSSE();
1221 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
1222 Bit8u result = op.xmmubyte(i->Ib() & 0xF);
1224 /* result is a register or memory reference */
1225 if (i->modC0()) {
1226 BX_WRITE_32BIT_REGZ(i->nnn(), result);
1228 else {
1229 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1230 /* pointer, segment address pair */
1231 write_virtual_byte(i->seg(), eaddr, result);
1233 #else
1234 BX_INFO(("PEXTRB_HbdUdqIb: required SSE4, use --enable-sse option"));
1235 exception(BX_UD_EXCEPTION, 0, 0);
1236 #endif
1239 /* 66 0F 3A 15 */
1240 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_HwdUdqIb(bxInstruction_c *i)
1242 #if BX_SUPPORT_SSE >= 4
1243 BX_CPU_THIS_PTR prepareSSE();
1245 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
1246 Bit16u result = op.xmm16u(i->Ib() & 7);
1248 /* result is a register or memory reference */
1249 if (i->modC0()) {
1250 BX_WRITE_32BIT_REGZ(i->nnn(), result);
1252 else {
1253 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1254 /* pointer, segment address pair */
1255 write_virtual_word(i->seg(), eaddr, result);
1257 #else
1258 BX_INFO(("PEXTRW_HwdUdqIb: required SSE4, use --enable-sse option"));
1259 exception(BX_UD_EXCEPTION, 0, 0);
1260 #endif
1263 /* 66 0F 3A 16 */
1264 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_HdUdqIb(bxInstruction_c *i)
1266 #if BX_SUPPORT_SSE >= 4
1267 BX_CPU_THIS_PTR prepareSSE();
1269 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
1271 #if BX_SUPPORT_X86_64
1272 if (i->os64L()) /* 64 bit operand size mode */
1274 Bit64u result = op.xmm64u(i->Ib() & 1);
1276 /* result is a register or memory reference */
1277 if (i->modC0()) {
1278 BX_WRITE_64BIT_REG(i->nnn(), result);
1280 else {
1281 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1282 /* pointer, segment address pair */
1283 write_virtual_qword_64(i->seg(), eaddr, result);
1286 else
1287 #endif
1289 Bit32u result = op.xmm32u(i->Ib() & 3);
1291 /* result is a register or memory reference */
1292 if (i->modC0()) {
1293 BX_WRITE_32BIT_REGZ(i->nnn(), result);
1295 else {
1296 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1297 /* pointer, segment address pair */
1298 write_virtual_dword(i->seg(), eaddr, result);
1301 #else
1302 BX_INFO(("PEXTRD_HdUdqIb: required SSE4, use --enable-sse option"));
1303 exception(BX_UD_EXCEPTION, 0, 0);
1304 #endif
1307 /* 66 0F 3A 17 */
1308 void BX_CPP_AttrRegparmN(1) BX_CPU_C::EXTRACTPS_HdUpsIb(bxInstruction_c *i)
1310 #if BX_SUPPORT_SSE >= 4
1311 BX_CPU_THIS_PTR prepareSSE();
1313 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
1314 Bit32u result = op.xmm32u(i->Ib() & 3);
1316 /* result is a register or memory reference */
1317 if (i->modC0()) {
1318 BX_WRITE_32BIT_REGZ(i->nnn(), result);
1320 else {
1321 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1322 /* pointer, segment address pair */
1323 write_virtual_dword(i->seg(), eaddr, result);
1325 #else
1326 BX_INFO(("EXTRACTPS_HdUpsIb: required SSE4, use --enable-sse option"));
1327 exception(BX_UD_EXCEPTION, 0, 0);
1328 #endif
1331 /* 66 0F 3A 20 */
1332 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRB_VdqEbIb(bxInstruction_c *i)
1334 #if BX_SUPPORT_SSE >= 4
1335 BX_CPU_THIS_PTR prepareSSE();
1337 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
1338 Bit8u op2;
1340 /* op2 is a register or memory reference */
1341 if (i->modC0()) {
1342 op2 = BX_READ_16BIT_REG(i->rm()); // won't allow reading of AH/CH/BH/DH
1344 else {
1345 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1346 /* pointer, segment address pair */
1347 op2 = read_virtual_byte(i->seg(), eaddr);
1350 op1.xmmubyte(i->Ib() & 0xF) = op2;
1352 /* now write result back to destination */
1353 BX_WRITE_XMM_REG(i->nnn(), op1);
1354 #else
1355 BX_INFO(("PINSRB_VdqEbIb: required SSE4, use --enable-sse option"));
1356 exception(BX_UD_EXCEPTION, 0, 0);
1357 #endif
1360 /* 66 0F 3A 21 */
1361 void BX_CPP_AttrRegparmN(1) BX_CPU_C::INSERTPS_VpsWssIb(bxInstruction_c *i)
1363 #if BX_SUPPORT_SSE >= 4
1364 BX_CPU_THIS_PTR prepareSSE();
1366 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
1367 Bit8u control = i->Ib();
1368 Bit32u op2;
1370 /* op2 is a register or memory reference */
1371 if (i->modC0()) {
1372 BxPackedXmmRegister temp = BX_READ_XMM_REG(i->rm());
1373 op2 = temp.xmm32u((control >> 6) & 3);
1375 else {
1376 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1377 /* pointer, segment address pair */
1378 op2 = read_virtual_dword(i->seg(), eaddr);
1381 op1.xmm32u((control >> 4) & 3) = op2;
1383 if (control & 1) op1.xmm32u(0) = 0;
1384 if (control & 2) op1.xmm32u(1) = 0;
1385 if (control & 4) op1.xmm32u(2) = 0;
1386 if (control & 8) op1.xmm32u(3) = 0;
1388 /* now write result back to destination */
1389 BX_WRITE_XMM_REG(i->nnn(), op1);
1390 #else
1391 BX_INFO(("INSERTPS_VpsWssIb: required SSE4, use --enable-sse option"));
1392 exception(BX_UD_EXCEPTION, 0, 0);
1393 #endif
1396 /* 66 0F 3A 22 */
1397 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRD_VdqEdIb(bxInstruction_c *i)
1399 #if BX_SUPPORT_SSE >= 4
1400 BX_CPU_THIS_PTR prepareSSE();
1402 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
1404 #if BX_SUPPORT_X86_64
1405 if (i->os64L()) /* 64 bit operand size mode */
1407 Bit64u op2;
1409 /* op2 is a register or memory reference */
1410 if (i->modC0()) {
1411 op2 = BX_READ_64BIT_REG(i->rm());
1413 else {
1414 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1415 /* pointer, segment address pair */
1416 op2 = read_virtual_qword_64(i->seg(), eaddr);
1419 op1.xmm64u(i->Ib() & 1) = op2;
1421 else
1422 #endif
1424 Bit32u op2;
1426 /* op2 is a register or memory reference */
1427 if (i->modC0()) {
1428 op2 = BX_READ_32BIT_REG(i->rm());
1430 else {
1431 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1432 /* pointer, segment address pair */
1433 op2 = read_virtual_dword(i->seg(), eaddr);
1436 op1.xmm32u(i->Ib() & 3) = op2;
1439 /* now write result back to destination */
1440 BX_WRITE_XMM_REG(i->nnn(), op1);
1441 #else
1442 BX_INFO(("PINSRD_VdqEdIb: required SSE4, use --enable-sse option"));
1443 exception(BX_UD_EXCEPTION, 0, 0);
1444 #endif
1447 /* 66 0F 3A 42 */
1448 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MPSADBW_VdqWdqIb(bxInstruction_c *i)
1450 #if BX_SUPPORT_SSE >= 4
1451 BX_CPU_THIS_PTR prepareSSE();
1453 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1455 /* op2 is a register or memory reference */
1456 if (i->modC0()) {
1457 op2 = BX_READ_XMM_REG(i->rm());
1459 else {
1460 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1461 /* pointer, segment address pair */
1462 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1465 unsigned src_offset = (i->Ib() & 3) * 4;
1466 unsigned dst_offset = ((i->Ib() >> 2) & 1) * 4;
1468 for (unsigned j=0; j < 8; j++)
1470 result.xmm16u(j) = 0;
1472 for (unsigned k=0; k < 4; k++) {
1473 Bit8u temp1 = op1.xmmubyte(j + k + dst_offset);
1474 Bit8u temp2 = op2.xmmubyte( k + src_offset);
1475 if (temp1 > temp2)
1476 result.xmm16u(j) += (temp1 - temp2);
1477 else
1478 result.xmm16u(j) += (temp2 - temp1);
1482 BX_WRITE_XMM_REG(i->nnn(), result);
1483 #else
1484 BX_INFO(("MPSADBW_VdqWdqIb: required SSE4, use --enable-sse option"));
1485 exception(BX_UD_EXCEPTION, 0, 0);
1486 #endif
1489 #endif // (BX_SUPPORT_SSE >= 4 || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
1491 /* 66 0F 60 */
1492 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKLBW_VdqWdq(bxInstruction_c *i)
1494 #if BX_SUPPORT_SSE >= 2
1495 BX_CPU_THIS_PTR prepareSSE();
1497 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1499 /* op2 is a register or memory reference */
1500 if (i->modC0()) {
1501 op2 = BX_READ_XMM_REG(i->rm());
1503 else {
1504 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1505 /* pointer, segment address pair */
1506 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1509 result.xmmubyte(0x0) = op1.xmmubyte(0);
1510 result.xmmubyte(0x1) = op2.xmmubyte(0);
1511 result.xmmubyte(0x2) = op1.xmmubyte(1);
1512 result.xmmubyte(0x3) = op2.xmmubyte(1);
1513 result.xmmubyte(0x4) = op1.xmmubyte(2);
1514 result.xmmubyte(0x5) = op2.xmmubyte(2);
1515 result.xmmubyte(0x6) = op1.xmmubyte(3);
1516 result.xmmubyte(0x7) = op2.xmmubyte(3);
1517 result.xmmubyte(0x8) = op1.xmmubyte(4);
1518 result.xmmubyte(0x9) = op2.xmmubyte(4);
1519 result.xmmubyte(0xA) = op1.xmmubyte(5);
1520 result.xmmubyte(0xB) = op2.xmmubyte(5);
1521 result.xmmubyte(0xC) = op1.xmmubyte(6);
1522 result.xmmubyte(0xD) = op2.xmmubyte(6);
1523 result.xmmubyte(0xE) = op1.xmmubyte(7);
1524 result.xmmubyte(0xF) = op2.xmmubyte(7);
1526 /* now write result back to destination */
1527 BX_WRITE_XMM_REG(i->nnn(), result);
1528 #else
1529 BX_INFO(("PUNPCKLBW_VdqWdq: required SSE2, use --enable-sse option"));
1530 exception(BX_UD_EXCEPTION, 0, 0);
1531 #endif
1534 /* 66 0F 61 */
1535 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKLWD_VdqWdq(bxInstruction_c *i)
1537 #if BX_SUPPORT_SSE >= 2
1538 BX_CPU_THIS_PTR prepareSSE();
1540 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1542 /* op2 is a register or memory reference */
1543 if (i->modC0()) {
1544 op2 = BX_READ_XMM_REG(i->rm());
1546 else {
1547 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1548 /* pointer, segment address pair */
1549 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1552 result.xmm16u(0) = op1.xmm16u(0);
1553 result.xmm16u(1) = op2.xmm16u(0);
1554 result.xmm16u(2) = op1.xmm16u(1);
1555 result.xmm16u(3) = op2.xmm16u(1);
1556 result.xmm16u(4) = op1.xmm16u(2);
1557 result.xmm16u(5) = op2.xmm16u(2);
1558 result.xmm16u(6) = op1.xmm16u(3);
1559 result.xmm16u(7) = op2.xmm16u(3);
1561 /* now write result back to destination */
1562 BX_WRITE_XMM_REG(i->nnn(), result);
1563 #else
1564 BX_INFO(("PUNPCKLWD_VdqWdq: required SSE2, use --enable-sse option"));
1565 exception(BX_UD_EXCEPTION, 0, 0);
1566 #endif
1569 /* UNPCKLPS: 0F 14 */
1570 /* PUNPCKLDQ: 66 0F 62 */
1571 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UNPCKLPS_VpsWdq(bxInstruction_c *i)
1573 #if BX_SUPPORT_SSE >= 1
1574 BX_CPU_THIS_PTR prepareSSE();
1576 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1578 /* op2 is a register or memory reference */
1579 if (i->modC0()) {
1580 op2 = BX_READ_XMM_REG(i->rm());
1582 else {
1583 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1584 /* pointer, segment address pair */
1585 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1588 result.xmm32u(0) = op1.xmm32u(0);
1589 result.xmm32u(1) = op2.xmm32u(0);
1590 result.xmm32u(2) = op1.xmm32u(1);
1591 result.xmm32u(3) = op2.xmm32u(1);
1593 /* now write result back to destination */
1594 BX_WRITE_XMM_REG(i->nnn(), result);
1595 #else
1596 BX_INFO(("UNPCKLPS_VpsWdq: required SSE, use --enable-sse option"));
1597 exception(BX_UD_EXCEPTION, 0, 0);
1598 #endif
1601 /* 66 0F 63 */
1602 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKSSWB_VdqWdq(bxInstruction_c *i)
1604 #if BX_SUPPORT_SSE >= 2
1605 BX_CPU_THIS_PTR prepareSSE();
1607 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1609 /* op2 is a register or memory reference */
1610 if (i->modC0()) {
1611 op2 = BX_READ_XMM_REG(i->rm());
1613 else {
1614 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1615 /* pointer, segment address pair */
1616 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1619 result.xmmsbyte(0x0) = SaturateWordSToByteS(op1.xmm16s(0));
1620 result.xmmsbyte(0x1) = SaturateWordSToByteS(op1.xmm16s(1));
1621 result.xmmsbyte(0x2) = SaturateWordSToByteS(op1.xmm16s(2));
1622 result.xmmsbyte(0x3) = SaturateWordSToByteS(op1.xmm16s(3));
1623 result.xmmsbyte(0x4) = SaturateWordSToByteS(op1.xmm16s(4));
1624 result.xmmsbyte(0x5) = SaturateWordSToByteS(op1.xmm16s(5));
1625 result.xmmsbyte(0x6) = SaturateWordSToByteS(op1.xmm16s(6));
1626 result.xmmsbyte(0x7) = SaturateWordSToByteS(op1.xmm16s(7));
1628 result.xmmsbyte(0x8) = SaturateWordSToByteS(op2.xmm16s(0));
1629 result.xmmsbyte(0x9) = SaturateWordSToByteS(op2.xmm16s(1));
1630 result.xmmsbyte(0xA) = SaturateWordSToByteS(op2.xmm16s(2));
1631 result.xmmsbyte(0xB) = SaturateWordSToByteS(op2.xmm16s(3));
1632 result.xmmsbyte(0xC) = SaturateWordSToByteS(op2.xmm16s(4));
1633 result.xmmsbyte(0xD) = SaturateWordSToByteS(op2.xmm16s(5));
1634 result.xmmsbyte(0xE) = SaturateWordSToByteS(op2.xmm16s(6));
1635 result.xmmsbyte(0xF) = SaturateWordSToByteS(op2.xmm16s(7));
1637 /* now write result back to destination */
1638 BX_WRITE_XMM_REG(i->nnn(), result);
1639 #else
1640 BX_INFO(("PACKSSWB_VdqWdq: required SSE2, use --enable-sse option"));
1641 exception(BX_UD_EXCEPTION, 0, 0);
1642 #endif
1645 /* 66 0F 64 */
1646 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTB_VdqWdq(bxInstruction_c *i)
1648 #if BX_SUPPORT_SSE >= 2
1649 BX_CPU_THIS_PTR prepareSSE();
1651 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1653 /* op2 is a register or memory reference */
1654 if (i->modC0()) {
1655 op2 = BX_READ_XMM_REG(i->rm());
1657 else {
1658 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1659 /* pointer, segment address pair */
1660 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1663 for(unsigned j=0; j<16; j++) {
1664 op1.xmmubyte(j) = (op1.xmmsbyte(j) > op2.xmmsbyte(j)) ? 0xff : 0;
1667 /* now write result back to destination */
1668 BX_WRITE_XMM_REG(i->nnn(), op1);
1669 #else
1670 BX_INFO(("PCMPGTB_VdqWdq: required SSE2, use --enable-sse option"));
1671 exception(BX_UD_EXCEPTION, 0, 0);
1672 #endif
1675 /* 66 0F 65 */
1676 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTW_VdqWdq(bxInstruction_c *i)
1678 #if BX_SUPPORT_SSE >= 2
1679 BX_CPU_THIS_PTR prepareSSE();
1681 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1683 /* op2 is a register or memory reference */
1684 if (i->modC0()) {
1685 op2 = BX_READ_XMM_REG(i->rm());
1687 else {
1688 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1689 /* pointer, segment address pair */
1690 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1693 op1.xmm16u(0) = (op1.xmm16s(0) > op2.xmm16s(0)) ? 0xffff : 0;
1694 op1.xmm16u(1) = (op1.xmm16s(1) > op2.xmm16s(1)) ? 0xffff : 0;
1695 op1.xmm16u(2) = (op1.xmm16s(2) > op2.xmm16s(2)) ? 0xffff : 0;
1696 op1.xmm16u(3) = (op1.xmm16s(3) > op2.xmm16s(3)) ? 0xffff : 0;
1697 op1.xmm16u(4) = (op1.xmm16s(4) > op2.xmm16s(4)) ? 0xffff : 0;
1698 op1.xmm16u(5) = (op1.xmm16s(5) > op2.xmm16s(5)) ? 0xffff : 0;
1699 op1.xmm16u(6) = (op1.xmm16s(6) > op2.xmm16s(6)) ? 0xffff : 0;
1700 op1.xmm16u(7) = (op1.xmm16s(7) > op2.xmm16s(7)) ? 0xffff : 0;
1702 /* now write result back to destination */
1703 BX_WRITE_XMM_REG(i->nnn(), op1);
1704 #else
1705 BX_INFO(("PCMPGTW_VdqWdq: required SSE2, use --enable-sse option"));
1706 exception(BX_UD_EXCEPTION, 0, 0);
1707 #endif
1710 /* 66 0F 66 */
1711 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTD_VdqWdq(bxInstruction_c *i)
1713 #if BX_SUPPORT_SSE >= 2
1714 BX_CPU_THIS_PTR prepareSSE();
1716 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1718 /* op2 is a register or memory reference */
1719 if (i->modC0()) {
1720 op2 = BX_READ_XMM_REG(i->rm());
1722 else {
1723 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1724 /* pointer, segment address pair */
1725 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1728 op1.xmm32u(0) = (op1.xmm32s(0) > op2.xmm32s(0)) ? 0xffffffff : 0;
1729 op1.xmm32u(1) = (op1.xmm32s(1) > op2.xmm32s(1)) ? 0xffffffff : 0;
1730 op1.xmm32u(2) = (op1.xmm32s(2) > op2.xmm32s(2)) ? 0xffffffff : 0;
1731 op1.xmm32u(3) = (op1.xmm32s(3) > op2.xmm32s(3)) ? 0xffffffff : 0;
1733 /* now write result back to destination */
1734 BX_WRITE_XMM_REG(i->nnn(), op1);
1735 #else
1736 BX_INFO(("PCMPGTD_VdqWdq: required SSE2, use --enable-sse option"));
1737 exception(BX_UD_EXCEPTION, 0, 0);
1738 #endif
1741 /* 66 0F 67 */
1742 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSWB_VdqWdq(bxInstruction_c *i)
1744 #if BX_SUPPORT_SSE >= 2
1745 BX_CPU_THIS_PTR prepareSSE();
1747 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1749 /* op2 is a register or memory reference */
1750 if (i->modC0()) {
1751 op2 = BX_READ_XMM_REG(i->rm());
1753 else {
1754 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1755 /* pointer, segment address pair */
1756 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1759 result.xmmubyte(0x0) = SaturateWordSToByteU(op1.xmm16s(0));
1760 result.xmmubyte(0x1) = SaturateWordSToByteU(op1.xmm16s(1));
1761 result.xmmubyte(0x2) = SaturateWordSToByteU(op1.xmm16s(2));
1762 result.xmmubyte(0x3) = SaturateWordSToByteU(op1.xmm16s(3));
1763 result.xmmubyte(0x4) = SaturateWordSToByteU(op1.xmm16s(4));
1764 result.xmmubyte(0x5) = SaturateWordSToByteU(op1.xmm16s(5));
1765 result.xmmubyte(0x6) = SaturateWordSToByteU(op1.xmm16s(6));
1766 result.xmmubyte(0x7) = SaturateWordSToByteU(op1.xmm16s(7));
1768 result.xmmubyte(0x8) = SaturateWordSToByteU(op2.xmm16s(0));
1769 result.xmmubyte(0x9) = SaturateWordSToByteU(op2.xmm16s(1));
1770 result.xmmubyte(0xA) = SaturateWordSToByteU(op2.xmm16s(2));
1771 result.xmmubyte(0xB) = SaturateWordSToByteU(op2.xmm16s(3));
1772 result.xmmubyte(0xC) = SaturateWordSToByteU(op2.xmm16s(4));
1773 result.xmmubyte(0xD) = SaturateWordSToByteU(op2.xmm16s(5));
1774 result.xmmubyte(0xE) = SaturateWordSToByteU(op2.xmm16s(6));
1775 result.xmmubyte(0xF) = SaturateWordSToByteU(op2.xmm16s(7));
1777 /* now write result back to destination */
1778 BX_WRITE_XMM_REG(i->nnn(), result);
1779 #else
1780 BX_INFO(("PACKUSWB_VdqWdq: required SSE2, use --enable-sse option"));
1781 exception(BX_UD_EXCEPTION, 0, 0);
1782 #endif
1785 /* 66 0F 68 */
1786 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKHBW_VdqWdq(bxInstruction_c *i)
1788 #if BX_SUPPORT_SSE >= 2
1789 BX_CPU_THIS_PTR prepareSSE();
1791 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1793 /* op2 is a register or memory reference */
1794 if (i->modC0()) {
1795 op2 = BX_READ_XMM_REG(i->rm());
1797 else {
1798 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1799 /* pointer, segment address pair */
1800 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1803 result.xmmubyte(0x0) = op1.xmmubyte(0x8);
1804 result.xmmubyte(0x1) = op2.xmmubyte(0x8);
1805 result.xmmubyte(0x2) = op1.xmmubyte(0x9);
1806 result.xmmubyte(0x3) = op2.xmmubyte(0x9);
1807 result.xmmubyte(0x4) = op1.xmmubyte(0xA);
1808 result.xmmubyte(0x5) = op2.xmmubyte(0xA);
1809 result.xmmubyte(0x6) = op1.xmmubyte(0xB);
1810 result.xmmubyte(0x7) = op2.xmmubyte(0xB);
1811 result.xmmubyte(0x8) = op1.xmmubyte(0xC);
1812 result.xmmubyte(0x9) = op2.xmmubyte(0xC);
1813 result.xmmubyte(0xA) = op1.xmmubyte(0xD);
1814 result.xmmubyte(0xB) = op2.xmmubyte(0xD);
1815 result.xmmubyte(0xC) = op1.xmmubyte(0xE);
1816 result.xmmubyte(0xD) = op2.xmmubyte(0xE);
1817 result.xmmubyte(0xE) = op1.xmmubyte(0xF);
1818 result.xmmubyte(0xF) = op2.xmmubyte(0xF);
1820 /* now write result back to destination */
1821 BX_WRITE_XMM_REG(i->nnn(), result);
1822 #else
1823 BX_INFO(("PUNPCKHBW_VdqWdq: required SSE2, use --enable-sse option"));
1824 exception(BX_UD_EXCEPTION, 0, 0);
1825 #endif
1828 /* 66 0F 69 */
1829 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKHWD_VdqWdq(bxInstruction_c *i)
1831 #if BX_SUPPORT_SSE >= 2
1832 BX_CPU_THIS_PTR prepareSSE();
1834 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1836 /* op2 is a register or memory reference */
1837 if (i->modC0()) {
1838 op2 = BX_READ_XMM_REG(i->rm());
1840 else {
1841 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1842 /* pointer, segment address pair */
1843 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1846 result.xmm16u(0) = op1.xmm16u(4);
1847 result.xmm16u(1) = op2.xmm16u(4);
1848 result.xmm16u(2) = op1.xmm16u(5);
1849 result.xmm16u(3) = op2.xmm16u(5);
1850 result.xmm16u(4) = op1.xmm16u(6);
1851 result.xmm16u(5) = op2.xmm16u(6);
1852 result.xmm16u(6) = op1.xmm16u(7);
1853 result.xmm16u(7) = op2.xmm16u(7);
1855 /* now write result back to destination */
1856 BX_WRITE_XMM_REG(i->nnn(), result);
1857 #else
1858 BX_INFO(("PUNPCKHWD_VdqWdq: required SSE2, use --enable-sse option"));
1859 exception(BX_UD_EXCEPTION, 0, 0);
1860 #endif
1863 /* UNPCKHPS: 0F 15 */
1864 /* PUNPCKHDQ: 66 0F 6A */
1865 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UNPCKHPS_VpsWdq(bxInstruction_c *i)
1867 #if BX_SUPPORT_SSE >= 1
1868 BX_CPU_THIS_PTR prepareSSE();
1870 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1872 /* op2 is a register or memory reference */
1873 if (i->modC0()) {
1874 op2 = BX_READ_XMM_REG(i->rm());
1876 else {
1877 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1878 /* pointer, segment address pair */
1879 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1882 result.xmm32u(0) = op1.xmm32u(2);
1883 result.xmm32u(1) = op2.xmm32u(2);
1884 result.xmm32u(2) = op1.xmm32u(3);
1885 result.xmm32u(3) = op2.xmm32u(3);
1887 /* now write result back to destination */
1888 BX_WRITE_XMM_REG(i->nnn(), result);
1889 #else
1890 BX_INFO(("UNPCKHPS_VpsWdq: required SSE, use --enable-sse option"));
1891 exception(BX_UD_EXCEPTION, 0, 0);
1892 #endif
1895 /* 66 0F 6B */
1896 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKSSDW_VdqWdq(bxInstruction_c *i)
1898 #if BX_SUPPORT_SSE >= 2
1899 BX_CPU_THIS_PTR prepareSSE();
1901 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1903 /* op2 is a register or memory reference */
1904 if (i->modC0()) {
1905 op2 = BX_READ_XMM_REG(i->rm());
1907 else {
1908 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1909 /* pointer, segment address pair */
1910 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1913 result.xmm16s(0) = SaturateDwordSToWordS(op1.xmm32s(0));
1914 result.xmm16s(1) = SaturateDwordSToWordS(op1.xmm32s(1));
1915 result.xmm16s(2) = SaturateDwordSToWordS(op1.xmm32s(2));
1916 result.xmm16s(3) = SaturateDwordSToWordS(op1.xmm32s(3));
1918 result.xmm16s(4) = SaturateDwordSToWordS(op2.xmm32s(0));
1919 result.xmm16s(5) = SaturateDwordSToWordS(op2.xmm32s(1));
1920 result.xmm16s(6) = SaturateDwordSToWordS(op2.xmm32s(2));
1921 result.xmm16s(7) = SaturateDwordSToWordS(op2.xmm32s(3));
1923 /* now write result back to destination */
1924 BX_WRITE_XMM_REG(i->nnn(), result);
1925 #else
1926 BX_INFO(("PACKSSDW_VdqWdq: required SSE2, use --enable-sse option"));
1927 exception(BX_UD_EXCEPTION, 0, 0);
1928 #endif
1931 /* UNPCKLPD: 66 0F 14 */
1932 /* PUNPCKLQDQ: 66 0F 6C */
1933 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKLQDQ_VdqWdq(bxInstruction_c *i)
1935 #if BX_SUPPORT_SSE >= 2
1936 BX_CPU_THIS_PTR prepareSSE();
1938 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1940 /* op2 is a register or memory reference */
1941 if (i->modC0()) {
1942 op2 = BX_READ_XMM_REG(i->rm());
1944 else {
1945 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1946 /* pointer, segment address pair */
1947 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1950 op1.xmm64u(1) = op2.xmm64u(0);
1952 /* now write result back to destination */
1953 BX_WRITE_XMM_REG(i->nnn(), op1);
1954 #else
1955 BX_INFO(("PUNPCKLQDQ_VdqWdq: required SSE2, use --enable-sse option"));
1956 exception(BX_UD_EXCEPTION, 0, 0);
1957 #endif
1960 /* UNPCKHPD: 66 0F 15 */
1961 /* PUNPCKHQDQ: 66 0F 6D */
1962 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKHQDQ_VdqWdq(bxInstruction_c *i)
1964 #if BX_SUPPORT_SSE >= 2
1965 BX_CPU_THIS_PTR prepareSSE();
1967 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
1969 /* op2 is a register or memory reference */
1970 if (i->modC0()) {
1971 op2 = BX_READ_XMM_REG(i->rm());
1973 else {
1974 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1975 /* pointer, segment address pair */
1976 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1979 result.xmm64u(0) = op1.xmm64u(1);
1980 result.xmm64u(1) = op2.xmm64u(1);
1982 /* now write result back to destination */
1983 BX_WRITE_XMM_REG(i->nnn(), result);
1984 #else
1985 BX_INFO(("PUNPCKHQDQ_VdqWdq: required SSE2, use --enable-sse option"));
1986 exception(BX_UD_EXCEPTION, 0, 0);
1987 #endif
1990 /* 66 0F 70 */
1991 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSHUFD_VdqWdqIb(bxInstruction_c *i)
1993 #if BX_SUPPORT_SSE >= 2
1994 BX_CPU_THIS_PTR prepareSSE();
1996 BxPackedXmmRegister op, result;
1997 Bit8u order = i->Ib();
1999 /* op is a register or memory reference */
2000 if (i->modC0()) {
2001 op = BX_READ_XMM_REG(i->rm());
2003 else {
2004 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2005 /* pointer, segment address pair */
2006 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
2009 result.xmm32u(0) = op.xmm32u((order >> 0) & 0x3);
2010 result.xmm32u(1) = op.xmm32u((order >> 2) & 0x3);
2011 result.xmm32u(2) = op.xmm32u((order >> 4) & 0x3);
2012 result.xmm32u(3) = op.xmm32u((order >> 6) & 0x3);
2014 /* now write result back to destination */
2015 BX_WRITE_XMM_REG(i->nnn(), result);
2016 #else
2017 BX_INFO(("PSHUFD_VdqWdqIb: required SSE2, use --enable-sse option"));
2018 exception(BX_UD_EXCEPTION, 0, 0);
2019 #endif
2022 /* F2 0F 70 */
2023 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSHUFHW_VdqWdqIb(bxInstruction_c *i)
2025 #if BX_SUPPORT_SSE >= 2
2026 BX_CPU_THIS_PTR prepareSSE();
2028 BxPackedXmmRegister op, result;
2029 Bit8u order = i->Ib();
2031 /* op is a register or memory reference */
2032 if (i->modC0()) {
2033 op = BX_READ_XMM_REG(i->rm());
2035 else {
2036 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2037 /* pointer, segment address pair */
2038 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
2041 result.xmm64u(0) = op.xmm64u(0);
2042 result.xmm16u(4) = op.xmm16u(4 + ((order >> 0) & 0x3));
2043 result.xmm16u(5) = op.xmm16u(4 + ((order >> 2) & 0x3));
2044 result.xmm16u(6) = op.xmm16u(4 + ((order >> 4) & 0x3));
2045 result.xmm16u(7) = op.xmm16u(4 + ((order >> 6) & 0x3));
2047 /* now write result back to destination */
2048 BX_WRITE_XMM_REG(i->nnn(), result);
2049 #else
2050 BX_INFO(("PSHUFHW_VdqWdqIb: required SSE2, use --enable-sse option"));
2051 exception(BX_UD_EXCEPTION, 0, 0);
2052 #endif
2055 /* F3 0F 70 */
2056 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSHUFLW_VdqWdqIb(bxInstruction_c *i)
2058 #if BX_SUPPORT_SSE >= 2
2059 BX_CPU_THIS_PTR prepareSSE();
2061 BxPackedXmmRegister op, result;
2062 Bit8u order = i->Ib();
2064 /* op is a register or memory reference */
2065 if (i->modC0()) {
2066 op = BX_READ_XMM_REG(i->rm());
2068 else {
2069 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2070 /* pointer, segment address pair */
2071 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
2074 result.xmm16u(0) = op.xmm16u((order >> 0) & 0x3);
2075 result.xmm16u(1) = op.xmm16u((order >> 2) & 0x3);
2076 result.xmm16u(2) = op.xmm16u((order >> 4) & 0x3);
2077 result.xmm16u(3) = op.xmm16u((order >> 6) & 0x3);
2078 result.xmm64u(1) = op.xmm64u(1);
2080 /* now write result back to destination */
2081 BX_WRITE_XMM_REG(i->nnn(), result);
2082 #else
2083 BX_INFO(("PSHUFLW_VdqWdqIb: required SSE, use --enable-sse option"));
2084 exception(BX_UD_EXCEPTION, 0, 0);
2085 #endif
2088 /* 66 0F 74 */
2089 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQB_VdqWdq(bxInstruction_c *i)
2091 #if BX_SUPPORT_SSE >= 2
2092 BX_CPU_THIS_PTR prepareSSE();
2094 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2096 /* op2 is a register or memory reference */
2097 if (i->modC0()) {
2098 op2 = BX_READ_XMM_REG(i->rm());
2100 else {
2101 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2102 /* pointer, segment address pair */
2103 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2106 for(unsigned j=0; j<16; j++) {
2107 op1.xmmubyte(j) = (op1.xmmubyte(j) == op2.xmmubyte(j)) ? 0xff : 0;
2110 /* now write result back to destination */
2111 BX_WRITE_XMM_REG(i->nnn(), op1);
2112 #else
2113 BX_INFO(("PCMPEQB_VdqWdq: required SSE2, use --enable-sse option"));
2114 exception(BX_UD_EXCEPTION, 0, 0);
2115 #endif
2118 /* 66 0F 75 */
2119 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQW_VdqWdq(bxInstruction_c *i)
2121 #if BX_SUPPORT_SSE >= 2
2122 BX_CPU_THIS_PTR prepareSSE();
2124 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2126 /* op2 is a register or memory reference */
2127 if (i->modC0()) {
2128 op2 = BX_READ_XMM_REG(i->rm());
2130 else {
2131 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2132 /* pointer, segment address pair */
2133 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2136 op1.xmm16u(0) = (op1.xmm16u(0) == op2.xmm16u(0)) ? 0xffff : 0;
2137 op1.xmm16u(1) = (op1.xmm16u(1) == op2.xmm16u(1)) ? 0xffff : 0;
2138 op1.xmm16u(2) = (op1.xmm16u(2) == op2.xmm16u(2)) ? 0xffff : 0;
2139 op1.xmm16u(3) = (op1.xmm16u(3) == op2.xmm16u(3)) ? 0xffff : 0;
2140 op1.xmm16u(4) = (op1.xmm16u(4) == op2.xmm16u(4)) ? 0xffff : 0;
2141 op1.xmm16u(5) = (op1.xmm16u(5) == op2.xmm16u(5)) ? 0xffff : 0;
2142 op1.xmm16u(6) = (op1.xmm16u(6) == op2.xmm16u(6)) ? 0xffff : 0;
2143 op1.xmm16u(7) = (op1.xmm16u(7) == op2.xmm16u(7)) ? 0xffff : 0;
2145 /* now write result back to destination */
2146 BX_WRITE_XMM_REG(i->nnn(), op1);
2147 #else
2148 BX_INFO(("PCMPEQW_VdqWdq: required SSE2, use --enable-sse option"));
2149 exception(BX_UD_EXCEPTION, 0, 0);
2150 #endif
2153 /* 66 0F 76 */
2154 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQD_VdqWdq(bxInstruction_c *i)
2156 #if BX_SUPPORT_SSE >= 2
2157 BX_CPU_THIS_PTR prepareSSE();
2159 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2161 /* op2 is a register or memory reference */
2162 if (i->modC0()) {
2163 op2 = BX_READ_XMM_REG(i->rm());
2165 else {
2166 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2167 /* pointer, segment address pair */
2168 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2171 op1.xmm32u(0) = (op1.xmm32u(0) == op2.xmm32u(0)) ? 0xffffffff : 0;
2172 op1.xmm32u(1) = (op1.xmm32u(1) == op2.xmm32u(1)) ? 0xffffffff : 0;
2173 op1.xmm32u(2) = (op1.xmm32u(2) == op2.xmm32u(2)) ? 0xffffffff : 0;
2174 op1.xmm32u(3) = (op1.xmm32u(3) == op2.xmm32u(3)) ? 0xffffffff : 0;
2176 /* now write result back to destination */
2177 BX_WRITE_XMM_REG(i->nnn(), op1);
2178 #else
2179 BX_INFO(("PCMPEQD_VdqWdq: required SSE2, use --enable-sse option"));
2180 exception(BX_UD_EXCEPTION, 0, 0);
2181 #endif
2184 /* 66 0F C4 */
2185 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRW_VdqEwIb(bxInstruction_c *i)
2187 #if BX_SUPPORT_SSE >= 2
2188 BX_CPU_THIS_PTR prepareSSE();
2190 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
2191 Bit16u op2;
2192 Bit8u count = i->Ib() & 0x7;
2194 /* op2 is a register or memory reference */
2195 if (i->modC0()) {
2196 op2 = BX_READ_16BIT_REG(i->rm());
2198 else {
2199 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2200 /* pointer, segment address pair */
2201 op2 = read_virtual_word(i->seg(), eaddr);
2204 op1.xmm16u(count) = op2;
2206 /* now write result back to destination */
2207 BX_WRITE_XMM_REG(i->nnn(), op1);
2208 #else
2209 BX_INFO(("PINSRW_VdqEdIb: required SSE2, use --enable-sse option"));
2210 exception(BX_UD_EXCEPTION, 0, 0);
2211 #endif
2214 /* 66 0F C5 */
2215 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_GdUdqIb(bxInstruction_c *i)
2217 #if BX_SUPPORT_SSE >= 2
2218 BX_CPU_THIS_PTR prepareSSE();
2220 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
2221 Bit8u count = i->Ib() & 0x7;
2222 Bit32u result = (Bit32u) op.xmm16u(count);
2224 BX_WRITE_32BIT_REGZ(i->nnn(), result);
2225 #else
2226 BX_INFO(("PEXTRW_GdUdqIb: required SSE2, use --enable-sse option"));
2227 exception(BX_UD_EXCEPTION, 0, 0);
2228 #endif
2231 /* 0F C6 */
2232 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SHUFPS_VpsWpsIb(bxInstruction_c *i)
2234 #if BX_SUPPORT_SSE >= 1
2235 BX_CPU_THIS_PTR prepareSSE();
2237 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
2238 Bit8u order = i->Ib();
2240 /* op2 is a register or memory reference */
2241 if (i->modC0()) {
2242 op2 = BX_READ_XMM_REG(i->rm());
2244 else {
2245 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2246 /* pointer, segment address pair */
2247 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2250 result.xmm32u(0) = op1.xmm32u((order >> 0) & 0x3);
2251 result.xmm32u(1) = op1.xmm32u((order >> 2) & 0x3);
2252 result.xmm32u(2) = op2.xmm32u((order >> 4) & 0x3);
2253 result.xmm32u(3) = op2.xmm32u((order >> 6) & 0x3);
2255 /* now write result back to destination */
2256 BX_WRITE_XMM_REG(i->nnn(), result);
2257 #else
2258 BX_INFO(("SHUFPS_VpsWpsIb: required SSE, use --enable-sse option"));
2259 exception(BX_UD_EXCEPTION, 0, 0);
2260 #endif
2263 /* 66 0F C6 */
2264 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SHUFPD_VpdWpdIb(bxInstruction_c *i)
2266 #if BX_SUPPORT_SSE >= 2
2267 BX_CPU_THIS_PTR prepareSSE();
2269 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
2270 Bit8u order = i->Ib();
2272 /* op2 is a register or memory reference */
2273 if (i->modC0()) {
2274 op2 = BX_READ_XMM_REG(i->rm());
2276 else {
2277 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2278 /* pointer, segment address pair */
2279 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2282 result.xmm64u(0) = op1.xmm64u((order >> 0) & 0x1);
2283 result.xmm64u(1) = op2.xmm64u((order >> 1) & 0x1);
2285 /* now write result back to destination */
2286 BX_WRITE_XMM_REG(i->nnn(), result);
2287 #else
2288 BX_INFO(("SHUFPD_VpdWpdIb: required SSE2, use --enable-sse option"));
2289 exception(BX_UD_EXCEPTION, 0, 0);
2290 #endif
2293 /* 66 0F D1 */
2294 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLW_VdqWdq(bxInstruction_c *i)
2296 #if BX_SUPPORT_SSE >= 2
2297 BX_CPU_THIS_PTR prepareSSE();
2299 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2301 /* op2 is a register or memory reference */
2302 if (i->modC0()) {
2303 op2 = BX_READ_XMM_REG(i->rm());
2305 else {
2306 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2307 /* pointer, segment address pair */
2308 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2311 if(op2.xmm64u(0) > 15) /* looking only to low 64 bits */
2313 op1.xmm64u(0) = 0;
2314 op1.xmm64u(1) = 0;
2316 else
2318 Bit8u shift = op2.xmmubyte(0);
2320 op1.xmm16u(0) >>= shift;
2321 op1.xmm16u(1) >>= shift;
2322 op1.xmm16u(2) >>= shift;
2323 op1.xmm16u(3) >>= shift;
2324 op1.xmm16u(4) >>= shift;
2325 op1.xmm16u(5) >>= shift;
2326 op1.xmm16u(6) >>= shift;
2327 op1.xmm16u(7) >>= shift;
2330 /* now write result back to destination */
2331 BX_WRITE_XMM_REG(i->nnn(), op1);
2332 #else
2333 BX_INFO(("PSRLW_VdqWdq: required SSE2, use --enable-sse option"));
2334 exception(BX_UD_EXCEPTION, 0, 0);
2335 #endif
2338 /* 66 0F D2 */
2339 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLD_VdqWdq(bxInstruction_c *i)
2341 #if BX_SUPPORT_SSE >= 2
2342 BX_CPU_THIS_PTR prepareSSE();
2344 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2346 /* op2 is a register or memory reference */
2347 if (i->modC0()) {
2348 op2 = BX_READ_XMM_REG(i->rm());
2350 else {
2351 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2352 /* pointer, segment address pair */
2353 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2356 if(op2.xmm64u(0) > 31) /* looking only to low 64 bits */
2358 op1.xmm64u(0) = 0;
2359 op1.xmm64u(1) = 0;
2361 else
2363 Bit8u shift = op2.xmmubyte(0);
2365 op1.xmm32u(0) >>= shift;
2366 op1.xmm32u(1) >>= shift;
2367 op1.xmm32u(2) >>= shift;
2368 op1.xmm32u(3) >>= shift;
2371 /* now write result back to destination */
2372 BX_WRITE_XMM_REG(i->nnn(), op1);
2373 #else
2374 BX_INFO(("PSRLD_VdqWdq: required SSE2, use --enable-sse option"));
2375 exception(BX_UD_EXCEPTION, 0, 0);
2376 #endif
2379 /* 66 0F D3 */
2380 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLQ_VdqWdq(bxInstruction_c *i)
2382 #if BX_SUPPORT_SSE >= 2
2383 BX_CPU_THIS_PTR prepareSSE();
2385 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2387 /* op2 is a register or memory reference */
2388 if (i->modC0()) {
2389 op2 = BX_READ_XMM_REG(i->rm());
2391 else {
2392 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2393 /* pointer, segment address pair */
2394 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2397 if(op2.xmm64u(0) > 63) /* looking only to low 64 bits */
2399 op1.xmm64u(0) = 0;
2400 op1.xmm64u(1) = 0;
2402 else
2404 Bit8u shift = op2.xmmubyte(0);
2406 op1.xmm64u(0) >>= shift;
2407 op1.xmm64u(1) >>= shift;
2410 /* now write result back to destination */
2411 BX_WRITE_XMM_REG(i->nnn(), op1);
2412 #else
2413 BX_INFO(("PSRLQ_VdqWdq: required SSE2, use --enable-sse option"));
2414 exception(BX_UD_EXCEPTION, 0, 0);
2415 #endif
2418 /* 66 0F D4 */
2419 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDQ_VdqWdq(bxInstruction_c *i)
2421 #if BX_SUPPORT_SSE >= 2
2422 BX_CPU_THIS_PTR prepareSSE();
2424 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2426 /* op2 is a register or memory reference */
2427 if (i->modC0()) {
2428 op2 = BX_READ_XMM_REG(i->rm());
2430 else {
2431 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2432 /* pointer, segment address pair */
2433 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2436 op1.xmm64u(0) += op2.xmm64u(0);
2437 op1.xmm64u(1) += op2.xmm64u(1);
2439 /* now write result back to destination */
2440 BX_WRITE_XMM_REG(i->nnn(), op1);
2441 #else
2442 BX_INFO(("PADDQ_VdqWdq: required SSE2, use --enable-sse option"));
2443 exception(BX_UD_EXCEPTION, 0, 0);
2444 #endif
2447 /* 66 0F D5 */
2448 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULLW_VdqWdq(bxInstruction_c *i)
2450 #if BX_SUPPORT_SSE >= 2
2451 BX_CPU_THIS_PTR prepareSSE();
2453 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2455 /* op2 is a register or memory reference */
2456 if (i->modC0()) {
2457 op2 = BX_READ_XMM_REG(i->rm());
2459 else {
2460 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2461 /* pointer, segment address pair */
2462 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2465 Bit32u product1 = Bit32u(op1.xmm16u(0)) * Bit32u(op2.xmm16u(0));
2466 Bit32u product2 = Bit32u(op1.xmm16u(1)) * Bit32u(op2.xmm16u(1));
2467 Bit32u product3 = Bit32u(op1.xmm16u(2)) * Bit32u(op2.xmm16u(2));
2468 Bit32u product4 = Bit32u(op1.xmm16u(3)) * Bit32u(op2.xmm16u(3));
2469 Bit32u product5 = Bit32u(op1.xmm16u(4)) * Bit32u(op2.xmm16u(4));
2470 Bit32u product6 = Bit32u(op1.xmm16u(5)) * Bit32u(op2.xmm16u(5));
2471 Bit32u product7 = Bit32u(op1.xmm16u(6)) * Bit32u(op2.xmm16u(6));
2472 Bit32u product8 = Bit32u(op1.xmm16u(7)) * Bit32u(op2.xmm16u(7));
2474 op1.xmm16u(0) = product1 & 0xffff;
2475 op1.xmm16u(1) = product2 & 0xffff;
2476 op1.xmm16u(2) = product3 & 0xffff;
2477 op1.xmm16u(3) = product4 & 0xffff;
2478 op1.xmm16u(4) = product5 & 0xffff;
2479 op1.xmm16u(5) = product6 & 0xffff;
2480 op1.xmm16u(6) = product7 & 0xffff;
2481 op1.xmm16u(7) = product8 & 0xffff;
2483 /* now write result back to destination */
2484 BX_WRITE_XMM_REG(i->nnn(), op1);
2485 #else
2486 BX_INFO(("PMULLW_VdqWdq: required SSE2, use --enable-sse option"));
2487 exception(BX_UD_EXCEPTION, 0, 0);
2488 #endif
2491 /* 66 0F D8 */
2492 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBUSB_VdqWdq(bxInstruction_c *i)
2494 #if BX_SUPPORT_SSE >= 2
2495 BX_CPU_THIS_PTR prepareSSE();
2497 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
2499 /* op2 is a register or memory reference */
2500 if (i->modC0()) {
2501 op2 = BX_READ_XMM_REG(i->rm());
2503 else {
2504 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2505 /* pointer, segment address pair */
2506 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2509 result.xmm64u(0) = result.xmm64u(1) = 0;
2511 for(unsigned j=0; j<16; j++)
2513 if(op1.xmmubyte(j) > op2.xmmubyte(j))
2515 result.xmmubyte(j) = op1.xmmubyte(j) - op2.xmmubyte(j);
2519 /* now write result back to destination */
2520 BX_WRITE_XMM_REG(i->nnn(), result);
2521 #else
2522 BX_INFO(("PSUBUSB_VdqWdq: required SSE2, use --enable-sse option"));
2523 exception(BX_UD_EXCEPTION, 0, 0);
2524 #endif
2527 /* 66 0F D9 */
2528 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBUSW_VdqWdq(bxInstruction_c *i)
2530 #if BX_SUPPORT_SSE >= 2
2531 BX_CPU_THIS_PTR prepareSSE();
2533 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
2535 /* op2 is a register or memory reference */
2536 if (i->modC0()) {
2537 op2 = BX_READ_XMM_REG(i->rm());
2539 else {
2540 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2541 /* pointer, segment address pair */
2542 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2545 result.xmm64u(0) = result.xmm64u(1) = 0;
2547 for(unsigned j=0; j<8; j++)
2549 if(op1.xmm16u(j) > op2.xmm16u(j))
2551 result.xmm16u(j) = op1.xmm16u(j) - op2.xmm16u(j);
2555 /* now write result back to destination */
2556 BX_WRITE_XMM_REG(i->nnn(), result);
2557 #else
2558 BX_INFO(("PSUBUSW_VdqWdq: required SSE2, use --enable-sse option"));
2559 exception(BX_UD_EXCEPTION, 0, 0);
2560 #endif
2563 /* 66 0F DA */
2564 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUB_VdqWdq(bxInstruction_c *i)
2566 #if BX_SUPPORT_SSE >= 2
2567 BX_CPU_THIS_PTR prepareSSE();
2569 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2571 /* op2 is a register or memory reference */
2572 if (i->modC0()) {
2573 op2 = BX_READ_XMM_REG(i->rm());
2575 else {
2576 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2577 /* pointer, segment address pair */
2578 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2581 for(unsigned j=0; j<16; j++) {
2582 if(op2.xmmubyte(j) < op1.xmmubyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
2585 /* now write result back to destination */
2586 BX_WRITE_XMM_REG(i->nnn(), op1);
2587 #else
2588 BX_INFO(("PMINUB_VdqWdq: required SSE2, use --enable-sse option"));
2589 exception(BX_UD_EXCEPTION, 0, 0);
2590 #endif
2593 /* ANDPS: 0F 54 */
2594 /* ANDPD: 66 0F 54 */
2595 /* PAND: 66 0F DB */
2596 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ANDPS_VpsWps(bxInstruction_c *i)
2598 #if BX_SUPPORT_SSE >= 1
2599 BX_CPU_THIS_PTR prepareSSE();
2601 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2603 /* op2 is a register or memory reference */
2604 if (i->modC0()) {
2605 op2 = BX_READ_XMM_REG(i->rm());
2607 else {
2608 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2609 /* pointer, segment address pair */
2610 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2613 op1.xmm64u(0) &= op2.xmm64u(0);
2614 op1.xmm64u(1) &= op2.xmm64u(1);
2616 /* now write result back to destination */
2617 BX_WRITE_XMM_REG(i->nnn(), op1);
2618 #else
2619 BX_INFO(("ANDPS_VpsWps: required SSE, use --enable-sse option"));
2620 exception(BX_UD_EXCEPTION, 0, 0);
2621 #endif
2624 /* 66 0F DC */
2625 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDUSB_VdqWdq(bxInstruction_c *i)
2627 #if BX_SUPPORT_SSE >= 2
2628 BX_CPU_THIS_PTR prepareSSE();
2630 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2632 /* op2 is a register or memory reference */
2633 if (i->modC0()) {
2634 op2 = BX_READ_XMM_REG(i->rm());
2636 else {
2637 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2638 /* pointer, segment address pair */
2639 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2642 for(unsigned j=0; j<16; j++) {
2643 op1.xmmubyte(j) = SaturateWordSToByteU(Bit16s(op1.xmmubyte(j)) + Bit16s(op2.xmmubyte(j)));
2646 /* now write result back to destination */
2647 BX_WRITE_XMM_REG(i->nnn(), op1);
2648 #else
2649 BX_INFO(("PADDUSB_VdqWdq: required SSE2, use --enable-sse option"));
2650 exception(BX_UD_EXCEPTION, 0, 0);
2651 #endif
2654 /* 66 0F DD */
2655 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDUSW_VdqWdq(bxInstruction_c *i)
2657 #if BX_SUPPORT_SSE >= 2
2658 BX_CPU_THIS_PTR prepareSSE();
2660 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2662 /* op2 is a register or memory reference */
2663 if (i->modC0()) {
2664 op2 = BX_READ_XMM_REG(i->rm());
2666 else {
2667 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2668 /* pointer, segment address pair */
2669 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2672 op1.xmm16u(0) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(0)) + Bit32s(op2.xmm16u(0)));
2673 op1.xmm16u(1) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(1)) + Bit32s(op2.xmm16u(1)));
2674 op1.xmm16u(2) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(2)) + Bit32s(op2.xmm16u(2)));
2675 op1.xmm16u(3) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(3)) + Bit32s(op2.xmm16u(3)));
2676 op1.xmm16u(4) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(4)) + Bit32s(op2.xmm16u(4)));
2677 op1.xmm16u(5) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(5)) + Bit32s(op2.xmm16u(5)));
2678 op1.xmm16u(6) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(6)) + Bit32s(op2.xmm16u(6)));
2679 op1.xmm16u(7) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(7)) + Bit32s(op2.xmm16u(7)));
2681 /* now write result back to destination */
2682 BX_WRITE_XMM_REG(i->nnn(), op1);
2683 #else
2684 BX_INFO(("PADDUSW_VdqWdq: required SSE2, use --enable-sse option"));
2685 exception(BX_UD_EXCEPTION, 0, 0);
2686 #endif
2689 /* 66 0F DE */
2690 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUB_VdqWdq(bxInstruction_c *i)
2692 #if BX_SUPPORT_SSE >= 2
2693 BX_CPU_THIS_PTR prepareSSE();
2695 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2697 /* op2 is a register or memory reference */
2698 if (i->modC0()) {
2699 op2 = BX_READ_XMM_REG(i->rm());
2701 else {
2702 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2703 /* pointer, segment address pair */
2704 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2707 for(unsigned j=0; j<16; j++) {
2708 if(op2.xmmubyte(j) > op1.xmmubyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
2711 /* now write result back to destination */
2712 BX_WRITE_XMM_REG(i->nnn(), op1);
2713 #else
2714 BX_INFO(("PMAXUB_VdqWdq: required SSE2, use --enable-sse option"));
2715 exception(BX_UD_EXCEPTION, 0, 0);
2716 #endif
2719 /* ANDNPS: 0F 55 */
2720 /* ANDNPD: 66 0F 55 */
2721 /* PANDN: 66 0F DF */
2722 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ANDNPS_VpsWps(bxInstruction_c *i)
2724 #if BX_SUPPORT_SSE >= 1
2725 BX_CPU_THIS_PTR prepareSSE();
2727 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2729 /* op2 is a register or memory reference */
2730 if (i->modC0()) {
2731 op2 = BX_READ_XMM_REG(i->rm());
2733 else {
2734 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2735 /* pointer, segment address pair */
2736 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2739 op1.xmm64u(0) = ~(op1.xmm64u(0)) & op2.xmm64u(0);
2740 op1.xmm64u(1) = ~(op1.xmm64u(1)) & op2.xmm64u(1);
2742 /* now write result back to destination */
2743 BX_WRITE_XMM_REG(i->nnn(), op1);
2744 #else
2745 BX_INFO(("ANDNPS_VpsWps: required SSE, use --enable-sse option"));
2746 exception(BX_UD_EXCEPTION, 0, 0);
2747 #endif
2750 /* 66 0F E0 */
2751 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PAVGB_VdqWdq(bxInstruction_c *i)
2753 #if BX_SUPPORT_SSE
2754 BX_CPU_THIS_PTR prepareSSE();
2756 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2758 /* op2 is a register or memory reference */
2759 if (i->modC0()) {
2760 op2 = BX_READ_XMM_REG(i->rm());
2762 else {
2763 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2764 /* pointer, segment address pair */
2765 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2768 for(unsigned j=0; j<16; j++) {
2769 op1.xmmubyte(j) = (op1.xmmubyte(j) + op2.xmmubyte(j) + 1) >> 1;
2772 /* now write result back to destination */
2773 BX_WRITE_XMM_REG(i->nnn(), op1);
2774 #else
2775 BX_INFO(("PAVGB_VdqWdq: required SSE, use --enable-sse option"));
2776 exception(BX_UD_EXCEPTION, 0, 0);
2777 #endif
2780 /* 66 0F E1 */
2781 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_VdqWdq(bxInstruction_c *i)
2783 #if BX_SUPPORT_SSE >= 2
2784 BX_CPU_THIS_PTR prepareSSE();
2786 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
2788 /* op2 is a register or memory reference */
2789 if (i->modC0()) {
2790 op2 = BX_READ_XMM_REG(i->rm());
2792 else {
2793 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2794 /* pointer, segment address pair */
2795 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2798 if(op2.xmm64u(0) == 0) return;
2800 if(op2.xmm64u(0) > 15) /* looking only to low 64 bits */
2802 result.xmm16u(0) = (op1.xmm16u(0) & 0x8000) ? 0xffff : 0;
2803 result.xmm16u(1) = (op1.xmm16u(1) & 0x8000) ? 0xffff : 0;
2804 result.xmm16u(2) = (op1.xmm16u(2) & 0x8000) ? 0xffff : 0;
2805 result.xmm16u(3) = (op1.xmm16u(3) & 0x8000) ? 0xffff : 0;
2806 result.xmm16u(4) = (op1.xmm16u(4) & 0x8000) ? 0xffff : 0;
2807 result.xmm16u(5) = (op1.xmm16u(5) & 0x8000) ? 0xffff : 0;
2808 result.xmm16u(6) = (op1.xmm16u(6) & 0x8000) ? 0xffff : 0;
2809 result.xmm16u(7) = (op1.xmm16u(7) & 0x8000) ? 0xffff : 0;
2811 else
2813 Bit8u shift = op2.xmmubyte(0);
2815 result.xmm16u(0) = op1.xmm16u(0) >> shift;
2816 result.xmm16u(1) = op1.xmm16u(1) >> shift;
2817 result.xmm16u(2) = op1.xmm16u(2) >> shift;
2818 result.xmm16u(3) = op1.xmm16u(3) >> shift;
2819 result.xmm16u(4) = op1.xmm16u(4) >> shift;
2820 result.xmm16u(5) = op1.xmm16u(5) >> shift;
2821 result.xmm16u(6) = op1.xmm16u(6) >> shift;
2822 result.xmm16u(7) = op1.xmm16u(7) >> shift;
2824 if(op1.xmm16u(0) & 0x8000) result.xmm16u(0) |= (0xffff << (16 - shift));
2825 if(op1.xmm16u(1) & 0x8000) result.xmm16u(1) |= (0xffff << (16 - shift));
2826 if(op1.xmm16u(2) & 0x8000) result.xmm16u(2) |= (0xffff << (16 - shift));
2827 if(op1.xmm16u(3) & 0x8000) result.xmm16u(3) |= (0xffff << (16 - shift));
2828 if(op1.xmm16u(4) & 0x8000) result.xmm16u(4) |= (0xffff << (16 - shift));
2829 if(op1.xmm16u(5) & 0x8000) result.xmm16u(5) |= (0xffff << (16 - shift));
2830 if(op1.xmm16u(6) & 0x8000) result.xmm16u(6) |= (0xffff << (16 - shift));
2831 if(op1.xmm16u(7) & 0x8000) result.xmm16u(7) |= (0xffff << (16 - shift));
2834 /* now write result back to destination */
2835 BX_WRITE_XMM_REG(i->nnn(), result);
2836 #else
2837 BX_INFO(("PSRAW_VdqWdq: required SSE2, use --enable-sse option"));
2838 exception(BX_UD_EXCEPTION, 0, 0);
2839 #endif
2842 /* 66 0F E2 */
2843 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAD_VdqWdq(bxInstruction_c *i)
2845 #if BX_SUPPORT_SSE >= 2
2846 BX_CPU_THIS_PTR prepareSSE();
2848 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
2850 /* op2 is a register or memory reference */
2851 if (i->modC0()) {
2852 op2 = BX_READ_XMM_REG(i->rm());
2854 else {
2855 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2856 /* pointer, segment address pair */
2857 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2860 if(op2.xmm64u(0) == 0) return;
2862 if(op2.xmm64u(0) > 31) /* looking only to low 64 bits */
2864 result.xmm32u(0) = (op1.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
2865 result.xmm32u(1) = (op1.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
2866 result.xmm32u(2) = (op1.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
2867 result.xmm32u(3) = (op1.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
2869 else
2871 Bit8u shift = op2.xmmubyte(0);
2873 result.xmm32u(0) = op1.xmm32u(0) >> shift;
2874 result.xmm32u(1) = op1.xmm32u(1) >> shift;
2875 result.xmm32u(2) = op1.xmm32u(2) >> shift;
2876 result.xmm32u(3) = op1.xmm32u(3) >> shift;
2878 if(op1.xmm32u(0) & 0x80000000) result.xmm32u(0) |= (0xffffffff << (32-shift));
2879 if(op1.xmm32u(1) & 0x80000000) result.xmm32u(1) |= (0xffffffff << (32-shift));
2880 if(op1.xmm32u(2) & 0x80000000) result.xmm32u(2) |= (0xffffffff << (32-shift));
2881 if(op1.xmm32u(3) & 0x80000000) result.xmm32u(3) |= (0xffffffff << (32-shift));
2884 /* now write result back to destination */
2885 BX_WRITE_XMM_REG(i->nnn(), result);
2886 #else
2887 BX_INFO(("PSRAD_VdqWdq: required SSE2, use --enable-sse option"));
2888 exception(BX_UD_EXCEPTION, 0, 0);
2889 #endif
2892 /* 66 0F E3 */
2893 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PAVGW_VdqWdq(bxInstruction_c *i)
2895 #if BX_SUPPORT_SSE
2896 BX_CPU_THIS_PTR prepareSSE();
2898 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2900 /* op2 is a register or memory reference */
2901 if (i->modC0()) {
2902 op2 = BX_READ_XMM_REG(i->rm());
2904 else {
2905 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2906 /* pointer, segment address pair */
2907 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2910 op1.xmm16u(0) = (op1.xmm16u(0) + op2.xmm16u(0) + 1) >> 1;
2911 op1.xmm16u(1) = (op1.xmm16u(1) + op2.xmm16u(1) + 1) >> 1;
2912 op1.xmm16u(2) = (op1.xmm16u(2) + op2.xmm16u(2) + 1) >> 1;
2913 op1.xmm16u(3) = (op1.xmm16u(3) + op2.xmm16u(3) + 1) >> 1;
2914 op1.xmm16u(4) = (op1.xmm16u(4) + op2.xmm16u(4) + 1) >> 1;
2915 op1.xmm16u(5) = (op1.xmm16u(5) + op2.xmm16u(5) + 1) >> 1;
2916 op1.xmm16u(6) = (op1.xmm16u(6) + op2.xmm16u(6) + 1) >> 1;
2917 op1.xmm16u(7) = (op1.xmm16u(7) + op2.xmm16u(7) + 1) >> 1;
2919 /* now write result back to destination */
2920 BX_WRITE_XMM_REG(i->nnn(), op1);
2921 #else
2922 BX_INFO(("PAVGW_VdqWdq: required SSE, use --enable-sse option"));
2923 exception(BX_UD_EXCEPTION, 0, 0);
2924 #endif
2927 /* 66 0F E4 */
2928 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULHUW_VdqWdq(bxInstruction_c *i)
2930 #if BX_SUPPORT_SSE >= 2
2931 BX_CPU_THIS_PTR prepareSSE();
2933 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2935 /* op2 is a register or memory reference */
2936 if (i->modC0()) {
2937 op2 = BX_READ_XMM_REG(i->rm());
2939 else {
2940 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2941 /* pointer, segment address pair */
2942 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2945 Bit32u product1 = Bit32u(op1.xmm16u(0)) * Bit32u(op2.xmm16u(0));
2946 Bit32u product2 = Bit32u(op1.xmm16u(1)) * Bit32u(op2.xmm16u(1));
2947 Bit32u product3 = Bit32u(op1.xmm16u(2)) * Bit32u(op2.xmm16u(2));
2948 Bit32u product4 = Bit32u(op1.xmm16u(3)) * Bit32u(op2.xmm16u(3));
2949 Bit32u product5 = Bit32u(op1.xmm16u(4)) * Bit32u(op2.xmm16u(4));
2950 Bit32u product6 = Bit32u(op1.xmm16u(5)) * Bit32u(op2.xmm16u(5));
2951 Bit32u product7 = Bit32u(op1.xmm16u(6)) * Bit32u(op2.xmm16u(6));
2952 Bit32u product8 = Bit32u(op1.xmm16u(7)) * Bit32u(op2.xmm16u(7));
2954 op1.xmm16u(0) = (Bit16u)(product1 >> 16);
2955 op1.xmm16u(1) = (Bit16u)(product2 >> 16);
2956 op1.xmm16u(2) = (Bit16u)(product3 >> 16);
2957 op1.xmm16u(3) = (Bit16u)(product4 >> 16);
2958 op1.xmm16u(4) = (Bit16u)(product5 >> 16);
2959 op1.xmm16u(5) = (Bit16u)(product6 >> 16);
2960 op1.xmm16u(6) = (Bit16u)(product7 >> 16);
2961 op1.xmm16u(7) = (Bit16u)(product8 >> 16);
2963 /* now write result back to destination */
2964 BX_WRITE_XMM_REG(i->nnn(), op1);
2965 #else
2966 BX_INFO(("PMULHUW_VdqWdq: required SSE2, use --enable-sse option"));
2967 exception(BX_UD_EXCEPTION, 0, 0);
2968 #endif
2971 /* 66 0F E5 */
2972 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULHW_VdqWdq(bxInstruction_c *i)
2974 #if BX_SUPPORT_SSE >= 2
2975 BX_CPU_THIS_PTR prepareSSE();
2977 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2979 /* op2 is a register or memory reference */
2980 if (i->modC0()) {
2981 op2 = BX_READ_XMM_REG(i->rm());
2983 else {
2984 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2985 /* pointer, segment address pair */
2986 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2989 Bit32s product1 = Bit32s(op1.xmm16s(0)) * Bit32s(op2.xmm16s(0));
2990 Bit32s product2 = Bit32s(op1.xmm16s(1)) * Bit32s(op2.xmm16s(1));
2991 Bit32s product3 = Bit32s(op1.xmm16s(2)) * Bit32s(op2.xmm16s(2));
2992 Bit32s product4 = Bit32s(op1.xmm16s(3)) * Bit32s(op2.xmm16s(3));
2993 Bit32s product5 = Bit32s(op1.xmm16s(4)) * Bit32s(op2.xmm16s(4));
2994 Bit32s product6 = Bit32s(op1.xmm16s(5)) * Bit32s(op2.xmm16s(5));
2995 Bit32s product7 = Bit32s(op1.xmm16s(6)) * Bit32s(op2.xmm16s(6));
2996 Bit32s product8 = Bit32s(op1.xmm16s(7)) * Bit32s(op2.xmm16s(7));
2998 op1.xmm16u(0) = (Bit16u)(product1 >> 16);
2999 op1.xmm16u(1) = (Bit16u)(product2 >> 16);
3000 op1.xmm16u(2) = (Bit16u)(product3 >> 16);
3001 op1.xmm16u(3) = (Bit16u)(product4 >> 16);
3002 op1.xmm16u(4) = (Bit16u)(product5 >> 16);
3003 op1.xmm16u(5) = (Bit16u)(product6 >> 16);
3004 op1.xmm16u(6) = (Bit16u)(product7 >> 16);
3005 op1.xmm16u(7) = (Bit16u)(product8 >> 16);
3007 /* now write result back to destination */
3008 BX_WRITE_XMM_REG(i->nnn(), op1);
3009 #else
3010 BX_INFO(("PMULHW_VdqWdq: required SSE2, use --enable-sse option"));
3011 exception(BX_UD_EXCEPTION, 0, 0);
3012 #endif
3015 /* 66 0F E8 */
3016 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBSB_VdqWdq(bxInstruction_c *i)
3018 #if BX_SUPPORT_SSE >= 2
3019 BX_CPU_THIS_PTR prepareSSE();
3021 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3023 /* op2 is a register or memory reference */
3024 if (i->modC0()) {
3025 op2 = BX_READ_XMM_REG(i->rm());
3027 else {
3028 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3029 /* pointer, segment address pair */
3030 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3033 for(unsigned j=0; j<16; j++) {
3034 op1.xmmsbyte(j) = SaturateWordSToByteS(Bit16s(op1.xmmsbyte(j)) - Bit16s(op2.xmmsbyte(j)));
3037 /* now write result back to destination */
3038 BX_WRITE_XMM_REG(i->nnn(), op1);
3039 #else
3040 BX_INFO(("PSUBSB_VdqWdq: required SSE2, use --enable-sse option"));
3041 exception(BX_UD_EXCEPTION, 0, 0);
3042 #endif
3045 /* 66 0F E9 */
3046 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBSW_VdqWdq(bxInstruction_c *i)
3048 #if BX_SUPPORT_SSE >= 2
3049 BX_CPU_THIS_PTR prepareSSE();
3051 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3053 /* op2 is a register or memory reference */
3054 if (i->modC0()) {
3055 op2 = BX_READ_XMM_REG(i->rm());
3057 else {
3058 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3059 /* pointer, segment address pair */
3060 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3063 op1.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) - Bit32s(op2.xmm16s(0)));
3064 op1.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(1)) - Bit32s(op2.xmm16s(1)));
3065 op1.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) - Bit32s(op2.xmm16s(2)));
3066 op1.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(3)) - Bit32s(op2.xmm16s(3)));
3067 op1.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) - Bit32s(op2.xmm16s(4)));
3068 op1.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(5)) - Bit32s(op2.xmm16s(5)));
3069 op1.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) - Bit32s(op2.xmm16s(6)));
3070 op1.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(7)) - Bit32s(op2.xmm16s(7)));
3072 /* now write result back to destination */
3073 BX_WRITE_XMM_REG(i->nnn(), op1);
3074 #else
3075 BX_INFO(("PSUBSW_VdqWdq: required SSE2, use --enable-sse option"));
3076 exception(BX_UD_EXCEPTION, 0, 0);
3077 #endif
3080 /* 66 0F EA */
3081 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSW_VdqWdq(bxInstruction_c *i)
3083 #if BX_SUPPORT_SSE >= 2
3084 BX_CPU_THIS_PTR prepareSSE();
3086 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3088 /* op2 is a register or memory reference */
3089 if (i->modC0()) {
3090 op2 = BX_READ_XMM_REG(i->rm());
3092 else {
3093 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3094 /* pointer, segment address pair */
3095 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3098 if(op2.xmm16s(0) < op1.xmm16s(0)) op1.xmm16s(0) = op2.xmm16s(0);
3099 if(op2.xmm16s(1) < op1.xmm16s(1)) op1.xmm16s(1) = op2.xmm16s(1);
3100 if(op2.xmm16s(2) < op1.xmm16s(2)) op1.xmm16s(2) = op2.xmm16s(2);
3101 if(op2.xmm16s(3) < op1.xmm16s(3)) op1.xmm16s(3) = op2.xmm16s(3);
3102 if(op2.xmm16s(4) < op1.xmm16s(4)) op1.xmm16s(4) = op2.xmm16s(4);
3103 if(op2.xmm16s(5) < op1.xmm16s(5)) op1.xmm16s(5) = op2.xmm16s(5);
3104 if(op2.xmm16s(6) < op1.xmm16s(6)) op1.xmm16s(6) = op2.xmm16s(6);
3105 if(op2.xmm16s(7) < op1.xmm16s(7)) op1.xmm16s(7) = op2.xmm16s(7);
3107 /* now write result back to destination */
3108 BX_WRITE_XMM_REG(i->nnn(), op1);
3109 #else
3110 BX_INFO(("PMINSW_VdqWdq: required SSE2, use --enable-sse option"));
3111 exception(BX_UD_EXCEPTION, 0, 0);
3112 #endif
3115 /* ORPS: 0F 56 */
3116 /* ORPD: 66 0F 56 */
3117 /* POR: 66 0F EB */
3118 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ORPS_VpsWps(bxInstruction_c *i)
3120 #if BX_SUPPORT_SSE >= 1
3121 BX_CPU_THIS_PTR prepareSSE();
3123 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3125 /* op2 is a register or memory reference */
3126 if (i->modC0()) {
3127 op2 = BX_READ_XMM_REG(i->rm());
3129 else {
3130 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3131 /* pointer, segment address pair */
3132 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3135 op1.xmm64u(0) |= op2.xmm64u(0);
3136 op1.xmm64u(1) |= op2.xmm64u(1);
3138 /* now write result back to destination */
3139 BX_WRITE_XMM_REG(i->nnn(), op1);
3140 #else
3141 BX_INFO(("ORPS_VpsWps: required SSE, use --enable-sse option"));
3142 exception(BX_UD_EXCEPTION, 0, 0);
3143 #endif
3146 /* 66 0F EC */
3147 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDSB_VdqWdq(bxInstruction_c *i)
3149 #if BX_SUPPORT_SSE >= 2
3150 BX_CPU_THIS_PTR prepareSSE();
3152 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3154 /* op2 is a register or memory reference */
3155 if (i->modC0()) {
3156 op2 = BX_READ_XMM_REG(i->rm());
3158 else {
3159 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3160 /* pointer, segment address pair */
3161 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3164 for(unsigned j=0; j<16; j++) {
3165 op1.xmmsbyte(j) = SaturateWordSToByteS(Bit16s(op1.xmmsbyte(j)) + Bit16s(op2.xmmsbyte(j)));
3168 /* now write result back to destination */
3169 BX_WRITE_XMM_REG(i->nnn(), op1);
3170 #else
3171 BX_INFO(("PADDSB_VdqWdq: required SSE2, use --enable-sse option"));
3172 exception(BX_UD_EXCEPTION, 0, 0);
3173 #endif
3176 /* 66 0F ED */
3177 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDSW_VdqWdq(bxInstruction_c *i)
3179 #if BX_SUPPORT_SSE >= 2
3180 BX_CPU_THIS_PTR prepareSSE();
3182 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3184 /* op2 is a register or memory reference */
3185 if (i->modC0()) {
3186 op2 = BX_READ_XMM_REG(i->rm());
3188 else {
3189 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3190 /* pointer, segment address pair */
3191 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3194 op1.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) + Bit32s(op2.xmm16s(0)));
3195 op1.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(1)) + Bit32s(op2.xmm16s(1)));
3196 op1.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) + Bit32s(op2.xmm16s(2)));
3197 op1.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(3)) + Bit32s(op2.xmm16s(3)));
3198 op1.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) + Bit32s(op2.xmm16s(4)));
3199 op1.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(5)) + Bit32s(op2.xmm16s(5)));
3200 op1.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) + Bit32s(op2.xmm16s(6)));
3201 op1.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(7)) + Bit32s(op2.xmm16s(7)));
3203 /* now write result back to destination */
3204 BX_WRITE_XMM_REG(i->nnn(), op1);
3205 #else
3206 BX_INFO(("PADDSW_VdqWdq: required SSE2, use --enable-sse option"));
3207 exception(BX_UD_EXCEPTION, 0, 0);
3208 #endif
3211 /* 66 0F EE */
3212 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSW_VdqWdq(bxInstruction_c *i)
3214 #if BX_SUPPORT_SSE >= 2
3215 BX_CPU_THIS_PTR prepareSSE();
3217 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3219 /* op2 is a register or memory reference */
3220 if (i->modC0()) {
3221 op2 = BX_READ_XMM_REG(i->rm());
3223 else {
3224 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3225 /* pointer, segment address pair */
3226 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3229 if(op2.xmm16s(0) > op1.xmm16s(0)) op1.xmm16s(0) = op2.xmm16s(0);
3230 if(op2.xmm16s(1) > op1.xmm16s(1)) op1.xmm16s(1) = op2.xmm16s(1);
3231 if(op2.xmm16s(2) > op1.xmm16s(2)) op1.xmm16s(2) = op2.xmm16s(2);
3232 if(op2.xmm16s(3) > op1.xmm16s(3)) op1.xmm16s(3) = op2.xmm16s(3);
3233 if(op2.xmm16s(4) > op1.xmm16s(4)) op1.xmm16s(4) = op2.xmm16s(4);
3234 if(op2.xmm16s(5) > op1.xmm16s(5)) op1.xmm16s(5) = op2.xmm16s(5);
3235 if(op2.xmm16s(6) > op1.xmm16s(6)) op1.xmm16s(6) = op2.xmm16s(6);
3236 if(op2.xmm16s(7) > op1.xmm16s(7)) op1.xmm16s(7) = op2.xmm16s(7);
3238 /* now write result back to destination */
3239 BX_WRITE_XMM_REG(i->nnn(), op1);
3240 #else
3241 BX_INFO(("PMAXSW_VdqWdq: required SSE2, use --enable-sse option"));
3242 exception(BX_UD_EXCEPTION, 0, 0);
3243 #endif
3246 /* XORPS: 0F 57 */
3247 /* XORPD: 66 0F 57 */
3248 /* PXOR: 66 0F EF */
3249 void BX_CPP_AttrRegparmN(1) BX_CPU_C::XORPS_VpsWps(bxInstruction_c *i)
3251 #if BX_SUPPORT_SSE >= 1
3252 BX_CPU_THIS_PTR prepareSSE();
3254 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3256 /* op2 is a register or memory reference */
3257 if (i->modC0()) {
3258 op2 = BX_READ_XMM_REG(i->rm());
3260 else {
3261 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3262 /* pointer, segment address pair */
3263 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3266 op1.xmm64u(0) ^= op2.xmm64u(0);
3267 op1.xmm64u(1) ^= op2.xmm64u(1);
3269 /* now write result back to destination */
3270 BX_WRITE_XMM_REG(i->nnn(), op1);
3271 #else
3272 BX_INFO(("XORPS_VpsWps: required SSE, use --enable-sse option"));
3273 exception(BX_UD_EXCEPTION, 0, 0);
3274 #endif
3277 /* 66 0F F1 */
3278 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLW_VdqWdq(bxInstruction_c *i)
3280 #if BX_SUPPORT_SSE >= 2
3281 BX_CPU_THIS_PTR prepareSSE();
3283 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3285 /* op2 is a register or memory reference */
3286 if (i->modC0()) {
3287 op2 = BX_READ_XMM_REG(i->rm());
3289 else {
3290 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3291 /* pointer, segment address pair */
3292 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3295 if(op2.xmm64u(0) > 15) /* looking only to low 64 bits */
3297 op1.xmm64u(0) = 0;
3298 op1.xmm64u(1) = 0;
3300 else
3302 Bit8u shift = op2.xmmubyte(0);
3304 op1.xmm16u(0) <<= shift;
3305 op1.xmm16u(1) <<= shift;
3306 op1.xmm16u(2) <<= shift;
3307 op1.xmm16u(3) <<= shift;
3308 op1.xmm16u(4) <<= shift;
3309 op1.xmm16u(5) <<= shift;
3310 op1.xmm16u(6) <<= shift;
3311 op1.xmm16u(7) <<= shift;
3314 /* now write result back to destination */
3315 BX_WRITE_XMM_REG(i->nnn(), op1);
3316 #else
3317 BX_INFO(("PSLLW_VdqWdq: required SSE2, use --enable-sse option"));
3318 exception(BX_UD_EXCEPTION, 0, 0);
3319 #endif
3322 /* 66 0F F2 */
3323 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLD_VdqWdq(bxInstruction_c *i)
3325 #if BX_SUPPORT_SSE >= 2
3326 BX_CPU_THIS_PTR prepareSSE();
3328 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3330 /* op2 is a register or memory reference */
3331 if (i->modC0()) {
3332 op2 = BX_READ_XMM_REG(i->rm());
3334 else {
3335 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3336 /* pointer, segment address pair */
3337 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3340 if(op2.xmm64u(0) > 31) /* looking only to low 64 bits */
3342 op1.xmm64u(0) = 0;
3343 op1.xmm64u(1) = 0;
3345 else
3347 Bit8u shift = op2.xmmubyte(0);
3349 op1.xmm32u(0) <<= shift;
3350 op1.xmm32u(1) <<= shift;
3351 op1.xmm32u(2) <<= shift;
3352 op1.xmm32u(3) <<= shift;
3355 /* now write result back to destination */
3356 BX_WRITE_XMM_REG(i->nnn(), op1);
3357 #else
3358 BX_INFO(("PSLLD_VdqWdq: required SSE2, use --enable-sse option"));
3359 exception(BX_UD_EXCEPTION, 0, 0);
3360 #endif
3363 /* 66 0F F3 */
3364 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLQ_VdqWdq(bxInstruction_c *i)
3366 #if BX_SUPPORT_SSE >= 2
3367 BX_CPU_THIS_PTR prepareSSE();
3369 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3371 /* op2 is a register or memory reference */
3372 if (i->modC0()) {
3373 op2 = BX_READ_XMM_REG(i->rm());
3375 else {
3376 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3377 /* pointer, segment address pair */
3378 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3381 if(op2.xmm64u(0) > 63) /* looking only to low 64 bits */
3383 op1.xmm64u(0) = 0;
3384 op1.xmm64u(1) = 0;
3386 else
3388 Bit8u shift = op2.xmmubyte(0);
3390 op1.xmm64u(0) <<= shift;
3391 op1.xmm64u(1) <<= shift;
3394 /* now write result back to destination */
3395 BX_WRITE_XMM_REG(i->nnn(), op1);
3396 #else
3397 BX_INFO(("PSLLQ_VdqWdq: required SSE2, use --enable-sse option"));
3398 exception(BX_UD_EXCEPTION, 0, 0);
3399 #endif
3402 /* 66 0F F4 */
3403 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULUDQ_VdqWdq(bxInstruction_c *i)
3405 #if BX_SUPPORT_SSE >= 2
3406 BX_CPU_THIS_PTR prepareSSE();
3408 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
3410 /* op2 is a register or memory reference */
3411 if (i->modC0()) {
3412 op2 = BX_READ_XMM_REG(i->rm());
3414 else {
3415 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3416 /* pointer, segment address pair */
3417 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3420 result.xmm64u(0) = Bit64u(op1.xmm32u(0)) * Bit64u(op2.xmm32u(0));
3421 result.xmm64u(1) = Bit64u(op1.xmm32u(2)) * Bit64u(op2.xmm32u(2));
3423 /* now write result back to destination */
3424 BX_WRITE_XMM_REG(i->nnn(), result);
3425 #else
3426 BX_INFO(("PMULUDQ_VdqWdq: required SSE2, use --enable-sse option"));
3427 exception(BX_UD_EXCEPTION, 0, 0);
3428 #endif
3431 /* 66 0F F5 */
3432 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMADDWD_VdqWdq(bxInstruction_c *i)
3434 #if BX_SUPPORT_SSE >= 2
3435 BX_CPU_THIS_PTR prepareSSE();
3437 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
3439 /* op2 is a register or memory reference */
3440 if (i->modC0()) {
3441 op2 = BX_READ_XMM_REG(i->rm());
3443 else {
3444 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3445 /* pointer, segment address pair */
3446 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3449 for(unsigned j=0; j<4; j++)
3451 if(op1.xmm32u(j) == 0x80008000 && op2.xmm32u(j) == 0x80008000) {
3452 result.xmm32u(j) = 0x80000000;
3454 else {
3455 result.xmm32u(j) =
3456 Bit32s(op1.xmm16s(2*j+0)) * Bit32s(op2.xmm16s(2*j+0)) +
3457 Bit32s(op1.xmm16s(2*j+1)) * Bit32s(op2.xmm16s(2*j+1));
3461 /* now write result back to destination */
3462 BX_WRITE_XMM_REG(i->nnn(), result);
3463 #else
3464 BX_INFO(("PMADDWD_VdqWdq: required SSE2, use --enable-sse option"));
3465 exception(BX_UD_EXCEPTION, 0, 0);
3466 #endif
3469 /* 66 0F F6 */
3470 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSADBW_VdqWdq(bxInstruction_c *i)
3472 #if BX_SUPPORT_SSE >= 2
3473 BX_CPU_THIS_PTR prepareSSE();
3475 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3476 Bit16u temp1 = 0, temp2 = 0;
3478 /* op2 is a register or memory reference */
3479 if (i->modC0()) {
3480 op2 = BX_READ_XMM_REG(i->rm());
3482 else {
3483 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3484 /* pointer, segment address pair */
3485 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3488 temp1 += abs(op1.xmmubyte(0x0) - op2.xmmubyte(0x0));
3489 temp1 += abs(op1.xmmubyte(0x1) - op2.xmmubyte(0x1));
3490 temp1 += abs(op1.xmmubyte(0x2) - op2.xmmubyte(0x2));
3491 temp1 += abs(op1.xmmubyte(0x3) - op2.xmmubyte(0x3));
3492 temp1 += abs(op1.xmmubyte(0x4) - op2.xmmubyte(0x4));
3493 temp1 += abs(op1.xmmubyte(0x5) - op2.xmmubyte(0x5));
3494 temp1 += abs(op1.xmmubyte(0x6) - op2.xmmubyte(0x6));
3495 temp1 += abs(op1.xmmubyte(0x7) - op2.xmmubyte(0x7));
3497 temp2 += abs(op1.xmmubyte(0x8) - op2.xmmubyte(0x8));
3498 temp2 += abs(op1.xmmubyte(0x9) - op2.xmmubyte(0x9));
3499 temp2 += abs(op1.xmmubyte(0xA) - op2.xmmubyte(0xA));
3500 temp2 += abs(op1.xmmubyte(0xB) - op2.xmmubyte(0xB));
3501 temp2 += abs(op1.xmmubyte(0xC) - op2.xmmubyte(0xC));
3502 temp2 += abs(op1.xmmubyte(0xD) - op2.xmmubyte(0xD));
3503 temp2 += abs(op1.xmmubyte(0xE) - op2.xmmubyte(0xE));
3504 temp2 += abs(op1.xmmubyte(0xF) - op2.xmmubyte(0xF));
3506 op1.xmm64u(0) = Bit64u(temp1);
3507 op1.xmm64u(1) = Bit64u(temp2);
3509 /* now write result back to destination */
3510 BX_WRITE_XMM_REG(i->nnn(), op1);
3511 #else
3512 BX_INFO(("PSADBW_VdqWdq: required SSE2, use --enable-sse option"));
3513 exception(BX_UD_EXCEPTION, 0, 0);
3514 #endif
3517 /* 66 0F F8 */
3518 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBB_VdqWdq(bxInstruction_c *i)
3520 #if BX_SUPPORT_SSE >= 2
3521 BX_CPU_THIS_PTR prepareSSE();
3523 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3525 /* op2 is a register or memory reference */
3526 if (i->modC0()) {
3527 op2 = BX_READ_XMM_REG(i->rm());
3529 else {
3530 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3531 /* pointer, segment address pair */
3532 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3535 for(unsigned j=0; j<16; j++) {
3536 op1.xmmubyte(j) -= op2.xmmubyte(j);
3539 /* now write result back to destination */
3540 BX_WRITE_XMM_REG(i->nnn(), op1);
3541 #else
3542 BX_INFO(("PSUBB_VdqWdq: required SSE2, use --enable-sse option"));
3543 exception(BX_UD_EXCEPTION, 0, 0);
3544 #endif
3547 /* 66 0F F9 */
3548 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBW_VdqWdq(bxInstruction_c *i)
3550 #if BX_SUPPORT_SSE >= 2
3551 BX_CPU_THIS_PTR prepareSSE();
3553 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3555 /* op2 is a register or memory reference */
3556 if (i->modC0()) {
3557 op2 = BX_READ_XMM_REG(i->rm());
3559 else {
3560 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3561 /* pointer, segment address pair */
3562 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3565 op1.xmm16u(0) -= op2.xmm16u(0);
3566 op1.xmm16u(1) -= op2.xmm16u(1);
3567 op1.xmm16u(2) -= op2.xmm16u(2);
3568 op1.xmm16u(3) -= op2.xmm16u(3);
3569 op1.xmm16u(4) -= op2.xmm16u(4);
3570 op1.xmm16u(5) -= op2.xmm16u(5);
3571 op1.xmm16u(6) -= op2.xmm16u(6);
3572 op1.xmm16u(7) -= op2.xmm16u(7);
3574 /* now write result back to destination */
3575 BX_WRITE_XMM_REG(i->nnn(), op1);
3576 #else
3577 BX_INFO(("PSUBW_VdqWdq: required SSE2, use --enable-sse option"));
3578 exception(BX_UD_EXCEPTION, 0, 0);
3579 #endif
3582 /* 66 0F FA */
3583 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBD_VdqWdq(bxInstruction_c *i)
3585 #if BX_SUPPORT_SSE >= 2
3586 BX_CPU_THIS_PTR prepareSSE();
3588 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3590 /* op2 is a register or memory reference */
3591 if (i->modC0()) {
3592 op2 = BX_READ_XMM_REG(i->rm());
3594 else {
3595 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3596 /* pointer, segment address pair */
3597 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3600 op1.xmm32u(0) -= op2.xmm32u(0);
3601 op1.xmm32u(1) -= op2.xmm32u(1);
3602 op1.xmm32u(2) -= op2.xmm32u(2);
3603 op1.xmm32u(3) -= op2.xmm32u(3);
3605 /* now write result back to destination */
3606 BX_WRITE_XMM_REG(i->nnn(), op1);
3607 #else
3608 BX_INFO(("PSUBD_VdqWdq: required SSE2, use --enable-sse option"));
3609 exception(BX_UD_EXCEPTION, 0, 0);
3610 #endif
3613 /* 66 0F FB */
3614 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBQ_VdqWdq(bxInstruction_c *i)
3616 #if BX_SUPPORT_SSE >= 2
3617 BX_CPU_THIS_PTR prepareSSE();
3619 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3621 /* op2 is a register or memory reference */
3622 if (i->modC0()) {
3623 op2 = BX_READ_XMM_REG(i->rm());
3625 else {
3626 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3627 /* pointer, segment address pair */
3628 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3631 op1.xmm64u(0) -= op2.xmm64u(0);
3632 op1.xmm64u(1) -= op2.xmm64u(1);
3634 /* now write result back to destination */
3635 BX_WRITE_XMM_REG(i->nnn(), op1);
3636 #else
3637 BX_INFO(("PSUBQ_VdqWdq: required SSE2, use --enable-sse option"));
3638 exception(BX_UD_EXCEPTION, 0, 0);
3639 #endif
3642 /* 66 0F FC */
3643 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDB_VdqWdq(bxInstruction_c *i)
3645 #if BX_SUPPORT_SSE >= 2
3646 BX_CPU_THIS_PTR prepareSSE();
3648 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3650 /* op2 is a register or memory reference */
3651 if (i->modC0()) {
3652 op2 = BX_READ_XMM_REG(i->rm());
3654 else {
3655 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3656 /* pointer, segment address pair */
3657 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3660 for(unsigned j=0; j<16; j++) {
3661 op1.xmmubyte(j) += op2.xmmubyte(j);
3664 /* now write result back to destination */
3665 BX_WRITE_XMM_REG(i->nnn(), op1);
3666 #else
3667 BX_INFO(("PADDB_VdqWdq: required SSE2, use --enable-sse option"));
3668 exception(BX_UD_EXCEPTION, 0, 0);
3669 #endif
3672 /* 66 0F FD */
3673 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDW_VdqWdq(bxInstruction_c *i)
3675 #if BX_SUPPORT_SSE >= 2
3676 BX_CPU_THIS_PTR prepareSSE();
3678 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3680 /* op2 is a register or memory reference */
3681 if (i->modC0()) {
3682 op2 = BX_READ_XMM_REG(i->rm());
3684 else {
3685 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3686 /* pointer, segment address pair */
3687 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3690 op1.xmm16u(0) += op2.xmm16u(0);
3691 op1.xmm16u(1) += op2.xmm16u(1);
3692 op1.xmm16u(2) += op2.xmm16u(2);
3693 op1.xmm16u(3) += op2.xmm16u(3);
3694 op1.xmm16u(4) += op2.xmm16u(4);
3695 op1.xmm16u(5) += op2.xmm16u(5);
3696 op1.xmm16u(6) += op2.xmm16u(6);
3697 op1.xmm16u(7) += op2.xmm16u(7);
3699 /* now write result back to destination */
3700 BX_WRITE_XMM_REG(i->nnn(), op1);
3701 #else
3702 BX_INFO(("PADDW_VdqWdq: required SSE2, use --enable-sse option"));
3703 exception(BX_UD_EXCEPTION, 0, 0);
3704 #endif
3707 /* 66 0F FE */
3708 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDD_VdqWdq(bxInstruction_c *i)
3710 #if BX_SUPPORT_SSE >= 2
3711 BX_CPU_THIS_PTR prepareSSE();
3713 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3715 /* op2 is a register or memory reference */
3716 if (i->modC0()) {
3717 op2 = BX_READ_XMM_REG(i->rm());
3719 else {
3720 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3721 /* pointer, segment address pair */
3722 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3725 op1.xmm32u(0) += op2.xmm32u(0);
3726 op1.xmm32u(1) += op2.xmm32u(1);
3727 op1.xmm32u(2) += op2.xmm32u(2);
3728 op1.xmm32u(3) += op2.xmm32u(3);
3730 /* now write result back to destination */
3731 BX_WRITE_XMM_REG(i->nnn(), op1);
3732 #else
3733 BX_INFO(("PADDD_VdqWdq: required SSE2, use --enable-sse option"));
3734 exception(BX_UD_EXCEPTION, 0, 0);
3735 #endif
3738 /* 66 0F 71 Grp12 010 */
3739 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLW_UdqIb(bxInstruction_c *i)
3741 #if BX_SUPPORT_SSE >= 2
3742 BX_CPU_THIS_PTR prepareSSE();
3744 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
3745 Bit8u shift = i->Ib();
3747 if(shift > 15) {
3748 op.xmm64u(0) = 0;
3749 op.xmm64u(1) = 0;
3751 else {
3752 op.xmm16u(0) >>= shift;
3753 op.xmm16u(1) >>= shift;
3754 op.xmm16u(2) >>= shift;
3755 op.xmm16u(3) >>= shift;
3756 op.xmm16u(4) >>= shift;
3757 op.xmm16u(5) >>= shift;
3758 op.xmm16u(6) >>= shift;
3759 op.xmm16u(7) >>= shift;
3762 /* now write result back to destination */
3763 BX_WRITE_XMM_REG(i->rm(), op);
3764 #else
3765 BX_INFO(("PSRLW_UdqIb: required SSE2, use --enable-sse option"));
3766 exception(BX_UD_EXCEPTION, 0, 0);
3767 #endif
3770 /* 0F 71 Grp12 100 */
3771 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_UdqIb(bxInstruction_c *i)
3773 #if BX_SUPPORT_SSE >= 2
3774 BX_CPU_THIS_PTR prepareSSE();
3776 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
3777 Bit8u shift = i->Ib();
3779 if(shift == 0) return;
3781 if(shift > 15) {
3782 result.xmm16u(0) = (op.xmm16u(0) & 0x8000) ? 0xffff : 0;
3783 result.xmm16u(1) = (op.xmm16u(1) & 0x8000) ? 0xffff : 0;
3784 result.xmm16u(2) = (op.xmm16u(2) & 0x8000) ? 0xffff : 0;
3785 result.xmm16u(3) = (op.xmm16u(3) & 0x8000) ? 0xffff : 0;
3786 result.xmm16u(4) = (op.xmm16u(4) & 0x8000) ? 0xffff : 0;
3787 result.xmm16u(5) = (op.xmm16u(5) & 0x8000) ? 0xffff : 0;
3788 result.xmm16u(6) = (op.xmm16u(6) & 0x8000) ? 0xffff : 0;
3789 result.xmm16u(7) = (op.xmm16u(7) & 0x8000) ? 0xffff : 0;
3791 else {
3792 result.xmm16u(0) = op.xmm16u(0) >> shift;
3793 result.xmm16u(1) = op.xmm16u(1) >> shift;
3794 result.xmm16u(2) = op.xmm16u(2) >> shift;
3795 result.xmm16u(3) = op.xmm16u(3) >> shift;
3796 result.xmm16u(4) = op.xmm16u(4) >> shift;
3797 result.xmm16u(5) = op.xmm16u(5) >> shift;
3798 result.xmm16u(6) = op.xmm16u(6) >> shift;
3799 result.xmm16u(7) = op.xmm16u(7) >> shift;
3801 if(op.xmm16u(0) & 0x8000) result.xmm16u(0) |= (0xffff << (16 - shift));
3802 if(op.xmm16u(1) & 0x8000) result.xmm16u(1) |= (0xffff << (16 - shift));
3803 if(op.xmm16u(2) & 0x8000) result.xmm16u(2) |= (0xffff << (16 - shift));
3804 if(op.xmm16u(3) & 0x8000) result.xmm16u(3) |= (0xffff << (16 - shift));
3805 if(op.xmm16u(4) & 0x8000) result.xmm16u(4) |= (0xffff << (16 - shift));
3806 if(op.xmm16u(5) & 0x8000) result.xmm16u(5) |= (0xffff << (16 - shift));
3807 if(op.xmm16u(6) & 0x8000) result.xmm16u(6) |= (0xffff << (16 - shift));
3808 if(op.xmm16u(7) & 0x8000) result.xmm16u(7) |= (0xffff << (16 - shift));
3811 /* now write result back to destination */
3812 BX_WRITE_XMM_REG(i->rm(), result);
3813 #else
3814 BX_INFO(("PSRAW_UdqIb: required SSE2, use --enable-sse option"));
3815 exception(BX_UD_EXCEPTION, 0, 0);
3816 #endif
3819 /* 66 0F 71 Grp12 110 */
3820 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLW_UdqIb(bxInstruction_c *i)
3822 #if BX_SUPPORT_SSE >= 2
3823 BX_CPU_THIS_PTR prepareSSE();
3825 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
3826 Bit8u shift = i->Ib();
3828 if(shift > 15) {
3829 op.xmm64u(0) = 0;
3830 op.xmm64u(1) = 0;
3832 else {
3833 op.xmm16u(0) <<= shift;
3834 op.xmm16u(1) <<= shift;
3835 op.xmm16u(2) <<= shift;
3836 op.xmm16u(3) <<= shift;
3837 op.xmm16u(4) <<= shift;
3838 op.xmm16u(5) <<= shift;
3839 op.xmm16u(6) <<= shift;
3840 op.xmm16u(7) <<= shift;
3843 /* now write result back to destination */
3844 BX_WRITE_XMM_REG(i->rm(), op);
3845 #else
3846 BX_INFO(("PSLLW_UdqIb: required SSE2, use --enable-sse option"));
3847 exception(BX_UD_EXCEPTION, 0, 0);
3848 #endif
3851 /* 66 0F 72 Grp13 010 */
3852 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLD_UdqIb(bxInstruction_c *i)
3854 #if BX_SUPPORT_SSE >= 2
3855 BX_CPU_THIS_PTR prepareSSE();
3857 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
3858 Bit8u shift = i->Ib();
3860 if(shift > 31) {
3861 op.xmm64u(0) = 0;
3862 op.xmm64u(1) = 0;
3864 else {
3865 op.xmm32u(0) >>= shift;
3866 op.xmm32u(1) >>= shift;
3867 op.xmm32u(2) >>= shift;
3868 op.xmm32u(3) >>= shift;
3871 /* now write result back to destination */
3872 BX_WRITE_XMM_REG(i->rm(), op);
3873 #else
3874 BX_INFO(("PSRLD_UdqIb: required SSE2, use --enable-sse option"));
3875 exception(BX_UD_EXCEPTION, 0, 0);
3876 #endif
3879 /* 0F 72 Grp13 100 */
3880 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAD_UdqIb(bxInstruction_c *i)
3882 #if BX_SUPPORT_SSE >= 2
3883 BX_CPU_THIS_PTR prepareSSE();
3885 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
3886 Bit8u shift = i->Ib();
3888 if(shift == 0) return;
3890 if(shift > 31) {
3891 result.xmm32u(0) = (op.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
3892 result.xmm32u(1) = (op.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
3893 result.xmm32u(2) = (op.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
3894 result.xmm32u(3) = (op.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
3896 else {
3897 result.xmm32u(0) = op.xmm32u(0) >> shift;
3898 result.xmm32u(1) = op.xmm32u(1) >> shift;
3899 result.xmm32u(2) = op.xmm32u(2) >> shift;
3900 result.xmm32u(3) = op.xmm32u(3) >> shift;
3902 if(op.xmm32u(0) & 0x80000000) result.xmm32u(0) |= (0xffffffff << (32-shift));
3903 if(op.xmm32u(1) & 0x80000000) result.xmm32u(1) |= (0xffffffff << (32-shift));
3904 if(op.xmm32u(2) & 0x80000000) result.xmm32u(2) |= (0xffffffff << (32-shift));
3905 if(op.xmm32u(3) & 0x80000000) result.xmm32u(3) |= (0xffffffff << (32-shift));
3908 /* now write result back to destination */
3909 BX_WRITE_XMM_REG(i->rm(), result);
3910 #else
3911 BX_INFO(("PSRAD_UdqIb: required SSE2, use --enable-sse option"));
3912 exception(BX_UD_EXCEPTION, 0, 0);
3913 #endif
3916 /* 66 0F 72 Grp13 110 */
3917 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLD_UdqIb(bxInstruction_c *i)
3919 #if BX_SUPPORT_SSE >= 2
3920 BX_CPU_THIS_PTR prepareSSE();
3922 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
3923 Bit8u shift = i->Ib();
3925 if(shift > 31) {
3926 op.xmm64u(0) = 0;
3927 op.xmm64u(1) = 0;
3929 else {
3930 op.xmm32u(0) <<= shift;
3931 op.xmm32u(1) <<= shift;
3932 op.xmm32u(2) <<= shift;
3933 op.xmm32u(3) <<= shift;
3936 /* now write result back to destination */
3937 BX_WRITE_XMM_REG(i->rm(), op);
3938 #else
3939 BX_INFO(("PSLLD_UdqIb: required SSE2, use --enable-sse option"));
3940 exception(BX_UD_EXCEPTION, 0, 0);
3941 #endif
3944 /* 66 0F 73 Grp14 010 */
3945 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLQ_UdqIb(bxInstruction_c *i)
3947 #if BX_SUPPORT_SSE >= 2
3948 BX_CPU_THIS_PTR prepareSSE();
3950 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
3951 Bit8u shift = i->Ib();
3953 if(shift > 63) {
3954 op.xmm64u(0) = 0;
3955 op.xmm64u(1) = 0;
3957 else {
3958 op.xmm64u(0) >>= shift;
3959 op.xmm64u(1) >>= shift;
3962 /* now write result back to destination */
3963 BX_WRITE_XMM_REG(i->rm(), op);
3964 #else
3965 BX_INFO(("PSRLQ_UdqIb: required SSE2, use --enable-sse option"));
3966 exception(BX_UD_EXCEPTION, 0, 0);
3967 #endif
3970 /* 66 0F 73 Grp14 011 */
3971 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLDQ_UdqIb(bxInstruction_c *i)
3973 #if BX_SUPPORT_SSE >= 2
3974 BX_CPU_THIS_PTR prepareSSE();
3976 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
3977 Bit8u shift = i->Ib();
3979 result.xmm64u(0) = result.xmm64u(1) = 0;
3981 for(unsigned j=shift; j<16; j++) {
3982 result.xmmubyte(j-shift) = op.xmmubyte(j);
3985 /* now write result back to destination */
3986 BX_WRITE_XMM_REG(i->rm(), result);
3987 #else
3988 BX_INFO(("PSRLDQ_UdqIb: required SSE2, use --enable-sse option"));
3989 exception(BX_UD_EXCEPTION, 0, 0);
3990 #endif
3993 /* 66 0F 73 Grp14 110 */
3994 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLQ_UdqIb(bxInstruction_c *i)
3996 #if BX_SUPPORT_SSE >= 2
3997 BX_CPU_THIS_PTR prepareSSE();
3999 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
4000 Bit8u shift = i->Ib();
4002 if(shift > 63) {
4003 op.xmm64u(0) = 0;
4004 op.xmm64u(1) = 0;
4006 else {
4007 op.xmm64u(0) <<= shift;
4008 op.xmm64u(1) <<= shift;
4011 /* now write result back to destination */
4012 BX_WRITE_XMM_REG(i->rm(), op);
4013 #else
4014 BX_INFO(("PSLLQ_UdqIb: required SSE2, use --enable-sse option"));
4015 exception(BX_UD_EXCEPTION, 0, 0);
4016 #endif
4019 /* 66 0F 73 Grp14 111 */
4020 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSLLDQ_UdqIb(bxInstruction_c *i)
4022 #if BX_SUPPORT_SSE >= 2
4023 BX_CPU_THIS_PTR prepareSSE();
4025 BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
4026 Bit8u shift = i->Ib();
4028 result.xmm64u(0) = result.xmm64u(1) = 0;
4030 for(unsigned j=shift; j<16; j++) {
4031 result.xmmubyte(j) = op.xmmubyte(j-shift);
4034 /* now write result back to destination */
4035 BX_WRITE_XMM_REG(i->rm(), result);
4036 #else
4037 BX_INFO(("PSLLDQ_UdqIb: required SSE2, use --enable-sse option"));
4038 exception(BX_UD_EXCEPTION, 0, 0);
4039 #endif