/////////////////////////////////////////////////////////////////////////
// $Id: sse.cc,v 1.47 2007/07/31 20:25:52 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2003 Stanislav Shwartsman
//   Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR

/* ********************************************** */
/* SSE Integer Operations (128bit MMX extensions) */
/* ********************************************** */

#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)

/* 66 0F 38 00 */
void BX_CPU_C::PSHUFB_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }
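
  /* Byte shuffle: bits 3..0 of each op2 byte select the op1 byte that
     lands in that destination position; a set bit 7 zeroes the byte. */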
  for(unsigned j=0; j<16; j++)
  {
    unsigned mask = op2.xmmubyte(j);
    if (mask & 0x80)
      result.xmmubyte(j) = 0;
    else
      result.xmmubyte(j) = op1.xmmubyte(mask & 0xf);
  }

  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSHUFB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 01 */
void BX_CPU_C::PHADDW_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16u(0) = op1.xmm16u(0) + op1.xmm16u(1);
  result.xmm16u(1) = op1.xmm16u(2) + op1.xmm16u(3);
  result.xmm16u(2) = op1.xmm16u(4) + op1.xmm16u(5);
  result.xmm16u(3) = op1.xmm16u(6) + op1.xmm16u(7);

  result.xmm16u(4) = op2.xmm16u(0) + op2.xmm16u(1);
  result.xmm16u(5) = op2.xmm16u(2) + op2.xmm16u(3);
  result.xmm16u(6) = op2.xmm16u(4) + op2.xmm16u(5);
  result.xmm16u(7) = op2.xmm16u(6) + op2.xmm16u(7);

  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PHADDW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 02 */
void BX_CPU_C::PHADDD_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm32u(0) = op1.xmm32u(0) + op1.xmm32u(1);
  result.xmm32u(1) = op1.xmm32u(2) + op1.xmm32u(3);
  result.xmm32u(2) = op2.xmm32u(0) + op2.xmm32u(1);
  result.xmm32u(3) = op2.xmm32u(2) + op2.xmm32u(3);

  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PHADDD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 03 */
void BX_CPU_C::PHADDSW_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) + Bit32s(op1.xmm16s(1)));
  result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) + Bit32s(op1.xmm16s(3)));
  result.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) + Bit32s(op1.xmm16s(5)));
  result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) + Bit32s(op1.xmm16s(7)));

  result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) + Bit32s(op2.xmm16s(1)));
  result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) + Bit32s(op2.xmm16s(3)));
  result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) + Bit32s(op2.xmm16s(5)));
  result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) + Bit32s(op2.xmm16s(7)));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PHADDSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 04 */
void BX_CPU_C::PMADDUBSW_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }
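
  /* Multiply the unsigned bytes of op1 with the corresponding signed
     bytes of op2, add adjacent products pairwise and store each sum
     as a signed-saturated word. */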
  for(unsigned j=0; j<8; j++)
  {
    Bit32s temp = Bit32s(op1.xmmubyte(j*2+0))*Bit32s(op2.xmmsbyte(j*2+0)) +
                  Bit32s(op1.xmmubyte(j*2+1))*Bit32s(op2.xmmsbyte(j*2+1));

    result.xmm16s(j) = SaturateDwordSToWordS(temp);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMADDUBSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 07 */
void BX_CPU_C::PHSUBSW_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) - Bit32s(op1.xmm16s(1)));
  result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) - Bit32s(op1.xmm16s(3)));
  result.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) - Bit32s(op1.xmm16s(5)));
  result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) - Bit32s(op1.xmm16s(7)));

  result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) - Bit32s(op2.xmm16s(1)));
  result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) - Bit32s(op2.xmm16s(3)));
  result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) - Bit32s(op2.xmm16s(5)));
  result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) - Bit32s(op2.xmm16s(7)));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PHSUBSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 05 */
void BX_CPU_C::PHSUBW_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16u(0) = op1.xmm16u(0) - op1.xmm16u(1);
  result.xmm16u(1) = op1.xmm16u(2) - op1.xmm16u(3);
  result.xmm16u(2) = op1.xmm16u(4) - op1.xmm16u(5);
  result.xmm16u(3) = op1.xmm16u(6) - op1.xmm16u(7);

  result.xmm16u(4) = op2.xmm16u(0) - op2.xmm16u(1);
  result.xmm16u(5) = op2.xmm16u(2) - op2.xmm16u(3);
  result.xmm16u(6) = op2.xmm16u(4) - op2.xmm16u(5);
  result.xmm16u(7) = op2.xmm16u(6) - op2.xmm16u(7);

  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PHSUBW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 06 */
void BX_CPU_C::PHSUBD_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm32u(0) = op1.xmm32u(0) - op1.xmm32u(1);
  result.xmm32u(1) = op1.xmm32u(2) - op1.xmm32u(3);
  result.xmm32u(2) = op2.xmm32u(0) - op2.xmm32u(1);
  result.xmm32u(3) = op2.xmm32u(2) - op2.xmm32u(3);

  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PHSUBD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 08 */
void BX_CPU_C::PSIGNB_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }
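
  /* PSIGN: each op1 element is negated if the matching op2 element is
     negative, zeroed if it is zero, and kept unchanged if positive. */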
  for(unsigned j=0; j<16; j++) {
    int sign = (op2.xmmsbyte(j) > 0) - (op2.xmmsbyte(j) < 0);
    op1.xmmsbyte(j) *= sign;
  }

  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSIGNB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 09 */
void BX_CPU_C::PSIGNW_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<8; j++) {
    int sign = (op2.xmm16s(j) > 0) - (op2.xmm16s(j) < 0);
    op1.xmm16s(j) *= sign;
  }

  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSIGNW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 0A */
void BX_CPU_C::PSIGND_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<4; j++) {
    int sign = (op2.xmm32s(j) > 0) - (op2.xmm32s(j) < 0);
    op1.xmm32s(j) *= sign;
  }

  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSIGND_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 0B */
void BX_CPU_C::PMULHRSW_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }
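
  /* Rounded high half of the signed 16x16 product; the expression
     below is equivalent to (op1 * op2 + 0x4000) >> 15 computed in
     32-bit arithmetic. */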
  result.xmm16u(0) = (((op1.xmm16s(0) * op2.xmm16s(0)) >> 14) + 1) >> 1;
  result.xmm16u(1) = (((op1.xmm16s(1) * op2.xmm16s(1)) >> 14) + 1) >> 1;
  result.xmm16u(2) = (((op1.xmm16s(2) * op2.xmm16s(2)) >> 14) + 1) >> 1;
  result.xmm16u(3) = (((op1.xmm16s(3) * op2.xmm16s(3)) >> 14) + 1) >> 1;
  result.xmm16u(4) = (((op1.xmm16s(4) * op2.xmm16s(4)) >> 14) + 1) >> 1;
  result.xmm16u(5) = (((op1.xmm16s(5) * op2.xmm16s(5)) >> 14) + 1) >> 1;
  result.xmm16u(6) = (((op1.xmm16s(6) * op2.xmm16s(6)) >> 14) + 1) >> 1;
  result.xmm16u(7) = (((op1.xmm16s(7) * op2.xmm16s(7)) >> 14) + 1) >> 1;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMULHRSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 1C */
void BX_CPU_C::PABSB_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op;

  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  if(op.xmmsbyte(0x0) < 0) op.xmmubyte(0x0) = -op.xmmsbyte(0x0);
  if(op.xmmsbyte(0x1) < 0) op.xmmubyte(0x1) = -op.xmmsbyte(0x1);
  if(op.xmmsbyte(0x2) < 0) op.xmmubyte(0x2) = -op.xmmsbyte(0x2);
  if(op.xmmsbyte(0x3) < 0) op.xmmubyte(0x3) = -op.xmmsbyte(0x3);
  if(op.xmmsbyte(0x4) < 0) op.xmmubyte(0x4) = -op.xmmsbyte(0x4);
  if(op.xmmsbyte(0x5) < 0) op.xmmubyte(0x5) = -op.xmmsbyte(0x5);
  if(op.xmmsbyte(0x6) < 0) op.xmmubyte(0x6) = -op.xmmsbyte(0x6);
  if(op.xmmsbyte(0x7) < 0) op.xmmubyte(0x7) = -op.xmmsbyte(0x7);
  if(op.xmmsbyte(0x8) < 0) op.xmmubyte(0x8) = -op.xmmsbyte(0x8);
  if(op.xmmsbyte(0x9) < 0) op.xmmubyte(0x9) = -op.xmmsbyte(0x9);
  if(op.xmmsbyte(0xa) < 0) op.xmmubyte(0xa) = -op.xmmsbyte(0xa);
  if(op.xmmsbyte(0xb) < 0) op.xmmubyte(0xb) = -op.xmmsbyte(0xb);
  if(op.xmmsbyte(0xc) < 0) op.xmmubyte(0xc) = -op.xmmsbyte(0xc);
  if(op.xmmsbyte(0xd) < 0) op.xmmubyte(0xd) = -op.xmmsbyte(0xd);
  if(op.xmmsbyte(0xe) < 0) op.xmmubyte(0xe) = -op.xmmsbyte(0xe);
  if(op.xmmsbyte(0xf) < 0) op.xmmubyte(0xf) = -op.xmmsbyte(0xf);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("PABSB_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 1D */
void BX_CPU_C::PABSW_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op;

  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  if(op.xmm16s(0) < 0) op.xmm16u(0) = -op.xmm16s(0);
  if(op.xmm16s(1) < 0) op.xmm16u(1) = -op.xmm16s(1);
  if(op.xmm16s(2) < 0) op.xmm16u(2) = -op.xmm16s(2);
  if(op.xmm16s(3) < 0) op.xmm16u(3) = -op.xmm16s(3);
  if(op.xmm16s(4) < 0) op.xmm16u(4) = -op.xmm16s(4);
  if(op.xmm16s(5) < 0) op.xmm16u(5) = -op.xmm16s(5);
  if(op.xmm16s(6) < 0) op.xmm16u(6) = -op.xmm16s(6);
  if(op.xmm16s(7) < 0) op.xmm16u(7) = -op.xmm16s(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("PABSW_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 1E */
void BX_CPU_C::PABSD_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op;

  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  if(op.xmm32s(0) < 0) op.xmm32u(0) = -op.xmm32s(0);
  if(op.xmm32s(1) < 0) op.xmm32u(1) = -op.xmm32s(1);
  if(op.xmm32s(2) < 0) op.xmm32u(2) = -op.xmm32s(2);
  if(op.xmm32s(3) < 0) op.xmm32u(3) = -op.xmm32s(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("PABSD_VdqWdq: required SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 10 */
void BX_CPU_C::PBLENDVB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2,
        mask = BX_READ_XMM_REG(0); // XMM0

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }
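
  /* The blend mask is implicit in XMM0: bit 7 of each mask byte picks
     the op2 byte, otherwise the op1 byte is kept. */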
  for(unsigned j=0; j<16; j++)
    if (mask.xmmubyte(j) & 0x80) op1.xmmubyte(j) = op2.xmmubyte(j);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PBLENDVB_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 14 */
void BX_CPU_C::BLENDVPS_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2,
        mask = BX_READ_XMM_REG(0); // XMM0

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if (mask.xmm32u(0) & 0x80000000) op1.xmm32u(0) = op2.xmm32u(0);
  if (mask.xmm32u(1) & 0x80000000) op1.xmm32u(1) = op2.xmm32u(1);
  if (mask.xmm32u(2) & 0x80000000) op1.xmm32u(2) = op2.xmm32u(2);
  if (mask.xmm32u(3) & 0x80000000) op1.xmm32u(3) = op2.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("BLENDVPS_VpsWps: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 15 */
void BX_CPU_C::BLENDVPD_VpdWpd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2,
        mask = BX_READ_XMM_REG(0); // XMM0

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if (mask.xmm32u(1) & 0x80000000) op1.xmm64u(0) = op2.xmm64u(0);
  if (mask.xmm32u(3) & 0x80000000) op1.xmm64u(1) = op2.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("BLENDVPD_VpdWpd: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 17 */
void BX_CPU_C::PTEST_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
  unsigned result = 0;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }
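
  /* ZF is set when op1 AND op2 is all zeroes, CF when op2 AND NOT op1
     is all zeroes; the remaining OSZAPC flags are cleared. */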
  if ((op2.xmm64u(0) &  op1.xmm64u(0)) == 0 &&
      (op2.xmm64u(1) &  op1.xmm64u(1)) == 0) result |= EFlagsZFMask;

  if ((op2.xmm64u(0) & ~op1.xmm64u(0)) == 0 &&
      (op2.xmm64u(1) & ~op1.xmm64u(1)) == 0) result |= EFlagsCFMask;

  setEFlagsOSZAPC(result);
#else
  BX_INFO(("PTEST_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 28 */
void BX_CPU_C::PMULDQ_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm64s(0) = Bit64s(op1.xmm32s(0)) * Bit64s(op2.xmm32s(0));
  result.xmm64s(1) = Bit64s(op1.xmm32s(2)) * Bit64s(op2.xmm32s(2));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMULDQ_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 29 */
void BX_CPU_C::PCMPEQQ_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm64u(0) = (op1.xmm64u(0) == op2.xmm64u(0)) ?
      BX_CONST64(0xffffffffffffffff) : 0;

  op1.xmm64u(1) = (op1.xmm64u(1) == op2.xmm64u(1)) ?
      BX_CONST64(0xffffffffffffffff) : 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PCMPEQQ_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 2B */
void BX_CPU_C::PACKUSDW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }
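
  /* Pack the eight signed doublewords of op1/op2 into words with
     unsigned saturation to the [0, 0xFFFF] range. */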
  result.xmm16u(0) = SaturateDwordSToWordU(op1.xmm32s(0));
  result.xmm16u(1) = SaturateDwordSToWordU(op1.xmm32s(1));
  result.xmm16u(2) = SaturateDwordSToWordU(op1.xmm32s(2));
  result.xmm16u(3) = SaturateDwordSToWordU(op1.xmm32s(3));
  result.xmm16u(4) = SaturateDwordSToWordU(op2.xmm32s(0));
  result.xmm16u(5) = SaturateDwordSToWordU(op2.xmm32s(1));
  result.xmm16u(6) = SaturateDwordSToWordU(op2.xmm32s(2));
  result.xmm16u(7) = SaturateDwordSToWordU(op2.xmm32s(3));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PACKUSDW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 38 */
void BX_CPU_C::PMINSB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    if(op2.xmmsbyte(j) < op1.xmmsbyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMINSB_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 39 */
void BX_CPU_C::PMINSD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm32s(0) < op1.xmm32s(0)) op1.xmm32u(0) = op2.xmm32u(0);
  if(op2.xmm32s(1) < op1.xmm32s(1)) op1.xmm32u(1) = op2.xmm32u(1);
  if(op2.xmm32s(2) < op1.xmm32s(2)) op1.xmm32u(2) = op2.xmm32u(2);
  if(op2.xmm32s(3) < op1.xmm32s(3)) op1.xmm32u(3) = op2.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMINSD_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 3A */
void BX_CPU_C::PMINUW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm16u(0) < op1.xmm16u(0)) op1.xmm16u(0) = op2.xmm16u(0);
  if(op2.xmm16u(1) < op1.xmm16u(1)) op1.xmm16u(1) = op2.xmm16u(1);
  if(op2.xmm16u(2) < op1.xmm16u(2)) op1.xmm16u(2) = op2.xmm16u(2);
  if(op2.xmm16u(3) < op1.xmm16u(3)) op1.xmm16u(3) = op2.xmm16u(3);
  if(op2.xmm16u(4) < op1.xmm16u(4)) op1.xmm16u(4) = op2.xmm16u(4);
  if(op2.xmm16u(5) < op1.xmm16u(5)) op1.xmm16u(5) = op2.xmm16u(5);
  if(op2.xmm16u(6) < op1.xmm16u(6)) op1.xmm16u(6) = op2.xmm16u(6);
  if(op2.xmm16u(7) < op1.xmm16u(7)) op1.xmm16u(7) = op2.xmm16u(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMINUW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 3B */
void BX_CPU_C::PMINUD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm32u(0) < op1.xmm32u(0)) op1.xmm32u(0) = op2.xmm32u(0);
  if(op2.xmm32u(1) < op1.xmm32u(1)) op1.xmm32u(1) = op2.xmm32u(1);
  if(op2.xmm32u(2) < op1.xmm32u(2)) op1.xmm32u(2) = op2.xmm32u(2);
  if(op2.xmm32u(3) < op1.xmm32u(3)) op1.xmm32u(3) = op2.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMINUD_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 3C */
void BX_CPU_C::PMAXSB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    if(op2.xmmsbyte(j) > op1.xmmsbyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMAXSB_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 3D */
void BX_CPU_C::PMAXSD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm32s(0) > op1.xmm32s(0)) op1.xmm32u(0) = op2.xmm32u(0);
  if(op2.xmm32s(1) > op1.xmm32s(1)) op1.xmm32u(1) = op2.xmm32u(1);
  if(op2.xmm32s(2) > op1.xmm32s(2)) op1.xmm32u(2) = op2.xmm32u(2);
  if(op2.xmm32s(3) > op1.xmm32s(3)) op1.xmm32u(3) = op2.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMAXSD_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 3E */
void BX_CPU_C::PMAXUW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm16u(0) > op1.xmm16u(0)) op1.xmm16u(0) = op2.xmm16u(0);
  if(op2.xmm16u(1) > op1.xmm16u(1)) op1.xmm16u(1) = op2.xmm16u(1);
  if(op2.xmm16u(2) > op1.xmm16u(2)) op1.xmm16u(2) = op2.xmm16u(2);
  if(op2.xmm16u(3) > op1.xmm16u(3)) op1.xmm16u(3) = op2.xmm16u(3);
  if(op2.xmm16u(4) > op1.xmm16u(4)) op1.xmm16u(4) = op2.xmm16u(4);
  if(op2.xmm16u(5) > op1.xmm16u(5)) op1.xmm16u(5) = op2.xmm16u(5);
  if(op2.xmm16u(6) > op1.xmm16u(6)) op1.xmm16u(6) = op2.xmm16u(6);
  if(op2.xmm16u(7) > op1.xmm16u(7)) op1.xmm16u(7) = op2.xmm16u(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMAXUW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 3F */
void BX_CPU_C::PMAXUD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm32u(0) > op1.xmm32u(0)) op1.xmm32u(0) = op2.xmm32u(0);
  if(op2.xmm32u(1) > op1.xmm32u(1)) op1.xmm32u(1) = op2.xmm32u(1);
  if(op2.xmm32u(2) > op1.xmm32u(2)) op1.xmm32u(2) = op2.xmm32u(2);
  if(op2.xmm32u(3) > op1.xmm32u(3)) op1.xmm32u(3) = op2.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMAXUD_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 40 */
void BX_CPU_C::PMULLD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  Bit64s product1 = Bit64s(op1.xmm32s(0)) * Bit64s(op2.xmm32s(0));
  Bit64s product2 = Bit64s(op1.xmm32s(1)) * Bit64s(op2.xmm32s(1));
  Bit64s product3 = Bit64s(op1.xmm32s(2)) * Bit64s(op2.xmm32s(2));
  Bit64s product4 = Bit64s(op1.xmm32s(3)) * Bit64s(op2.xmm32s(3));

  result.xmm32u(0) = (Bit32u)(product1 & 0xFFFFFFFF);
  result.xmm32u(1) = (Bit32u)(product2 & 0xFFFFFFFF);
  result.xmm32u(2) = (Bit32u)(product3 & 0xFFFFFFFF);
  result.xmm32u(3) = (Bit32u)(product4 & 0xFFFFFFFF);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMULLD_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 38 41 */
void BX_CPU_C::PHMINPOSUW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op, result;

  /* op is a register or memory reference */
  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }
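
  /* Search the eight unsigned words for the minimum; ties resolve to
     the lowest index because only strictly smaller values update it. */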
  unsigned min = 0;

  for (unsigned j=1; j < 8; j++) {
    if (op.xmm16u(j) < op.xmm16u(min)) min = j;
  }

  result.xmm16u(0) = op.xmm16u(min);
  result.xmm16u(1) = min;
  result.xmm32u(1) = 0;
  result.xmm64u(1) = 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PHMINPOSUW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 0C */
void BX_CPU_C::BLENDPS_VpsWpsIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
  Bit8u mask = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if (mask & 0x1) op1.xmm32u(0) = op2.xmm32u(0);
  if (mask & 0x2) op1.xmm32u(1) = op2.xmm32u(1);
  if (mask & 0x4) op1.xmm32u(2) = op2.xmm32u(2);
  if (mask & 0x8) op1.xmm32u(3) = op2.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("BLENDPS_VpsWpsIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 0D */
void BX_CPU_C::BLENDPD_VpdWpdIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
  Bit8u mask = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if (mask & 0x1) op1.xmm64u(0) = op2.xmm64u(0);
  if (mask & 0x2) op1.xmm64u(1) = op2.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("BLENDPD_VpdWpdIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 0E */
void BX_CPU_C::PBLENDW_VdqWdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
  Bit8u mask = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if (mask & 0x01) op1.xmm16u(0) = op2.xmm16u(0);
  if (mask & 0x02) op1.xmm16u(1) = op2.xmm16u(1);
  if (mask & 0x04) op1.xmm16u(2) = op2.xmm16u(2);
  if (mask & 0x08) op1.xmm16u(3) = op2.xmm16u(3);
  if (mask & 0x10) op1.xmm16u(4) = op2.xmm16u(4);
  if (mask & 0x20) op1.xmm16u(5) = op2.xmm16u(5);
  if (mask & 0x40) op1.xmm16u(6) = op2.xmm16u(6);
  if (mask & 0x80) op1.xmm16u(7) = op2.xmm16u(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PBLENDW_VdqWdqIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 14 */
void BX_CPU_C::PEXTRB_HbdUdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit8u result = op.xmmubyte(i->Ib() & 0xF);

  /* result is a register or memory reference */
  if (i->modC0()) {
    BX_WRITE_32BIT_REGZ(i->nnn(), result);
  }
  else {
    write_virtual_byte(i->seg(), RMAddr(i), &result);
  }
#else
  BX_INFO(("PEXTRB_HbdUdqIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 15 */
void BX_CPU_C::PEXTRW_HwdUdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit16u result = op.xmm16u(i->Ib() & 7);

  /* result is a register or memory reference */
  if (i->modC0()) {
    BX_WRITE_32BIT_REGZ(i->nnn(), result);
  }
  else {
    write_virtual_word(i->seg(), RMAddr(i), &result);
  }
#else
  BX_INFO(("PEXTRW_HwdUdqIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 16 */
void BX_CPU_C::PEXTRD_HdUdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

#if BX_SUPPORT_X86_64
  if (i->os64L())  /* 64 bit operand size mode */
  {
    Bit64u result = op.xmm64u(i->Ib() & 1);

    /* result is a register or memory reference */
    if (i->modC0()) {
      BX_WRITE_64BIT_REG(i->nnn(), result);
    }
    else {
      write_virtual_qword(i->seg(), RMAddr(i), &result);
    }
  }
  else
#endif
  {
    Bit32u result = op.xmm32u(i->Ib() & 3);

    /* result is a register or memory reference */
    if (i->modC0()) {
      BX_WRITE_32BIT_REGZ(i->nnn(), result);
    }
    else {
      write_virtual_dword(i->seg(), RMAddr(i), &result);
    }
  }
#else
  BX_INFO(("PEXTRD_HdUdqIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 17 */
void BX_CPU_C::EXTRACTPS_HdUpsIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit32u result = op.xmm32u(i->Ib() & 3);

  /* result is a register or memory reference */
  if (i->modC0()) {
    BX_WRITE_32BIT_REGZ(i->nnn(), result);
  }
  else {
    write_virtual_dword(i->seg(), RMAddr(i), &result);
  }
#else
  BX_INFO(("EXTRACTPS_HdUpsIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 20 */
void BX_CPU_C::PINSRB_VdqEbIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
  Bit8u op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_16BIT_REG(i->rm()); // won't allow reading of AH/CH/BH/DH
  }
  else {
    /* pointer, segment address pair */
    read_virtual_byte(i->seg(), RMAddr(i), &op2);
  }

  op1.xmmubyte(i->Ib() & 0xF) = op2;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PINSRB_VdqEbIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 21 */
void BX_CPU_C::INSERTPS_VpsWssIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
  Bit8u control = i->Ib();
  Bit32u op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    BxPackedXmmRegister temp = BX_READ_XMM_REG(i->rm());
    op2 = temp.xmm32u((control >> 6) & 3);
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }
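
  /* Immediate control byte: bits 7..6 select the source dword (used
     by the register form above), bits 5..4 select the destination
     dword, and bits 3..0 form a zero mask. */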
  op1.xmm32u((control >> 4) & 3) = op2;

  if (control & 1) op1.xmm32u(0) = 0;
  if (control & 2) op1.xmm32u(1) = 0;
  if (control & 4) op1.xmm32u(2) = 0;
  if (control & 8) op1.xmm32u(3) = 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("INSERTPS_VpsWssIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 22 */
void BX_CPU_C::PINSRD_VdqEdIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());

#if BX_SUPPORT_X86_64
  if (i->os64L())  /* 64 bit operand size mode */
  {
    Bit64u op2;

    /* op2 is a register or memory reference */
    if (i->modC0()) {
      op2 = BX_READ_64BIT_REG(i->rm());
    }
    else {
      /* pointer, segment address pair */
      read_virtual_qword(i->seg(), RMAddr(i), &op2);
    }

    op1.xmm64u(i->Ib() & 1) = op2;
  }
  else
#endif
  {
    Bit32u op2;

    /* op2 is a register or memory reference */
    if (i->modC0()) {
      op2 = BX_READ_32BIT_REG(i->rm());
    }
    else {
      /* pointer, segment address pair */
      read_virtual_dword(i->seg(), RMAddr(i), &op2);
    }

    op1.xmm32u(i->Ib() & 3) = op2;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PINSRD_VdqEdIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 3A 42 */
void BX_CPU_C::MPSADBW_VdqWdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  unsigned src_offset = (i->Ib() & 3) * 4;
  unsigned dst_offset = ((i->Ib() >> 2) & 1) * 4;
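
  /* Eight sums of absolute differences: word j compares the four op1
     bytes starting at j+dst_offset against the fixed four-byte group
     of op2 selected by src_offset. */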
  for (unsigned j=0; j < 8; j++)
  {
    result.xmm16u(j) = 0;

    for (unsigned k=0; k < 4; k++) {
      Bit8u temp1 = op1.xmmubyte(j + k + dst_offset);
      Bit8u temp2 = op2.xmmubyte(    k + src_offset);
      if (temp1 > temp2)
        result.xmm16u(j) += (temp1 - temp2);
      else
        result.xmm16u(j) += (temp2 - temp1);
    }
  }

  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("MPSADBW_VdqWdqIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

#endif // (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)

/* 66 0F 60 */
void BX_CPU_C::PUNPCKLBW_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmmubyte(0x0) = op1.xmmubyte(0);
  result.xmmubyte(0x1) = op2.xmmubyte(0);
  result.xmmubyte(0x2) = op1.xmmubyte(1);
  result.xmmubyte(0x3) = op2.xmmubyte(1);
  result.xmmubyte(0x4) = op1.xmmubyte(2);
  result.xmmubyte(0x5) = op2.xmmubyte(2);
  result.xmmubyte(0x6) = op1.xmmubyte(3);
  result.xmmubyte(0x7) = op2.xmmubyte(3);
  result.xmmubyte(0x8) = op1.xmmubyte(4);
  result.xmmubyte(0x9) = op2.xmmubyte(4);
  result.xmmubyte(0xA) = op1.xmmubyte(5);
  result.xmmubyte(0xB) = op2.xmmubyte(5);
  result.xmmubyte(0xC) = op1.xmmubyte(6);
  result.xmmubyte(0xD) = op2.xmmubyte(6);
  result.xmmubyte(0xE) = op1.xmmubyte(7);
  result.xmmubyte(0xF) = op2.xmmubyte(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PUNPCKLBW_VdqWq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 61 */
void BX_CPU_C::PUNPCKLWD_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16u(0) = op1.xmm16u(0);
  result.xmm16u(1) = op2.xmm16u(0);
  result.xmm16u(2) = op1.xmm16u(1);
  result.xmm16u(3) = op2.xmm16u(1);
  result.xmm16u(4) = op1.xmm16u(2);
  result.xmm16u(5) = op2.xmm16u(2);
  result.xmm16u(6) = op1.xmm16u(3);
  result.xmm16u(7) = op2.xmm16u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PUNPCKLWD_VdqWq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* UNPCKLPS:     0F 14 */
/* PUNPCKLDQ: 66 0F 62 */
void BX_CPU_C::UNPCKLPS_VpsWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm32u(0) = op1.xmm32u(0);
  result.xmm32u(1) = op2.xmm32u(0);
  result.xmm32u(2) = op1.xmm32u(1);
  result.xmm32u(3) = op2.xmm32u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("UNPCKLPS_VpsWq: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 63 */
void BX_CPU_C::PACKSSWB_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmmsbyte(0x0) = SaturateWordSToByteS(op1.xmm16s(0));
  result.xmmsbyte(0x1) = SaturateWordSToByteS(op1.xmm16s(1));
  result.xmmsbyte(0x2) = SaturateWordSToByteS(op1.xmm16s(2));
  result.xmmsbyte(0x3) = SaturateWordSToByteS(op1.xmm16s(3));
  result.xmmsbyte(0x4) = SaturateWordSToByteS(op1.xmm16s(4));
  result.xmmsbyte(0x5) = SaturateWordSToByteS(op1.xmm16s(5));
  result.xmmsbyte(0x6) = SaturateWordSToByteS(op1.xmm16s(6));
  result.xmmsbyte(0x7) = SaturateWordSToByteS(op1.xmm16s(7));

  result.xmmsbyte(0x8) = SaturateWordSToByteS(op2.xmm16s(0));
  result.xmmsbyte(0x9) = SaturateWordSToByteS(op2.xmm16s(1));
  result.xmmsbyte(0xA) = SaturateWordSToByteS(op2.xmm16s(2));
  result.xmmsbyte(0xB) = SaturateWordSToByteS(op2.xmm16s(3));
  result.xmmsbyte(0xC) = SaturateWordSToByteS(op2.xmm16s(4));
  result.xmmsbyte(0xD) = SaturateWordSToByteS(op2.xmm16s(5));
  result.xmmsbyte(0xE) = SaturateWordSToByteS(op2.xmm16s(6));
  result.xmmsbyte(0xF) = SaturateWordSToByteS(op2.xmm16s(7));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PACKSSWB_VdqWq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 64 */
void BX_CPU_C::PCMPGTB_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    op1.xmmsbyte(j) = (op1.xmmsbyte(j) > op2.xmmsbyte(j)) ? 0xff : 0;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PCMPGTB_VdqWq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 65 */
void BX_CPU_C::PCMPGTW_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm16s(0) = (op1.xmm16s(0) > op2.xmm16s(0)) ? 0xffff : 0;
  op1.xmm16s(1) = (op1.xmm16s(1) > op2.xmm16s(1)) ? 0xffff : 0;
  op1.xmm16s(2) = (op1.xmm16s(2) > op2.xmm16s(2)) ? 0xffff : 0;
  op1.xmm16s(3) = (op1.xmm16s(3) > op2.xmm16s(3)) ? 0xffff : 0;
  op1.xmm16s(4) = (op1.xmm16s(4) > op2.xmm16s(4)) ? 0xffff : 0;
  op1.xmm16s(5) = (op1.xmm16s(5) > op2.xmm16s(5)) ? 0xffff : 0;
  op1.xmm16s(6) = (op1.xmm16s(6) > op2.xmm16s(6)) ? 0xffff : 0;
  op1.xmm16s(7) = (op1.xmm16s(7) > op2.xmm16s(7)) ? 0xffff : 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PCMPGTW_VdqWq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 66 */
void BX_CPU_C::PCMPGTD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm32s(0) = (op1.xmm32s(0) > op2.xmm32s(0)) ? 0xffffffff : 0;
  op1.xmm32s(1) = (op1.xmm32s(1) > op2.xmm32s(1)) ? 0xffffffff : 0;
  op1.xmm32s(2) = (op1.xmm32s(2) > op2.xmm32s(2)) ? 0xffffffff : 0;
  op1.xmm32s(3) = (op1.xmm32s(3) > op2.xmm32s(3)) ? 0xffffffff : 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PCMPGTD_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 67 */
void BX_CPU_C::PACKUSWB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmmubyte(0x0) = SaturateWordSToByteU(op1.xmm16s(0));
  result.xmmubyte(0x1) = SaturateWordSToByteU(op1.xmm16s(1));
  result.xmmubyte(0x2) = SaturateWordSToByteU(op1.xmm16s(2));
  result.xmmubyte(0x3) = SaturateWordSToByteU(op1.xmm16s(3));
  result.xmmubyte(0x4) = SaturateWordSToByteU(op1.xmm16s(4));
  result.xmmubyte(0x5) = SaturateWordSToByteU(op1.xmm16s(5));
  result.xmmubyte(0x6) = SaturateWordSToByteU(op1.xmm16s(6));
  result.xmmubyte(0x7) = SaturateWordSToByteU(op1.xmm16s(7));

  result.xmmubyte(0x8) = SaturateWordSToByteU(op2.xmm16s(0));
  result.xmmubyte(0x9) = SaturateWordSToByteU(op2.xmm16s(1));
  result.xmmubyte(0xA) = SaturateWordSToByteU(op2.xmm16s(2));
  result.xmmubyte(0xB) = SaturateWordSToByteU(op2.xmm16s(3));
  result.xmmubyte(0xC) = SaturateWordSToByteU(op2.xmm16s(4));
  result.xmmubyte(0xD) = SaturateWordSToByteU(op2.xmm16s(5));
  result.xmmubyte(0xE) = SaturateWordSToByteU(op2.xmm16s(6));
  result.xmmubyte(0xF) = SaturateWordSToByteU(op2.xmm16s(7));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PACKUSWB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 68 */
void BX_CPU_C::PUNPCKHBW_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmmubyte(0x0) = op1.xmmubyte(0x8);
  result.xmmubyte(0x1) = op2.xmmubyte(0x8);
  result.xmmubyte(0x2) = op1.xmmubyte(0x9);
  result.xmmubyte(0x3) = op2.xmmubyte(0x9);
  result.xmmubyte(0x4) = op1.xmmubyte(0xA);
  result.xmmubyte(0x5) = op2.xmmubyte(0xA);
  result.xmmubyte(0x6) = op1.xmmubyte(0xB);
  result.xmmubyte(0x7) = op2.xmmubyte(0xB);
  result.xmmubyte(0x8) = op1.xmmubyte(0xC);
  result.xmmubyte(0x9) = op2.xmmubyte(0xC);
  result.xmmubyte(0xA) = op1.xmmubyte(0xD);
  result.xmmubyte(0xB) = op2.xmmubyte(0xD);
  result.xmmubyte(0xC) = op1.xmmubyte(0xE);
  result.xmmubyte(0xD) = op2.xmmubyte(0xE);
  result.xmmubyte(0xE) = op1.xmmubyte(0xF);
  result.xmmubyte(0xF) = op2.xmmubyte(0xF);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PUNPCKHBW_VdqWq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 69 */
void BX_CPU_C::PUNPCKHWD_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16u(0) = op1.xmm16u(4);
  result.xmm16u(1) = op2.xmm16u(4);
  result.xmm16u(2) = op1.xmm16u(5);
  result.xmm16u(3) = op2.xmm16u(5);
  result.xmm16u(4) = op1.xmm16u(6);
  result.xmm16u(5) = op2.xmm16u(6);
  result.xmm16u(6) = op1.xmm16u(7);
  result.xmm16u(7) = op2.xmm16u(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PUNPCKHWD_VdqWq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* UNPCKHPS:     0F 15 */
/* PUNPCKHDQ: 66 0F 6A */
void BX_CPU_C::UNPCKHPS_VpsWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm32u(0) = op1.xmm32u(2);
  result.xmm32u(1) = op2.xmm32u(2);
  result.xmm32u(2) = op1.xmm32u(3);
  result.xmm32u(3) = op2.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("UNPCKHPS_VpsWq: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 6B */
void BX_CPU_C::PACKSSDW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16s(0) = SaturateDwordSToWordS(op1.xmm32s(0));
  result.xmm16s(1) = SaturateDwordSToWordS(op1.xmm32s(1));
  result.xmm16s(2) = SaturateDwordSToWordS(op1.xmm32s(2));
  result.xmm16s(3) = SaturateDwordSToWordS(op1.xmm32s(3));

  result.xmm16s(4) = SaturateDwordSToWordS(op2.xmm32s(0));
  result.xmm16s(5) = SaturateDwordSToWordS(op2.xmm32s(1));
  result.xmm16s(6) = SaturateDwordSToWordS(op2.xmm32s(2));
  result.xmm16s(7) = SaturateDwordSToWordS(op2.xmm32s(3));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PACKSSDW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* UNPCKLPD:   66 0F 14 */
/* PUNPCKLQDQ: 66 0F 6C */
void BX_CPU_C::PUNPCKLQDQ_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm64u(1) = op2.xmm64u(0);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PUNPCKLQDQ_VdqWq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* UNPCKHPD:   66 0F 15 */
/* PUNPCKHQDQ: 66 0F 6D */
void BX_CPU_C::PUNPCKHQDQ_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm64u(0) = op1.xmm64u(1);
  result.xmm64u(1) = op2.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PUNPCKHQDQ_VdqWq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 70 */
void BX_CPU_C::PSHUFD_VdqWdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op, result;
  Bit8u order = i->Ib();

  /* op is a register or memory reference */
  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  result.xmm32u(0) = op.xmm32u((order >> 0) & 0x3);
  result.xmm32u(1) = op.xmm32u((order >> 2) & 0x3);
  result.xmm32u(2) = op.xmm32u((order >> 4) & 0x3);
  result.xmm32u(3) = op.xmm32u((order >> 6) & 0x3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSHUFD_VdqWdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

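/* Example: order = 0x1B encodes the lane selectors 3,2,1,0 (two bits per
   destination dword, low pair first), so PSHUFD with 0x1B reverses the
   four dwords; order = 0x00 broadcasts dword 0 into every lane. */
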
/* F2 0F 70 */
void BX_CPU_C::PSHUFHW_VqWqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op, result;
  Bit8u order = i->Ib();

  /* op is a register or memory reference */
  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  result.xmm64u(0) = op.xmm64u(0);
  result.xmm16u(4) = op.xmm16u(4 + ((order >> 0) & 0x3));
  result.xmm16u(5) = op.xmm16u(4 + ((order >> 2) & 0x3));
  result.xmm16u(6) = op.xmm16u(4 + ((order >> 4) & 0x3));
  result.xmm16u(7) = op.xmm16u(4 + ((order >> 6) & 0x3));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSHUFHW_VqWqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* F3 0F 70 */
void BX_CPU_C::PSHUFLW_VqWqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op, result;
  Bit8u order = i->Ib();

  /* op is a register or memory reference */
  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  result.xmm16u(0) = op.xmm16u((order >> 0) & 0x3);
  result.xmm16u(1) = op.xmm16u((order >> 2) & 0x3);
  result.xmm16u(2) = op.xmm16u((order >> 4) & 0x3);
  result.xmm16u(3) = op.xmm16u((order >> 6) & 0x3);
  result.xmm64u(1) = op.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSHUFLW_VqWqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 74 */
void BX_CPU_C::PCMPEQB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    op1.xmmubyte(j) = (op1.xmmubyte(j) == op2.xmmubyte(j)) ? 0xff : 0;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PCMPEQB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

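/* The all-ones/all-zeroes byte masks produced above are typically consumed
   by PAND/PANDN/POR to blend two sources without branches; for instance,
   (mask & a) | (~mask & b) selects a in the lanes that compared equal. */
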
/* 66 0F 75 */
void BX_CPU_C::PCMPEQW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm16u(0) = (op1.xmm16u(0) == op2.xmm16u(0)) ? 0xffff : 0;
  op1.xmm16u(1) = (op1.xmm16u(1) == op2.xmm16u(1)) ? 0xffff : 0;
  op1.xmm16u(2) = (op1.xmm16u(2) == op2.xmm16u(2)) ? 0xffff : 0;
  op1.xmm16u(3) = (op1.xmm16u(3) == op2.xmm16u(3)) ? 0xffff : 0;
  op1.xmm16u(4) = (op1.xmm16u(4) == op2.xmm16u(4)) ? 0xffff : 0;
  op1.xmm16u(5) = (op1.xmm16u(5) == op2.xmm16u(5)) ? 0xffff : 0;
  op1.xmm16u(6) = (op1.xmm16u(6) == op2.xmm16u(6)) ? 0xffff : 0;
  op1.xmm16u(7) = (op1.xmm16u(7) == op2.xmm16u(7)) ? 0xffff : 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PCMPEQW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 76 */
void BX_CPU_C::PCMPEQD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm32u(0) = (op1.xmm32u(0) == op2.xmm32u(0)) ? 0xffffffff : 0;
  op1.xmm32u(1) = (op1.xmm32u(1) == op2.xmm32u(1)) ? 0xffffffff : 0;
  op1.xmm32u(2) = (op1.xmm32u(2) == op2.xmm32u(2)) ? 0xffffffff : 0;
  op1.xmm32u(3) = (op1.xmm32u(3) == op2.xmm32u(3)) ? 0xffffffff : 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PCMPEQD_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F C4 */
void BX_CPU_C::PINSRW_VdqEwIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
  Bit16u op2;
  Bit8u count = i->Ib() & 0x7;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_16BIT_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_word(i->seg(), RMAddr(i), &op2);
  }

  op1.xmm16u(count) = op2;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PINSRW_VdqEwIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F C5 */
void BX_CPU_C::PEXTRW_GdUdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit8u count = i->Ib() & 0x7;
  Bit32u result = (Bit32u) op.xmm16u(count);

  BX_WRITE_32BIT_REGZ(i->nnn(), result);
#else
  BX_INFO(("PEXTRW_GdUdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 0F C6 */
void BX_CPU_C::SHUFPS_VpsWpsIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
  Bit8u order = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm32u(0) = op1.xmm32u((order >> 0) & 0x3);
  result.xmm32u(1) = op1.xmm32u((order >> 2) & 0x3);
  result.xmm32u(2) = op2.xmm32u((order >> 4) & 0x3);
  result.xmm32u(3) = op2.xmm32u((order >> 6) & 0x3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("SHUFPS_VpsWpsIb: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

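/* Note the asymmetry above: the two low result dwords come from the
   destination (op1) and the two high ones from the source (op2).
   Example: order = 0xE4 (selectors 0,1,2,3) therefore copies op1's low
   qword and op2's high qword rather than leaving op1 intact. */
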
/* 66 0F C6 */
void BX_CPU_C::SHUFPD_VpdWpdIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
  Bit8u order = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm64u(0) = op1.xmm64u((order >> 0) & 0x1);
  result.xmm64u(1) = op2.xmm64u((order >> 1) & 0x1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("SHUFPD_VpdWpdIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F D1 */
void BX_CPU_C::PSRLW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm64u(0) > 15)  /* looking only to low 64 bits */
  {
    op1.xmm64u(0) = 0;
    op1.xmm64u(1) = 0;
  }
  else
  {
    Bit8u shift = op2.xmmubyte(0);

    op1.xmm16u(0) >>= shift;
    op1.xmm16u(1) >>= shift;
    op1.xmm16u(2) >>= shift;
    op1.xmm16u(3) >>= shift;
    op1.xmm16u(4) >>= shift;
    op1.xmm16u(5) >>= shift;
    op1.xmm16u(6) >>= shift;
    op1.xmm16u(7) >>= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSRLW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

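/* The count for this register/memory form is the full low quadword of
   op2, not just its low byte: any value above 15 zeroes the result, which
   is why the 64-bit compare runs before the byte-sized shift is extracted
   (when the quadword is 15 or less, its low byte equals the whole count). */
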
/* 66 0F D2 */
void BX_CPU_C::PSRLD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm64u(0) > 31)  /* looking only to low 64 bits */
  {
    op1.xmm64u(0) = 0;
    op1.xmm64u(1) = 0;
  }
  else
  {
    Bit8u shift = op2.xmmubyte(0);

    op1.xmm32u(0) >>= shift;
    op1.xmm32u(1) >>= shift;
    op1.xmm32u(2) >>= shift;
    op1.xmm32u(3) >>= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSRLD_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F D3 */
void BX_CPU_C::PSRLQ_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm64u(0) > 63)  /* looking only to low 64 bits */
  {
    op1.xmm64u(0) = 0;
    op1.xmm64u(1) = 0;
  }
  else
  {
    Bit8u shift = op2.xmmubyte(0);

    op1.xmm64u(0) >>= shift;
    op1.xmm64u(1) >>= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSRLQ_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F D4 */
void BX_CPU_C::PADDQ_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm64u(0) += op2.xmm64u(0);
  op1.xmm64u(1) += op2.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PADDQ_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F D5 */
void BX_CPU_C::PMULLW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  Bit32u product1 = Bit32u(op1.xmm16u(0)) * Bit32u(op2.xmm16u(0));
  Bit32u product2 = Bit32u(op1.xmm16u(1)) * Bit32u(op2.xmm16u(1));
  Bit32u product3 = Bit32u(op1.xmm16u(2)) * Bit32u(op2.xmm16u(2));
  Bit32u product4 = Bit32u(op1.xmm16u(3)) * Bit32u(op2.xmm16u(3));
  Bit32u product5 = Bit32u(op1.xmm16u(4)) * Bit32u(op2.xmm16u(4));
  Bit32u product6 = Bit32u(op1.xmm16u(5)) * Bit32u(op2.xmm16u(5));
  Bit32u product7 = Bit32u(op1.xmm16u(6)) * Bit32u(op2.xmm16u(6));
  Bit32u product8 = Bit32u(op1.xmm16u(7)) * Bit32u(op2.xmm16u(7));

  result.xmm16u(0) = product1 & 0xffff;
  result.xmm16u(1) = product2 & 0xffff;
  result.xmm16u(2) = product3 & 0xffff;
  result.xmm16u(3) = product4 & 0xffff;
  result.xmm16u(4) = product5 & 0xffff;
  result.xmm16u(5) = product6 & 0xffff;
  result.xmm16u(6) = product7 & 0xffff;
  result.xmm16u(7) = product8 & 0xffff;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMULLW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F D8 */
void BX_CPU_C::PSUBUSB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm64u(0) = result.xmm64u(1) = 0;

  for(unsigned j=0; j<16; j++)
  {
    if(op1.xmmubyte(j) > op2.xmmubyte(j))
    {
      result.xmmubyte(j) = op1.xmmubyte(j) - op2.xmmubyte(j);
    }
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSUBUSB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

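/* Unsigned saturating subtract: lanes where op1 <= op2 keep the zero that
   was pre-filled into result, so the difference clamps at 0.
   Example: 0x10 - 0x20 -> 0x00, while 0x20 - 0x10 -> 0x10. */
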
/* 66 0F D9 */
void BX_CPU_C::PSUBUSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm64u(0) = result.xmm64u(1) = 0;

  for(unsigned j=0; j<8; j++)
  {
    if(op1.xmm16u(j) > op2.xmm16u(j))
    {
      result.xmm16u(j) = op1.xmm16u(j) - op2.xmm16u(j);
    }
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSUBUSW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F DA */
void BX_CPU_C::PMINUB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    if(op2.xmmubyte(j) < op1.xmmubyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMINUB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* ANDPS: 0F 54 */
/* ANDPD: 66 0F 54 */
/* PAND: 66 0F DB */
void BX_CPU_C::ANDPS_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm64u(0) &= op2.xmm64u(0);
  op1.xmm64u(1) &= op2.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("ANDPS_VpsWps: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F DC */
void BX_CPU_C::PADDUSB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    result.xmmubyte(j) = SaturateWordSToByteU(Bit16s(op1.xmmubyte(j)) + Bit16s(op2.xmmubyte(j)));
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PADDUSB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

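/* Example of the unsigned byte saturation above: 0xF0 + 0x20 gives the
   intermediate 0x110, which clamps to 0xFF; 0x70 + 0x20 stays 0x90. The
   Bit16s widening keeps the sum exact before SaturateWordSToByteU clamps. */
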
/* 66 0F DD */
void BX_CPU_C::PADDUSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16u(0) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(0)) + Bit32s(op2.xmm16u(0)));
  result.xmm16u(1) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(1)) + Bit32s(op2.xmm16u(1)));
  result.xmm16u(2) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(2)) + Bit32s(op2.xmm16u(2)));
  result.xmm16u(3) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(3)) + Bit32s(op2.xmm16u(3)));
  result.xmm16u(4) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(4)) + Bit32s(op2.xmm16u(4)));
  result.xmm16u(5) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(5)) + Bit32s(op2.xmm16u(5)));
  result.xmm16u(6) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(6)) + Bit32s(op2.xmm16u(6)));
  result.xmm16u(7) = SaturateDwordSToWordU(Bit32s(op1.xmm16u(7)) + Bit32s(op2.xmm16u(7)));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PADDUSW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F DE */
void BX_CPU_C::PMAXUB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    if(op2.xmmubyte(j) > op1.xmmubyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMAXUB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* ANDNPS: 0F 55 */
/* ANDNPD: 66 0F 55 */
/* PANDN: 66 0F DF */
void BX_CPU_C::ANDNPS_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm64u(0) = ~(op1.xmm64u(0)) & op2.xmm64u(0);
  op1.xmm64u(1) = ~(op1.xmm64u(1)) & op2.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("ANDNPS_VpsWps: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F E0 */
void BX_CPU_C::PAVGB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    op1.xmmubyte(j) = (op1.xmmubyte(j) + op2.xmmubyte(j) + 1) >> 1;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PAVGB_VdqWdq: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

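/* The "+ 1" above implements round-half-up averaging: PAVGB(1, 2) = 2 and
   PAVGB(2, 2) = 2. Integral promotion widens the byte sum before the
   shift, so the carry out of 8 bits is never lost. */
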
/* 66 0F E1 */
void BX_CPU_C::PSRAW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  /* a shift count of zero leaves the destination unchanged */
  if(op2.xmm64u(0) == 0) return;

  if(op2.xmm64u(0) > 15)  /* looking only to low 64 bits */
  {
    result.xmm16u(0) = (op1.xmm16u(0) & 0x8000) ? 0xffff : 0;
    result.xmm16u(1) = (op1.xmm16u(1) & 0x8000) ? 0xffff : 0;
    result.xmm16u(2) = (op1.xmm16u(2) & 0x8000) ? 0xffff : 0;
    result.xmm16u(3) = (op1.xmm16u(3) & 0x8000) ? 0xffff : 0;
    result.xmm16u(4) = (op1.xmm16u(4) & 0x8000) ? 0xffff : 0;
    result.xmm16u(5) = (op1.xmm16u(5) & 0x8000) ? 0xffff : 0;
    result.xmm16u(6) = (op1.xmm16u(6) & 0x8000) ? 0xffff : 0;
    result.xmm16u(7) = (op1.xmm16u(7) & 0x8000) ? 0xffff : 0;
  }
  else
  {
    Bit8u shift = op2.xmmubyte(0);

    result.xmm16u(0) = op1.xmm16u(0) >> shift;
    result.xmm16u(1) = op1.xmm16u(1) >> shift;
    result.xmm16u(2) = op1.xmm16u(2) >> shift;
    result.xmm16u(3) = op1.xmm16u(3) >> shift;
    result.xmm16u(4) = op1.xmm16u(4) >> shift;
    result.xmm16u(5) = op1.xmm16u(5) >> shift;
    result.xmm16u(6) = op1.xmm16u(6) >> shift;
    result.xmm16u(7) = op1.xmm16u(7) >> shift;

    if(op1.xmm16u(0) & 0x8000) result.xmm16u(0) |= (0xffff << (16 - shift));
    if(op1.xmm16u(1) & 0x8000) result.xmm16u(1) |= (0xffff << (16 - shift));
    if(op1.xmm16u(2) & 0x8000) result.xmm16u(2) |= (0xffff << (16 - shift));
    if(op1.xmm16u(3) & 0x8000) result.xmm16u(3) |= (0xffff << (16 - shift));
    if(op1.xmm16u(4) & 0x8000) result.xmm16u(4) |= (0xffff << (16 - shift));
    if(op1.xmm16u(5) & 0x8000) result.xmm16u(5) |= (0xffff << (16 - shift));
    if(op1.xmm16u(6) & 0x8000) result.xmm16u(6) |= (0xffff << (16 - shift));
    if(op1.xmm16u(7) & 0x8000) result.xmm16u(7) |= (0xffff << (16 - shift));
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSRAW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

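/* The shift-then-OR pattern above emulates an arithmetic right shift on an
   unsigned lane. A minimal standalone sketch of the same idea for a single
   16-bit lane (sar16 is an illustrative helper, not part of this file):

     static Bit16u sar16(Bit16u v, unsigned shift)   // 0 < shift <= 15
     {
       Bit16u r = v >> shift;                        // logical shift
       if (v & 0x8000)
         r |= (Bit16u)(0xffff << (16 - shift));      // replicate sign bit
       return r;
     }

   e.g. sar16(0x8000, 1) == 0xC000, matching PSRAW with a count of 1. */
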
/* 66 0F E2 */
void BX_CPU_C::PSRAD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  /* a shift count of zero leaves the destination unchanged */
  if(op2.xmm64u(0) == 0) return;

  if(op2.xmm64u(0) > 31)  /* looking only to low 64 bits */
  {
    result.xmm32u(0) = (op1.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
    result.xmm32u(1) = (op1.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
    result.xmm32u(2) = (op1.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
    result.xmm32u(3) = (op1.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
  }
  else
  {
    Bit8u shift = op2.xmmubyte(0);

    result.xmm32u(0) = op1.xmm32u(0) >> shift;
    result.xmm32u(1) = op1.xmm32u(1) >> shift;
    result.xmm32u(2) = op1.xmm32u(2) >> shift;
    result.xmm32u(3) = op1.xmm32u(3) >> shift;

    if(op1.xmm32u(0) & 0x80000000) result.xmm32u(0) |= (0xffffffff << (32-shift));
    if(op1.xmm32u(1) & 0x80000000) result.xmm32u(1) |= (0xffffffff << (32-shift));
    if(op1.xmm32u(2) & 0x80000000) result.xmm32u(2) |= (0xffffffff << (32-shift));
    if(op1.xmm32u(3) & 0x80000000) result.xmm32u(3) |= (0xffffffff << (32-shift));
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSRAD_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F E3 */
void BX_CPU_C::PAVGW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm16u(0) = (op1.xmm16u(0) + op2.xmm16u(0) + 1) >> 1;
  op1.xmm16u(1) = (op1.xmm16u(1) + op2.xmm16u(1) + 1) >> 1;
  op1.xmm16u(2) = (op1.xmm16u(2) + op2.xmm16u(2) + 1) >> 1;
  op1.xmm16u(3) = (op1.xmm16u(3) + op2.xmm16u(3) + 1) >> 1;
  op1.xmm16u(4) = (op1.xmm16u(4) + op2.xmm16u(4) + 1) >> 1;
  op1.xmm16u(5) = (op1.xmm16u(5) + op2.xmm16u(5) + 1) >> 1;
  op1.xmm16u(6) = (op1.xmm16u(6) + op2.xmm16u(6) + 1) >> 1;
  op1.xmm16u(7) = (op1.xmm16u(7) + op2.xmm16u(7) + 1) >> 1;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PAVGW_VdqWdq: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F E4 */
void BX_CPU_C::PMULHUW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  Bit32u product1 = Bit32u(op1.xmm16u(0)) * Bit32u(op2.xmm16u(0));
  Bit32u product2 = Bit32u(op1.xmm16u(1)) * Bit32u(op2.xmm16u(1));
  Bit32u product3 = Bit32u(op1.xmm16u(2)) * Bit32u(op2.xmm16u(2));
  Bit32u product4 = Bit32u(op1.xmm16u(3)) * Bit32u(op2.xmm16u(3));
  Bit32u product5 = Bit32u(op1.xmm16u(4)) * Bit32u(op2.xmm16u(4));
  Bit32u product6 = Bit32u(op1.xmm16u(5)) * Bit32u(op2.xmm16u(5));
  Bit32u product7 = Bit32u(op1.xmm16u(6)) * Bit32u(op2.xmm16u(6));
  Bit32u product8 = Bit32u(op1.xmm16u(7)) * Bit32u(op2.xmm16u(7));

  result.xmm16u(0) = (Bit16u)(product1 >> 16);
  result.xmm16u(1) = (Bit16u)(product2 >> 16);
  result.xmm16u(2) = (Bit16u)(product3 >> 16);
  result.xmm16u(3) = (Bit16u)(product4 >> 16);
  result.xmm16u(4) = (Bit16u)(product5 >> 16);
  result.xmm16u(5) = (Bit16u)(product6 >> 16);
  result.xmm16u(6) = (Bit16u)(product7 >> 16);
  result.xmm16u(7) = (Bit16u)(product8 >> 16);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMULHUW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F E5 */
void BX_CPU_C::PMULHW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  Bit32s product1 = Bit32s(op1.xmm16s(0)) * Bit32s(op2.xmm16s(0));
  Bit32s product2 = Bit32s(op1.xmm16s(1)) * Bit32s(op2.xmm16s(1));
  Bit32s product3 = Bit32s(op1.xmm16s(2)) * Bit32s(op2.xmm16s(2));
  Bit32s product4 = Bit32s(op1.xmm16s(3)) * Bit32s(op2.xmm16s(3));
  Bit32s product5 = Bit32s(op1.xmm16s(4)) * Bit32s(op2.xmm16s(4));
  Bit32s product6 = Bit32s(op1.xmm16s(5)) * Bit32s(op2.xmm16s(5));
  Bit32s product7 = Bit32s(op1.xmm16s(6)) * Bit32s(op2.xmm16s(6));
  Bit32s product8 = Bit32s(op1.xmm16s(7)) * Bit32s(op2.xmm16s(7));

  result.xmm16u(0) = (Bit16u)(product1 >> 16);
  result.xmm16u(1) = (Bit16u)(product2 >> 16);
  result.xmm16u(2) = (Bit16u)(product3 >> 16);
  result.xmm16u(3) = (Bit16u)(product4 >> 16);
  result.xmm16u(4) = (Bit16u)(product5 >> 16);
  result.xmm16u(5) = (Bit16u)(product6 >> 16);
  result.xmm16u(6) = (Bit16u)(product7 >> 16);
  result.xmm16u(7) = (Bit16u)(product8 >> 16);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMULHW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

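/* PMULLW, PMULHUW and PMULHW differ only in which half of the 32-bit
   product is kept and whether the operands are treated as signed.
   Example: 0x8000 * 0x8000 is 0x40000000 both unsigned (32768^2) and
   signed ((-32768)^2), so PMULHUW and PMULHW both return 0x4000 while
   PMULLW returns 0x0000. */
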
/* 66 0F E8 */
void BX_CPU_C::PSUBSB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    result.xmmsbyte(j) = SaturateWordSToByteS(Bit16s(op1.xmmsbyte(j)) - Bit16s(op2.xmmsbyte(j)));
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSUBSB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F E9 */
void BX_CPU_C::PSUBSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) - Bit32s(op2.xmm16s(0)));
  result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(1)) - Bit32s(op2.xmm16s(1)));
  result.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) - Bit32s(op2.xmm16s(2)));
  result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(3)) - Bit32s(op2.xmm16s(3)));
  result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) - Bit32s(op2.xmm16s(4)));
  result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(5)) - Bit32s(op2.xmm16s(5)));
  result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) - Bit32s(op2.xmm16s(6)));
  result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(7)) - Bit32s(op2.xmm16s(7)));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PSUBSW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F EA */
void BX_CPU_C::PMINSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm16s(0) < op1.xmm16s(0)) op1.xmm16s(0) = op2.xmm16s(0);
  if(op2.xmm16s(1) < op1.xmm16s(1)) op1.xmm16s(1) = op2.xmm16s(1);
  if(op2.xmm16s(2) < op1.xmm16s(2)) op1.xmm16s(2) = op2.xmm16s(2);
  if(op2.xmm16s(3) < op1.xmm16s(3)) op1.xmm16s(3) = op2.xmm16s(3);
  if(op2.xmm16s(4) < op1.xmm16s(4)) op1.xmm16s(4) = op2.xmm16s(4);
  if(op2.xmm16s(5) < op1.xmm16s(5)) op1.xmm16s(5) = op2.xmm16s(5);
  if(op2.xmm16s(6) < op1.xmm16s(6)) op1.xmm16s(6) = op2.xmm16s(6);
  if(op2.xmm16s(7) < op1.xmm16s(7)) op1.xmm16s(7) = op2.xmm16s(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMINSW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* ORPS: 0F 56 */
/* ORPD: 66 0F 56 */
/* POR: 66 0F EB */
void BX_CPU_C::ORPS_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm64u(0) |= op2.xmm64u(0);
  op1.xmm64u(1) |= op2.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("ORPS_VpsWps: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F EC */
void BX_CPU_C::PADDSB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    result.xmmsbyte(j) = SaturateWordSToByteS(Bit16s(op1.xmmsbyte(j)) + Bit16s(op2.xmmsbyte(j)));
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PADDSB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F ED */
void BX_CPU_C::PADDSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) + Bit32s(op2.xmm16s(0)));
  result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(1)) + Bit32s(op2.xmm16s(1)));
  result.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) + Bit32s(op2.xmm16s(2)));
  result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(3)) + Bit32s(op2.xmm16s(3)));
  result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) + Bit32s(op2.xmm16s(4)));
  result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(5)) + Bit32s(op2.xmm16s(5)));
  result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) + Bit32s(op2.xmm16s(6)));
  result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(7)) + Bit32s(op2.xmm16s(7)));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PADDSW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F EE */
void BX_CPU_C::PMAXSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm16s(0) > op1.xmm16s(0)) op1.xmm16s(0) = op2.xmm16s(0);
  if(op2.xmm16s(1) > op1.xmm16s(1)) op1.xmm16s(1) = op2.xmm16s(1);
  if(op2.xmm16s(2) > op1.xmm16s(2)) op1.xmm16s(2) = op2.xmm16s(2);
  if(op2.xmm16s(3) > op1.xmm16s(3)) op1.xmm16s(3) = op2.xmm16s(3);
  if(op2.xmm16s(4) > op1.xmm16s(4)) op1.xmm16s(4) = op2.xmm16s(4);
  if(op2.xmm16s(5) > op1.xmm16s(5)) op1.xmm16s(5) = op2.xmm16s(5);
  if(op2.xmm16s(6) > op1.xmm16s(6)) op1.xmm16s(6) = op2.xmm16s(6);
  if(op2.xmm16s(7) > op1.xmm16s(7)) op1.xmm16s(7) = op2.xmm16s(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PMAXSW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* XORPS: 0F 57 */
/* XORPD: 66 0F 57 */
/* PXOR: 66 0F EF */
void BX_CPU_C::XORPS_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm64u(0) ^= op2.xmm64u(0);
  op1.xmm64u(1) ^= op2.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("XORPS_VpsWps: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F F1 */
void BX_CPU_C::PSLLW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm64u(0) > 15)  /* looking only to low 64 bits */
  {
    op1.xmm64u(0) = 0;
    op1.xmm64u(1) = 0;
  }
  else
  {
    Bit8u shift = op2.xmmubyte(0);

    op1.xmm16u(0) <<= shift;
    op1.xmm16u(1) <<= shift;
    op1.xmm16u(2) <<= shift;
    op1.xmm16u(3) <<= shift;
    op1.xmm16u(4) <<= shift;
    op1.xmm16u(5) <<= shift;
    op1.xmm16u(6) <<= shift;
    op1.xmm16u(7) <<= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSLLW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F F2 */
void BX_CPU_C::PSLLD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm64u(0) > 31)  /* looking only to low 64 bits */
  {
    op1.xmm64u(0) = 0;
    op1.xmm64u(1) = 0;
  }
  else
  {
    Bit8u shift = op2.xmmubyte(0);

    op1.xmm32u(0) <<= shift;
    op1.xmm32u(1) <<= shift;
    op1.xmm32u(2) <<= shift;
    op1.xmm32u(3) <<= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSLLD_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F F3 */
void BX_CPU_C::PSLLQ_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  if(op2.xmm64u(0) > 63)  /* looking only to low 64 bits */
  {
    op1.xmm64u(0) = 0;
    op1.xmm64u(1) = 0;
  }
  else
  {
    Bit8u shift = op2.xmmubyte(0);

    op1.xmm64u(0) <<= shift;
    op1.xmm64u(1) <<= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSLLQ_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F F4 */
void BX_CPU_C::PMULUDQ_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  result.xmm64u(0) = Bit64u(op1.xmm32u(0)) * Bit64u(op2.xmm32u(0));
  result.xmm64u(1) = Bit64u(op1.xmm32u(2)) * Bit64u(op2.xmm32u(2));

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMULUDQ_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

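/* Only the even dwords (lanes 0 and 2) participate above; the odd lanes
   are ignored, so each 32x32 product gets a full 64-bit lane to land in
   without truncation. */
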
/* 66 0F F5 */
void BX_CPU_C::PMADDWD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<4; j++)
  {
    if(op1.xmm32u(j) == 0x80008000 && op2.xmm32u(j) == 0x80008000) {
      /* both products are (-32768)*(-32768); their sum wraps to 0x80000000 */
      result.xmm32u(j) = 0x80000000;
    }
    else {
      result.xmm32u(j) =
        Bit32s(op1.xmm16s(2*j+0)) * Bit32s(op2.xmm16s(2*j+0)) +
        Bit32s(op1.xmm16s(2*j+1)) * Bit32s(op2.xmm16s(2*j+1));
    }
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMADDWD_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

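/* Example: with word pairs (1,2) in op1 and (3,4) in op2, the dword is
   1*3 + 2*4 = 11. The 0x80008000 test above isolates the one input that
   overflows: (-32768)*(-32768) taken twice sums to +2^31, which does not
   fit in Bit32s, so the wrapped hardware result 0x80000000 is written
   directly instead of evaluating the signed sum. */
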
/* 66 0F F6 */
void BX_CPU_C::PSADBW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
  Bit16u temp1 = 0, temp2 = 0;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  temp1 += abs(op1.xmmubyte(0x0) - op2.xmmubyte(0x0));
  temp1 += abs(op1.xmmubyte(0x1) - op2.xmmubyte(0x1));
  temp1 += abs(op1.xmmubyte(0x2) - op2.xmmubyte(0x2));
  temp1 += abs(op1.xmmubyte(0x3) - op2.xmmubyte(0x3));
  temp1 += abs(op1.xmmubyte(0x4) - op2.xmmubyte(0x4));
  temp1 += abs(op1.xmmubyte(0x5) - op2.xmmubyte(0x5));
  temp1 += abs(op1.xmmubyte(0x6) - op2.xmmubyte(0x6));
  temp1 += abs(op1.xmmubyte(0x7) - op2.xmmubyte(0x7));

  temp2 += abs(op1.xmmubyte(0x8) - op2.xmmubyte(0x8));
  temp2 += abs(op1.xmmubyte(0x9) - op2.xmmubyte(0x9));
  temp2 += abs(op1.xmmubyte(0xA) - op2.xmmubyte(0xA));
  temp2 += abs(op1.xmmubyte(0xB) - op2.xmmubyte(0xB));
  temp2 += abs(op1.xmmubyte(0xC) - op2.xmmubyte(0xC));
  temp2 += abs(op1.xmmubyte(0xD) - op2.xmmubyte(0xD));
  temp2 += abs(op1.xmmubyte(0xE) - op2.xmmubyte(0xE));
  temp2 += abs(op1.xmmubyte(0xF) - op2.xmmubyte(0xF));

  op1.xmm64u(0) = Bit64u(temp1);
  op1.xmm64u(1) = Bit64u(temp2);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSADBW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

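/* Each Bit16u accumulator above is large enough: eight absolute byte
   differences sum to at most 8 * 255 = 2040. Example: if the low eight
   bytes of op1 and op2 differ by 1 in every lane, temp1 = 8 and the low
   quadword of the result becomes 8 with its upper 48 bits zeroed. */
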
/* 66 0F F8 */
void BX_CPU_C::PSUBB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    op1.xmmubyte(j) -= op2.xmmubyte(j);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSUBB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F F9 */
void BX_CPU_C::PSUBW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm16u(0) -= op2.xmm16u(0);
  op1.xmm16u(1) -= op2.xmm16u(1);
  op1.xmm16u(2) -= op2.xmm16u(2);
  op1.xmm16u(3) -= op2.xmm16u(3);
  op1.xmm16u(4) -= op2.xmm16u(4);
  op1.xmm16u(5) -= op2.xmm16u(5);
  op1.xmm16u(6) -= op2.xmm16u(6);
  op1.xmm16u(7) -= op2.xmm16u(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSUBW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F FA */
void BX_CPU_C::PSUBD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm32u(0) -= op2.xmm32u(0);
  op1.xmm32u(1) -= op2.xmm32u(1);
  op1.xmm32u(2) -= op2.xmm32u(2);
  op1.xmm32u(3) -= op2.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSUBD_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F FB */
void BX_CPU_C::PSUBQ_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm64u(0) -= op2.xmm64u(0);
  op1.xmm64u(1) -= op2.xmm64u(1);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSUBQ_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F FC */
void BX_CPU_C::PADDB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<16; j++) {
    op1.xmmubyte(j) += op2.xmmubyte(j);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PADDB_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F FD */
void BX_CPU_C::PADDW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm16u(0) += op2.xmm16u(0);
  op1.xmm16u(1) += op2.xmm16u(1);
  op1.xmm16u(2) += op2.xmm16u(2);
  op1.xmm16u(3) += op2.xmm16u(3);
  op1.xmm16u(4) += op2.xmm16u(4);
  op1.xmm16u(5) += op2.xmm16u(5);
  op1.xmm16u(6) += op2.xmm16u(6);
  op1.xmm16u(7) += op2.xmm16u(7);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PADDW_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F FE */
void BX_CPU_C::PADDD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  op1.xmm32u(0) += op2.xmm32u(0);
  op1.xmm32u(1) += op2.xmm32u(1);
  op1.xmm32u(2) += op2.xmm32u(2);
  op1.xmm32u(3) += op2.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PADDD_VdqWdq: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 71 Grp12 010 */
void BX_CPU_C::PSRLW_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit8u shift = i->Ib();

  if(shift > 15) {
    op.xmm64u(0) = 0;
    op.xmm64u(1) = 0;
  }
  else {
    op.xmm16u(0) >>= shift;
    op.xmm16u(1) >>= shift;
    op.xmm16u(2) >>= shift;
    op.xmm16u(3) >>= shift;
    op.xmm16u(4) >>= shift;
    op.xmm16u(5) >>= shift;
    op.xmm16u(6) >>= shift;
    op.xmm16u(7) >>= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), op);
#else
  BX_INFO(("PSRLW_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 71 Grp12 100 */
void BX_CPU_C::PSRAW_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
  Bit8u shift = i->Ib();

  /* a shift count of zero leaves the destination unchanged */
  if(shift == 0) return;

  if(shift > 15) {
    result.xmm16u(0) = (op.xmm16u(0) & 0x8000) ? 0xffff : 0;
    result.xmm16u(1) = (op.xmm16u(1) & 0x8000) ? 0xffff : 0;
    result.xmm16u(2) = (op.xmm16u(2) & 0x8000) ? 0xffff : 0;
    result.xmm16u(3) = (op.xmm16u(3) & 0x8000) ? 0xffff : 0;
    result.xmm16u(4) = (op.xmm16u(4) & 0x8000) ? 0xffff : 0;
    result.xmm16u(5) = (op.xmm16u(5) & 0x8000) ? 0xffff : 0;
    result.xmm16u(6) = (op.xmm16u(6) & 0x8000) ? 0xffff : 0;
    result.xmm16u(7) = (op.xmm16u(7) & 0x8000) ? 0xffff : 0;
  }
  else {
    result.xmm16u(0) = op.xmm16u(0) >> shift;
    result.xmm16u(1) = op.xmm16u(1) >> shift;
    result.xmm16u(2) = op.xmm16u(2) >> shift;
    result.xmm16u(3) = op.xmm16u(3) >> shift;
    result.xmm16u(4) = op.xmm16u(4) >> shift;
    result.xmm16u(5) = op.xmm16u(5) >> shift;
    result.xmm16u(6) = op.xmm16u(6) >> shift;
    result.xmm16u(7) = op.xmm16u(7) >> shift;

    if(op.xmm16u(0) & 0x8000) result.xmm16u(0) |= (0xffff << (16 - shift));
    if(op.xmm16u(1) & 0x8000) result.xmm16u(1) |= (0xffff << (16 - shift));
    if(op.xmm16u(2) & 0x8000) result.xmm16u(2) |= (0xffff << (16 - shift));
    if(op.xmm16u(3) & 0x8000) result.xmm16u(3) |= (0xffff << (16 - shift));
    if(op.xmm16u(4) & 0x8000) result.xmm16u(4) |= (0xffff << (16 - shift));
    if(op.xmm16u(5) & 0x8000) result.xmm16u(5) |= (0xffff << (16 - shift));
    if(op.xmm16u(6) & 0x8000) result.xmm16u(6) |= (0xffff << (16 - shift));
    if(op.xmm16u(7) & 0x8000) result.xmm16u(7) |= (0xffff << (16 - shift));
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), result);
#else
  BX_INFO(("PSRAW_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 71 Grp12 110 */
void BX_CPU_C::PSLLW_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit8u shift = i->Ib();

  if(shift > 15) {
    op.xmm64u(0) = 0;
    op.xmm64u(1) = 0;
  }
  else {
    op.xmm16u(0) <<= shift;
    op.xmm16u(1) <<= shift;
    op.xmm16u(2) <<= shift;
    op.xmm16u(3) <<= shift;
    op.xmm16u(4) <<= shift;
    op.xmm16u(5) <<= shift;
    op.xmm16u(6) <<= shift;
    op.xmm16u(7) <<= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), op);
#else
  BX_INFO(("PSLLW_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 72 Grp13 010 */
void BX_CPU_C::PSRLD_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit8u shift = i->Ib();

  if(shift > 31) {
    op.xmm64u(0) = 0;
    op.xmm64u(1) = 0;
  }
  else {
    op.xmm32u(0) >>= shift;
    op.xmm32u(1) >>= shift;
    op.xmm32u(2) >>= shift;
    op.xmm32u(3) >>= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), op);
#else
  BX_INFO(("PSRLD_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 72 Grp13 100 */
void BX_CPU_C::PSRAD_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
  Bit8u shift = i->Ib();

  /* a shift count of zero leaves the destination unchanged */
  if(shift == 0) return;

  if(shift > 31) {
    result.xmm32u(0) = (op.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
    result.xmm32u(1) = (op.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
    result.xmm32u(2) = (op.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
    result.xmm32u(3) = (op.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
  }
  else {
    result.xmm32u(0) = op.xmm32u(0) >> shift;
    result.xmm32u(1) = op.xmm32u(1) >> shift;
    result.xmm32u(2) = op.xmm32u(2) >> shift;
    result.xmm32u(3) = op.xmm32u(3) >> shift;

    if(op.xmm32u(0) & 0x80000000) result.xmm32u(0) |= (0xffffffff << (32-shift));
    if(op.xmm32u(1) & 0x80000000) result.xmm32u(1) |= (0xffffffff << (32-shift));
    if(op.xmm32u(2) & 0x80000000) result.xmm32u(2) |= (0xffffffff << (32-shift));
    if(op.xmm32u(3) & 0x80000000) result.xmm32u(3) |= (0xffffffff << (32-shift));
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), result);
#else
  BX_INFO(("PSRAD_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 72 Grp13 110 */
void BX_CPU_C::PSLLD_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit8u shift = i->Ib();

  if(shift > 31) {
    op.xmm64u(0) = 0;
    op.xmm64u(1) = 0;
  }
  else {
    op.xmm32u(0) <<= shift;
    op.xmm32u(1) <<= shift;
    op.xmm32u(2) <<= shift;
    op.xmm32u(3) <<= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), op);
#else
  BX_INFO(("PSLLD_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 73 Grp14 010 */
void BX_CPU_C::PSRLQ_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit8u shift = i->Ib();

  if(shift > 63) {
    op.xmm64u(0) = 0;
    op.xmm64u(1) = 0;
  }
  else {
    op.xmm64u(0) >>= shift;
    op.xmm64u(1) >>= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), op);
#else
  BX_INFO(("PSRLQ_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 73 Grp14 011 */
void BX_CPU_C::PSRLDQ_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
  Bit8u shift = i->Ib();

  result.xmm64u(0) = result.xmm64u(1) = 0;

  for(unsigned j=shift; j<16; j++) {
    result.xmmubyte(j-shift) = op.xmmubyte(j);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), result);
#else
  BX_INFO(("PSRLDQ_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

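/* PSRLDQ shifts whole bytes, not bits: with shift = 4 the loop above maps
   byte 4 to byte 0 and so on, and the top four bytes keep the zero
   pre-fill. A shift of 16 or more clears the register entirely, since the
   loop body then never executes. PSLLDQ below is the mirror image. */
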
/* 66 0F 73 Grp14 110 */
void BX_CPU_C::PSLLQ_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit8u shift = i->Ib();

  if(shift > 63) {
    op.xmm64u(0) = 0;
    op.xmm64u(1) = 0;
  }
  else {
    op.xmm64u(0) <<= shift;
    op.xmm64u(1) <<= shift;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), op);
#else
  BX_INFO(("PSLLQ_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

/* 66 0F 73 Grp14 111 */
void BX_CPU_C::PSLLDQ_UdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
  Bit8u shift = i->Ib();

  result.xmm64u(0) = result.xmm64u(1) = 0;

  for(unsigned j=shift; j<16; j++) {
    result.xmmubyte(j) = op.xmmubyte(j-shift);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->rm(), result);
#else
  BX_INFO(("PSLLDQ_UdqIb: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}