/////////////////////////////////////////////////////////////////////////
// $Id: sse_move.cc,v 1.68 2007/12/01 16:45:17 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
//   Copyright (c) 2003 Stanislav Shwartsman
//          Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public
//  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_SSE
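/* Common guard executed before any SSE instruction: raises #NM when
 * CR0.TS is set (lazy FPU/SSE context switching), and #UD when CR0.EM
 * is set or the OS has not enabled FXSAVE/FXRSTOR support through
 * CR4.OSFXSR. */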
void BX_CPU_C::prepareSSE(void)
{
  if(BX_CPU_THIS_PTR cr0.get_TS())
    exception(BX_NM_EXCEPTION, 0, 0);

  if(BX_CPU_THIS_PTR cr0.get_EM())
    exception(BX_UD_EXCEPTION, 0, 0);

  if(! (BX_CPU_THIS_PTR cr4.get_OSFXSR()))
    exception(BX_UD_EXCEPTION, 0, 0);
}
#define BX_MXCSR_REGISTER (BX_CPU_THIS_PTR mxcsr.mxcsr)
void BX_CPU_C::print_state_SSE(void)
{
  BX_DEBUG(("MXCSR: 0x%08x\n", BX_MXCSR_REGISTER));
  for(unsigned i=0;i<BX_XMM_REGISTERS;i++) {
    BxPackedXmmRegister xmm = BX_READ_XMM_REG(i);
    BX_DEBUG(("XMM%02u: %08x%08x:%08x%08x\n", i,
       xmm.xmm32u(3), xmm.xmm32u(2), xmm.xmm32u(1), xmm.xmm32u(0)));
  }
}

#endif
/* ************************************ */
/* SSE: SAVE/RESTORE FPU/MMX/SSEx STATE */
/* ************************************ */
/* 0F AE Grp15 010 */
void BX_CPU_C::LDMXCSR(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  Bit32u new_mxcsr;

  read_virtual_dword(i->seg(), RMAddr(i), &new_mxcsr);
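  /* MXCSR_MASK defines the architecturally writable MXCSR bits;
     attempting to load any reserved bit raises #GP(0) */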
  if(new_mxcsr & ~MXCSR_MASK)
      exception(BX_GP_EXCEPTION, 0, 0);

  BX_MXCSR_REGISTER = new_mxcsr;
#else
  BX_INFO(("LDMXCSR: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F AE Grp15 011 */
void BX_CPU_C::STMXCSR(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  Bit32u mxcsr = BX_MXCSR_REGISTER & MXCSR_MASK;
  write_virtual_dword(i->seg(), RMAddr(i), &mxcsr);
#else
  BX_INFO(("STMXCSR: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F AE Grp15 000 */
void BX_CPU_C::FXSAVE(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
  unsigned index;
  BxPackedXmmRegister xmm;

  BX_DEBUG(("FXSAVE: save FPU/MMX/SSE state"));
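  /* Layout of the 512-byte FXSAVE area as stored below:
   *   bytes   0..31  - FCW, FSW, abridged FTW, FOP, FIP/FCS, FDP/FDS,
   *                    MXCSR and MXCSR_MASK
   *   bytes  32..159 - ST0..ST7 / MM0..MM7, one 16-byte slot each
   *   bytes 160..415 - XMM0..XMM15 (XMM8..XMM15 in 64-bit mode only)
   *   remaining bytes are reserved */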
#if BX_SUPPORT_MMX
  if(BX_CPU_THIS_PTR cr0.get_TS())
    exception(BX_NM_EXCEPTION, 0, 0);

  if(BX_CPU_THIS_PTR cr0.get_EM())
    exception(BX_UD_EXCEPTION, 0, 0);
#endif
  xmm.xmm16u(0) = BX_CPU_THIS_PTR the_i387.get_control_word();
  xmm.xmm16u(1) = BX_CPU_THIS_PTR the_i387.get_status_word ();

  Bit16u twd = BX_CPU_THIS_PTR the_i387.get_tag_word(), tag_byte = 0;

  if((twd & 0x0003) != 0x0003) tag_byte |= 0x01;
  if((twd & 0x000c) != 0x000c) tag_byte |= 0x02;
  if((twd & 0x0030) != 0x0030) tag_byte |= 0x04;
  if((twd & 0x00c0) != 0x00c0) tag_byte |= 0x08;
  if((twd & 0x0300) != 0x0300) tag_byte |= 0x10;
  if((twd & 0x0c00) != 0x0c00) tag_byte |= 0x20;
  if((twd & 0x3000) != 0x3000) tag_byte |= 0x40;
  if((twd & 0xc000) != 0xc000) tag_byte |= 0x80;

  xmm.xmm16u(2) = tag_byte;
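  /* FXSAVE stores the abridged tag word: each 2-bit tag is compressed
     to a single "not empty" bit. Example: twd = 0xFFFF (all registers
     empty) yields tag_byte = 0x00; twd = 0x0000 (all valid) yields
     tag_byte = 0xFF. */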
  /* x87 FPU Opcode (16 bits) */
  /* The lower 11 bits contain the FPU opcode, upper 5 bits are reserved */
  xmm.xmm16u(3) = BX_CPU_THIS_PTR the_i387.foo;

  /*
   * x87 FPU IP Offset (32/64 bits)
   * The contents of this field differ depending on the current
   * addressing mode (16/32/64 bit) when the FXSAVE instruction was executed:
   *   + 64-bit mode - 64-bit IP offset
   *   + 32-bit mode - 32-bit IP offset
   *   + 16-bit mode - low 16 bits are IP offset; high 16 bits are reserved.
   * x87 CS FPU IP Selector
   *   + 16 bit, in 16/32 bit mode only
   */
#if BX_SUPPORT_X86_64
  if (i->os64L()) /* 64 bit operand size mode */
  {
    xmm.xmm64u(1) = (BX_CPU_THIS_PTR the_i387.fip);
  }
  else
#endif
  {
    xmm.xmm32u(2) = (BX_CPU_THIS_PTR the_i387.fip) & 0xffffffff;
    xmm.xmm32u(3) = (BX_CPU_THIS_PTR the_i387.fcs);
  }

  write_virtual_dqword_aligned(i->seg(), RMAddr(i), (Bit8u *) &xmm);
  /*
   * x87 FPU Instruction Operand (Data) Pointer Offset (32/64 bits)
   * The contents of this field differ depending on the current
   * addressing mode (16/32/64 bit) when the FXSAVE instruction was executed:
   *   + 64-bit mode - 64-bit offset
   *   + 32-bit mode - 32-bit offset
   *   + 16-bit mode - low 16 bits are offset; high 16 bits are reserved.
   * x87 DS FPU Instruction Operand (Data) Pointer Selector
   *   + 16 bit, in 16/32 bit mode only
   */
#if BX_SUPPORT_X86_64
  if (i->os64L()) /* 64 bit operand size mode */
  {
    xmm.xmm64u(0) = (BX_CPU_THIS_PTR the_i387.fdp);
  }
  else
#endif
  {
    xmm.xmm32u(0) = (BX_CPU_THIS_PTR the_i387.fdp) & 0xffffffff;
    xmm.xmm32u(1) = (BX_CPU_THIS_PTR the_i387.fds);
  }

#if BX_SUPPORT_SSE >= 1
  xmm.xmm32u(2) = BX_MXCSR_REGISTER;
  xmm.xmm32u(3) = MXCSR_MASK;
#else
  xmm.xmm32u(2) = 0;
  xmm.xmm32u(3) = 0;
#endif

  write_virtual_dqword_aligned(i->seg(), RMAddr(i) + 16, (Bit8u *) &xmm);
  /* store i387 register file */
  for(index=0; index < 8; index++)
  {
    const floatx80 &fp = BX_FPU_REG(index);

    xmm.xmm64u(0) = fp.fraction;
    xmm.xmm64u(1) = 0;
    xmm.xmm16u(4) = fp.exp;

    write_virtual_dqword_aligned(i->seg(), RMAddr(i)+index*16+32, (Bit8u *) &xmm);
  }
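  /* Each ST(i) occupies a 16-byte slot at offset 32+index*16: the 64-bit
     significand in bytes 0..7, sign and exponent in bytes 8..9; the
     remaining six bytes of the slot are written as zero here. */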
#if BX_SUPPORT_X86_64
  if (BX_CPU_THIS_PTR efer.ffxsr && CPL == 0 && Is64BitMode())
    return; // skip saving of the XMM state
#endif
#if BX_SUPPORT_SSE >= 1
  /* store XMM register file */
  for(index=0; index < BX_XMM_REGISTERS; index++)
  {
    // save XMM8-XMM15 only in 64-bit mode
    if (index < 8 || Is64BitMode()) {
      write_virtual_dqword_aligned(i->seg(),
          RMAddr(i)+index*16+160, (Bit8u *) &(BX_CPU_THIS_PTR xmm[index]));
    }
  }
#endif

  /* do not touch reserved fields */
#else
  BX_INFO(("FXSAVE: requires P6 support, use --enable-cpu-level=6 option"));
  UndefinedOpcode(i);
#endif
}
/* 0F AE Grp15 001 */
void BX_CPU_C::FXRSTOR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
  BxPackedXmmRegister xmm;
  int index;

  BX_DEBUG(("FXRSTOR: restore FPU/MMX/SSE state"));

#if BX_SUPPORT_MMX
  if(BX_CPU_THIS_PTR cr0.get_TS())
    exception(BX_NM_EXCEPTION, 0, 0);

  if(BX_CPU_THIS_PTR cr0.get_EM())
    exception(BX_UD_EXCEPTION, 0, 0);
#endif
  read_virtual_dqword_aligned(i->seg(), RMAddr(i), (Bit8u *) &xmm);

  BX_CPU_THIS_PTR the_i387.cwd = xmm.xmm16u(0);
  BX_CPU_THIS_PTR the_i387.swd = xmm.xmm16u(1);
  BX_CPU_THIS_PTR the_i387.tos = (xmm.xmm16u(1) >> 11) & 0x07;

  /* Restore x87 FPU Opcode */
  /* The lower 11 bits contain the FPU opcode, upper 5 bits are reserved */
  BX_CPU_THIS_PTR the_i387.foo = xmm.xmm16u(3) & 0x7FF;

  /* Restore x87 FPU IP */
#if BX_SUPPORT_X86_64
  if (i->os64L()) /* 64 bit operand size mode */
  {
    BX_CPU_THIS_PTR the_i387.fip = xmm.xmm64u(1);
  }
  else
#endif
  {
    BX_CPU_THIS_PTR the_i387.fip = xmm.xmm32u(2);
    BX_CPU_THIS_PTR the_i387.fcs = xmm.xmm16u(6); /* FCS lives in bytes 12..13 of the image, matching FXSAVE above */
  }

  Bit32u twd = 0, tag_byte = xmm.xmm16u(2);
  /* Restore x87 FPU DP */
  read_virtual_dqword_aligned(i->seg(), RMAddr(i) + 16, (Bit8u *) &xmm);

#if BX_SUPPORT_X86_64
  if (i->os64L()) /* 64 bit operand size mode */
  {
    BX_CPU_THIS_PTR the_i387.fdp = xmm.xmm64u(0);
  }
  else
#endif
  {
    BX_CPU_THIS_PTR the_i387.fdp = xmm.xmm32u(0);
    BX_CPU_THIS_PTR the_i387.fds = xmm.xmm16u(2);
  }
#if BX_SUPPORT_SSE >= 1
  /* If the OSFXSR bit in CR4 is not set, the FXRSTOR instruction does
     not restore the states of the XMM and MXCSR registers. */
  if(BX_CPU_THIS_PTR cr4.get_OSFXSR())
  {
    Bit32u new_mxcsr = xmm.xmm32u(2);
    if(new_mxcsr & ~MXCSR_MASK)
       exception(BX_GP_EXCEPTION, 0, 0);

    BX_MXCSR_REGISTER = new_mxcsr;
  }
#endif
  /* load i387 register file */
  for(index=0; index < 8; index++)
  {
    read_virtual_tword(i->seg(), RMAddr(i)+index*16+32, &(BX_FPU_REG(index)));
  }
  /* FTW
   *
   * Note that the original format for FTW can be recreated from the stored
   * FTW valid bits and the stored 80-bit FP data (assuming the stored data
   * was not the contents of MMX registers) using the following table:
   *
   *   | Exponent | Exponent | Fraction | J,M bits | FTW valid | x87 FTW |
   *   | all 1s   | all 0s   | all 0s   |          |           |         |
   *   -------------------------------------------------------------------
   *   |    0     |    0     |    0     |    0x    |     1     | S   10  |
   *   |    0     |    0     |    0     |    1x    |     1     | V   00  |
   *   -------------------------------------------------------------------
   *   |    0     |    0     |    1     |    00    |     1     | S   10  |
   *   |    0     |    0     |    1     |    10    |     1     | V   00  |
   *   -------------------------------------------------------------------
   *   |    0     |    1     |    0     |    0x    |     1     | S   10  |
   *   |    0     |    1     |    0     |    1x    |     1     | S   10  |
   *   -------------------------------------------------------------------
   *   |    0     |    1     |    1     |    00    |     1     | Z   01  |
   *   |    0     |    1     |    1     |    10    |     1     | S   10  |
   *   -------------------------------------------------------------------
   *   |    1     |    0     |    0     |    0x    |     1     | S   10  |
   *   |    1     |    0     |    0     |    1x    |     1     | S   10  |
   *   -------------------------------------------------------------------
   *   |    1     |    0     |    1     |    00    |     1     | S   10  |
   *   |    1     |    0     |    1     |    10    |     1     | S   10  |
   *   -------------------------------------------------------------------
   *   |          all combinations above           |     0     | E   11  |
   *
   * The J-bit is defined to be the 1-bit binary integer to the left of
   * the decimal place in the significand.
   *
   * The M-bit is defined to be the most significant bit of the fractional
   * portion of the significand (i.e., the bit immediately to the right of
   * the decimal place). When the M-bit is the most significant bit of the
   * fractional portion of the significand, it must be 0 if the fraction
   * is all 0's.
   */
  for(index = 7;index >= 0; index--, twd <<= 2, tag_byte <<= 1)
  {
    if(tag_byte & 0x80) {
      const floatx80 &fpu_reg = BX_FPU_REG(index);
      twd |= FPU_tagof(fpu_reg);
    }
    else {
      twd |= FPU_Tag_Empty;
    }
  }

  BX_CPU_THIS_PTR the_i387.twd = (twd >> 2);
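  /* For registers marked valid in the abridged byte, FPU_tagof()
     re-derives the two-bit tag (valid/zero/special) from the reloaded
     80-bit value, implementing the table above; the final shift undoes
     the extra twd <<= 2 performed after the last loop iteration. */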
#if BX_SUPPORT_X86_64
  if (BX_CPU_THIS_PTR efer.ffxsr && CPL == 0 && Is64BitMode())
    return; // skip restore of the XMM state
#endif
#if BX_SUPPORT_SSE >= 1
  /* If the OSFXSR bit in CR4 is not set, the FXRSTOR instruction does
     not restore the states of the XMM and MXCSR registers. */
  if(BX_CPU_THIS_PTR cr4.get_OSFXSR())
  {
    /* load XMM register file */
    for(index=0; index < BX_XMM_REGISTERS; index++)
    {
      // restore XMM8-XMM15 only in 64-bit mode
      if (index < 8 || Is64BitMode()) {
        read_virtual_dqword_aligned(i->seg(),
            RMAddr(i)+index*16+160, (Bit8u *) &(BX_CPU_THIS_PTR xmm[index]));
      }
    }
  }
#endif

#else
  BX_INFO(("FXRSTOR: requires P6 support, use --enable-cpu-level=6 option"));
  UndefinedOpcode(i);
#endif
}
/* *************************** */
/* SSE: MEMORY MOVE OPERATIONS */
/* *************************** */

/* All these opcodes never generate SIMD floating point exceptions */
/* MOVUPS:    0F 10 */
/* MOVUPD: 66 0F 10 */
/* MOVDQU: F3 0F 6F */
void BX_CPU_C::MOVUPS_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op;

  /* op is a register or memory reference */
  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("MOVUPS_VpsWps: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* MOVUPS:    0F 11 */
/* MOVUPD: 66 0F 11 */
/* MOVDQU: F3 0F 7F */
void BX_CPU_C::MOVUPS_WpsVps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());

  /* op is a register or memory reference */
  if (i->modC0()) {
    BX_WRITE_XMM_REG(i->rm(), op);
  }
  else {
    write_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
  }
#else
  BX_INFO(("MOVUPS_WpsVps: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* MOVAPS:    0F 28 */
/* MOVAPD: 66 0F 28 */
/* MOVDQA: 66 0F 6F */
void BX_CPU_C::MOVAPS_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op;

  /* op is a register or memory reference */
  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dqword_aligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("MOVAPS_VpsWps: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* MOVAPS:    0F 29 */
/* MOVAPD: 66 0F 29 */
/* MOVDQA: 66 0F 7F */
void BX_CPU_C::MOVAPS_WpsVps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());

  /* op is a register or memory reference */
  if (i->modC0()) {
    BX_WRITE_XMM_REG(i->rm(), op);
  }
  else {
    write_virtual_dqword_aligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }
#else
  BX_INFO(("MOVAPS_WpsVps: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F3 0F 10 */
void BX_CPU_C::MOVSS_VssWss(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
  Bit32u val32;

  /* op2 is a register or memory reference */
  if (i->modC0())
  {
    /* If the source operand is an XMM register, the high-order
       96 bits of the destination XMM register are not modified. */
    op.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dword(i->seg(), RMAddr(i), &val32);

    /* If the source operand is a memory location, the high-order
       96 bits of the destination XMM register are cleared to 0s */
    op.xmm32u(0) = val32;
    op.xmm32u(1) = 0;
    op.xmm64u(1) = 0;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("MOVSS_VssWss: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F3 0F 11 */
void BX_CPU_C::MOVSS_WssVss(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  Bit32u val32 = BX_READ_XMM_REG_LO_DWORD(i->nnn());

  /* destination is a register or memory reference */
  if (i->modC0())
  {
    /* If the destination operand is an XMM register, the high-order
       96 bits of the destination XMM register are not modified. */
    BX_WRITE_XMM_REG_LO_DWORD(i->rm(), val32);
  }
  else {
    /* pointer, segment address pair */
    write_virtual_dword(i->seg(), RMAddr(i), &val32);
  }
#else
  BX_INFO(("MOVSS_WssVss: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F2 0F 10 */
void BX_CPU_C::MOVSD_VsdWsd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
  Bit64u val64;

  /* op2 is a register or memory reference */
  if (i->modC0())
  {
    /* If the source operand is an XMM register, the high-order
       64 bits of the destination XMM register are not modified. */
    op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);

    /* If the source operand is a memory location, the high-order
       64 bits of the destination XMM register are cleared to 0s */
    op.xmm64u(0) = val64;
    op.xmm64u(1) = 0;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("MOVSD_VsdWsd: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F2 0F 11 */
void BX_CPU_C::MOVSD_WsdVsd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  Bit64u val64 = BX_READ_XMM_REG_LO_QWORD(i->nnn());

  /* destination is a register or memory reference */
  if (i->modC0())
  {
    /* If the destination operand is an XMM register, the high-order
       64 bits of the destination XMM register are not modified. */
    BX_WRITE_XMM_REG_LO_QWORD(i->rm(), val64);
  }
  else {
    /* pointer, segment address pair */
    write_virtual_qword(i->seg(), RMAddr(i), &val64);
  }
#else
  BX_INFO(("MOVSD_WsdVsd: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* MOVLPS:    0F 12 */
/* MOVLPD: 66 0F 12 */
void BX_CPU_C::MOVLPS_VpsMq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();
  Bit64u val64;

  if (i->modC0()) /* MOVHLPS xmm1, xmm2 opcode */
  {
    val64 = BX_READ_XMM_REG_HI_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), val64);
#else
  BX_INFO(("MOVLPS_VpsMq: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F2 0F 12 */
void BX_CPU_C::MOVDDUP_VpdWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 3
  BX_CPU_THIS_PTR prepareSSE();
  Bit64u val64;
  BxPackedXmmRegister op;

  if (i->modC0())
  {
    val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
  }
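  /* duplicate the 64-bit source value into both qwords of the result */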
  op.xmm64u(0) = val64;
  op.xmm64u(1) = val64;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("MOVDDUP_VpdWq: requires SSE3, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F3 0F 12 */
void BX_CPU_C::MOVSLDUP_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 3
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister op, result;

  /* op is a register or memory reference */
  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  result.xmm32u(0) = op.xmm32u(0);
  result.xmm32u(1) = op.xmm32u(0);
  result.xmm32u(2) = op.xmm32u(2);
  result.xmm32u(3) = op.xmm32u(2);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("MOVSLDUP_VpsWps: requires SSE3, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F3 0F 16 */
void BX_CPU_C::MOVSHDUP_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 3
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister op, result;

  /* op is a register or memory reference */
  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  result.xmm32u(0) = op.xmm32u(1);
  result.xmm32u(1) = op.xmm32u(1);
  result.xmm32u(2) = op.xmm32u(3);
  result.xmm32u(3) = op.xmm32u(3);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("MOVSHDUP_VpsWps: requires SSE3, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* MOVLPS:    0F 13 */
/* MOVLPD: 66 0F 13 */
void BX_CPU_C::MOVLPS_MqVps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();
  write_virtual_qword(i->seg(), RMAddr(i), &BX_XMM_REG_LO_QWORD(i->nnn()));
#else
  BX_INFO(("MOVLPS_MqVps: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* MOVHPS:    0F 16 */
/* MOVHPD: 66 0F 16 */
void BX_CPU_C::MOVHPS_VpsMq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();
  Bit64u val64;

  if (i->modC0()) /* MOVLHPS xmm1, xmm2 opcode */
  {
    val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG_HI_QWORD(i->nnn(), val64);
#else
  BX_INFO(("MOVHPS_VpsMq: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* MOVHPS:    0F 17 */
/* MOVHPD: 66 0F 17 */
void BX_CPU_C::MOVHPS_MqVps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();
  write_virtual_qword(i->seg(), RMAddr(i), &BX_XMM_REG_HI_QWORD(i->nnn()));
#else
  BX_INFO(("MOVHPS_MqVps: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F2 0F F0 */
void BX_CPU_C::LDDQU_VdqMdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 3
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op;
  read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("LDDQU_VdqMdq: requires SSE3, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F F7 */
void BX_CPU_C::MASKMOVDQU_VdqUdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  if (! i->modC0()) {
    BX_INFO(("MASKMOVDQU_VdqUdq: unexpected memory reference"));
    UndefinedOpcode(i);
  }

  bx_address rdi;
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn()),
    mask = BX_READ_XMM_REG(i->rm());

#if BX_SUPPORT_X86_64
  if (i->as64L()) { /* 64 bit address mode */
    rdi = RDI;
  }
  else
#endif
  if (i->as32L()) {
    rdi = EDI;
  }
  else { /* 16 bit address mode */
    rdi = DI;
  }
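  /* Byte-granular store to DS:rDI: only bytes whose mask MSB is set are
     written. On real hardware this store carries a non-temporal hint;
     Bochs performs ordinary stores since caches are not modeled. */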
  /* partial write, no data will be written to memory if mask is all 0s */
  for(unsigned j=0; j<16; j++)
  {
    if(mask.xmmubyte(j) & 0x80)
       write_virtual_byte(BX_SEG_REG_DS, rdi+j, &op.xmmubyte(j));
  }
#else
  BX_INFO(("MASKMOVDQU_VdqUdq: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 50 */
void BX_CPU_C::MOVMSKPS_GdVRps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
  Bit32u val32 = 0;

  if(op.xmm32u(0) & 0x80000000) val32 |= 0x1;
  if(op.xmm32u(1) & 0x80000000) val32 |= 0x2;
  if(op.xmm32u(2) & 0x80000000) val32 |= 0x4;
  if(op.xmm32u(3) & 0x80000000) val32 |= 0x8;

  BX_WRITE_32BIT_REGZ(i->rm(), val32);
#else
  BX_INFO(("MOVMSKPS_GdVRps: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 50 */
void BX_CPU_C::MOVMSKPD_GdVRpd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
  Bit32u val32 = 0;

  if(op.xmm32u(1) & 0x80000000) val32 |= 0x1;
  if(op.xmm32u(3) & 0x80000000) val32 |= 0x2;

  BX_WRITE_32BIT_REGZ(i->rm(), val32);
#else
  BX_INFO(("MOVMSKPD_GdVRpd: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 6E */
void BX_CPU_C::MOVD_VdqEd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1;
  Bit32u op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_32BIT_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }

  op1.xmm64u(0) = (Bit64u)(op2);
  op1.xmm64u(1) = 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("MOVD_VdqEd: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
#if BX_SUPPORT_X86_64

/* 66 0F 6E */
void BX_CPU_C::MOVQ_VdqEq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1;
  Bit64u op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_64BIT_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &op2);
  }

  op1.xmm64u(0) = op2;
  op1.xmm64u(1) = 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("MOVQ_VdqEq: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

#endif
/* 66 0F 7E */
void BX_CPU_C::MOVD_EdVd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  Bit32u op2 = BX_READ_XMM_REG_LO_DWORD(i->nnn());

  /* destination is a register or memory reference */
  if (i->modC0()) {
    BX_WRITE_32BIT_REGZ(i->rm(), op2);
  }
  else {
    /* pointer, segment address pair */
    write_virtual_dword(i->seg(), RMAddr(i), &op2);
  }
#else
  BX_INFO(("MOVD_EdVd: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
#if BX_SUPPORT_X86_64

/* 66 0F 7E */
void BX_CPU_C::MOVQ_EqVq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  Bit64u op2 = BX_READ_XMM_REG_LO_QWORD(i->nnn());

  /* destination is a register or memory reference */
  if (i->modC0()) {
    BX_WRITE_64BIT_REG(i->rm(), op2);
  }
  else {
    /* pointer, segment address pair */
    write_virtual_qword(i->seg(), RMAddr(i), &op2);
  }
#else
  BX_INFO(("MOVQ_EqVq: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

#endif
/* F3 0F 7E */
void BX_CPU_C::MOVQ_VqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op;
  Bit64u val64;

  if (i->modC0()) {
    op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
    op.xmm64u(0) = val64;
  }

  /* zero-extension to 128 bit */
  op.xmm64u(1) = 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("MOVQ_VqWq: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F D6 */
void BX_CPU_C::MOVQ_WqVq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());

  if (i->modC0())
  {
    op.xmm64u(1) = 0; /* zero-extension to 128 bits */
    BX_WRITE_XMM_REG(i->rm(), op);
  }
  else {
    write_virtual_qword(i->seg(), RMAddr(i), &(op.xmm64u(0)));
  }
#else
  BX_INFO(("MOVQ_WqVq: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F2 0F D6 */
void BX_CPU_C::MOVDQ2Q_PqVRq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();
  BX_CPU_THIS_PTR prepareFPU2MMX();

  BxPackedMmxRegister mm;
  MMXUQ(mm) = BX_READ_XMM_REG_LO_QWORD(i->nnn());

  BX_WRITE_MMX_REG(i->rm(), mm);
#else
  BX_INFO(("MOVDQ2Q_PqVRq: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* F3 0F D6 */
void BX_CPU_C::MOVQ2DQ_VdqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();
  BX_CPU_THIS_PTR prepareFPU2MMX();

  BxPackedXmmRegister op;
  BxPackedMmxRegister mm = BX_READ_MMX_REG(i->nnn());

  op.xmm64u(0) = MMXUQ(mm);
  op.xmm64u(1) = 0;

  BX_WRITE_XMM_REG(i->rm(), op);
#else
  BX_INFO(("MOVQ2DQ_VdqQq: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F D7 */
void BX_CPU_C::PMOVMSKB_GdUdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  Bit32u result = 0;

  if(op.xmmubyte(0x0) & 0x80) result |= 0x0001;
  if(op.xmmubyte(0x1) & 0x80) result |= 0x0002;
  if(op.xmmubyte(0x2) & 0x80) result |= 0x0004;
  if(op.xmmubyte(0x3) & 0x80) result |= 0x0008;
  if(op.xmmubyte(0x4) & 0x80) result |= 0x0010;
  if(op.xmmubyte(0x5) & 0x80) result |= 0x0020;
  if(op.xmmubyte(0x6) & 0x80) result |= 0x0040;
  if(op.xmmubyte(0x7) & 0x80) result |= 0x0080;
  if(op.xmmubyte(0x8) & 0x80) result |= 0x0100;
  if(op.xmmubyte(0x9) & 0x80) result |= 0x0200;
  if(op.xmmubyte(0xA) & 0x80) result |= 0x0400;
  if(op.xmmubyte(0xB) & 0x80) result |= 0x0800;
  if(op.xmmubyte(0xC) & 0x80) result |= 0x1000;
  if(op.xmmubyte(0xD) & 0x80) result |= 0x2000;
  if(op.xmmubyte(0xE) & 0x80) result |= 0x4000;
  if(op.xmmubyte(0xF) & 0x80) result |= 0x8000;

  /* now write result back to destination */
  BX_WRITE_32BIT_REGZ(i->nnn(), result);

#else
  BX_INFO(("PMOVMSKB_GdUdq: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* **************************** */
/* SSE: STORE DATA NON-TEMPORAL */
/* **************************** */
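/* On real hardware non-temporal stores are hints to minimize cache
   pollution; Bochs does not model caches, so the handlers below behave
   as ordinary stores. */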
/* 0F C3 */
void BX_CPU_C::MOVNTI_MdGd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  Bit32u val32 = BX_READ_32BIT_REG(i->nnn());
  write_virtual_dword(i->seg(), RMAddr(i), &val32);
#else
  BX_INFO(("MOVNTI_MdGd: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
#if BX_SUPPORT_X86_64

/* 0F C3 */
void BX_CPU_C::MOVNTI_MqGq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
  Bit64u val64 = BX_READ_64BIT_REG(i->nnn());
  write_virtual_qword(i->seg(), RMAddr(i), &val64);
#else
  BX_INFO(("MOVNTI_MqGq: requires SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}

#endif
/* MOVNTPS:    0F 2B */
/* MOVNTPD: 66 0F 2B */
/* MOVNTDQ: 66 0F E7 */
void BX_CPU_C::MOVNTPS_MpsVps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();
  write_virtual_dqword_aligned(i->seg(), RMAddr(i), (Bit8u *)(&BX_READ_XMM_REG(i->nnn())));
#else
  BX_INFO(("MOVNTPS_MpsVps: requires SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* ************************** */
/* 3-BYTE-OPCODE INSTRUCTIONS */
/* ************************** */

#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
/* 66 0F 38 20 */
void BX_CPU_C::PMOVSXBW_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit64u val64;

  if (i->modC0())
  {
    val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
  }

  result.xmm16u(0) = (Bit8s) (val64 & 0xFF);
  result.xmm16u(1) = (Bit8s) ((val64 >> 8) & 0xFF);
  result.xmm16u(2) = (Bit8s) ((val64 >> 16) & 0xFF);
  result.xmm16u(3) = (Bit8s) ((val64 >> 24) & 0xFF);
  result.xmm16u(4) = (Bit8s) ((val64 >> 32) & 0xFF);
  result.xmm16u(5) = (Bit8s) ((val64 >> 40) & 0xFF);
  result.xmm16u(6) = (Bit8s) ((val64 >> 48) & 0xFF);
  result.xmm16u(7) = (Bit8s) (val64 >> 56);
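  /* The (Bit8s) cast sign-extends each source byte as it is widened to
     the 16-bit element, e.g. 0x80 becomes 0xFF80. The rest of the
     PMOVSX family below relies on the same signed-cast idiom. */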
  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVSXBW_VdqWq: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 21 */
void BX_CPU_C::PMOVSXBD_VdqWd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit32u val32;

  if (i->modC0())
  {
    val32 = BX_READ_XMM_REG_LO_DWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dword(i->seg(), RMAddr(i), &val32);
  }

  result.xmm32u(0) = (Bit8s) (val32 & 0xFF);
  result.xmm32u(1) = (Bit8s) ((val32 >> 8) & 0xFF);
  result.xmm32u(2) = (Bit8s) ((val32 >> 16) & 0xFF);
  result.xmm32u(3) = (Bit8s) (val32 >> 24);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVSXBD_VdqWd: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 22 */
void BX_CPU_C::PMOVSXBQ_VdqWw(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit16u val16;

  if (i->modC0())
  {
    val16 = BX_READ_XMM_REG_LO_WORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_word(i->seg(), RMAddr(i), &val16);
  }

  result.xmm64u(0) = (Bit8s) (val16 & 0xFF);
  result.xmm64u(1) = (Bit8s) (val16 >> 8);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVSXBQ_VdqWw: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 23 */
void BX_CPU_C::PMOVSXWD_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit64u val64;

  if (i->modC0())
  {
    val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
  }

  result.xmm32u(0) = (Bit16s) (val64 & 0xFFFF);
  result.xmm32u(1) = (Bit16s) ((val64 >> 16) & 0xFFFF);
  result.xmm32u(2) = (Bit16s) ((val64 >> 32) & 0xFFFF);
  result.xmm32u(3) = (Bit16s) (val64 >> 48);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVSXWD_VdqWq: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 24 */
void BX_CPU_C::PMOVSXWQ_VdqWd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit32u val32;

  if (i->modC0())
  {
    val32 = BX_READ_XMM_REG_LO_DWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dword(i->seg(), RMAddr(i), &val32);
  }

  result.xmm64u(0) = (Bit16s) (val32 & 0xFFFF);
  result.xmm64u(1) = (Bit16s) (val32 >> 16);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVSXWQ_VdqWd: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 25 */
void BX_CPU_C::PMOVSXDQ_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit64u val64;

  if (i->modC0())
  {
    val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
  }

  result.xmm64u(0) = (Bit32s) (val64 & 0xFFFFFFFF);
  result.xmm64u(1) = (Bit32s) (val64 >> 32);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVSXDQ_VdqWq: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 2A */
void BX_CPU_C::MOVNTDQA_VdqMdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  /* source must be memory reference */
  if (i->modC0()) {
    BX_INFO(("MOVNTDQA_VdqMdq: must be memory reference"));
    UndefinedOpcode(i);
  }

  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op;

  read_virtual_dqword_aligned(i->seg(), RMAddr(i), (Bit8u *) &op);

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);

#else
  BX_INFO(("MOVNTDQA_VdqMdq: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 30 */
void BX_CPU_C::PMOVZXBW_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit64u val64;

  if (i->modC0())
  {
    val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
  }

  result.xmm16u(0) = val64 & 0xFF;
  result.xmm16u(1) = (val64 >> 8) & 0xFF;
  result.xmm16u(2) = (val64 >> 16) & 0xFF;
  result.xmm16u(3) = (val64 >> 24) & 0xFF;
  result.xmm16u(4) = (val64 >> 32) & 0xFF;
  result.xmm16u(5) = (val64 >> 40) & 0xFF;
  result.xmm16u(6) = (val64 >> 48) & 0xFF;
  result.xmm16u(7) = val64 >> 56;
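  /* Unlike the PMOVSX family, no signed cast is used here: masking the
     unsigned source value already yields the zero-extended element. */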
  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVZXBW_VdqWq: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 31 */
void BX_CPU_C::PMOVZXBD_VdqWd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit32u val32;

  if (i->modC0())
  {
    val32 = BX_READ_XMM_REG_LO_DWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dword(i->seg(), RMAddr(i), &val32);
  }

  result.xmm32u(0) = val32 & 0xFF;
  result.xmm32u(1) = (val32 >> 8) & 0xFF;
  result.xmm32u(2) = (val32 >> 16) & 0xFF;
  result.xmm32u(3) = val32 >> 24;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVZXBD_VdqWd: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 32 */
void BX_CPU_C::PMOVZXBQ_VdqWw(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit16u val16;

  if (i->modC0())
  {
    val16 = BX_READ_XMM_REG_LO_WORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_word(i->seg(), RMAddr(i), &val16);
  }

  result.xmm64u(0) = val16 & 0xFF;
  result.xmm64u(1) = val16 >> 8;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVZXBQ_VdqWw: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 33 */
void BX_CPU_C::PMOVZXWD_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit64u val64;

  if (i->modC0())
  {
    val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
  }

  result.xmm32u(0) = val64 & 0xFFFF;
  result.xmm32u(1) = (val64 >> 16) & 0xFFFF;
  result.xmm32u(2) = (val64 >> 32) & 0xFFFF;
  result.xmm32u(3) = val64 >> 48;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVZXWD_VdqWq: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 34 */
void BX_CPU_C::PMOVZXWQ_VdqWd(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit32u val32;

  if (i->modC0())
  {
    val32 = BX_READ_XMM_REG_LO_DWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_dword(i->seg(), RMAddr(i), &val32);
  }

  result.xmm64u(0) = val32 & 0xFFFF;
  result.xmm64u(1) = val32 >> 16;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVZXWQ_VdqWd: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 35 */
void BX_CPU_C::PMOVZXDQ_VdqWq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();
  BxPackedXmmRegister result;
  Bit64u val64;

  if (i->modC0())
  {
    val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), &val64);
  }

  result.xmm64u(0) = val64 & 0xFFFFFFFF;
  result.xmm64u(1) = val64 >> 32;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PMOVZXDQ_VdqWq: requires SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 3A 0F */
void BX_CPU_C::PALIGNR_VdqWdqIb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  unsigned shift = i->Ib() * 8;
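  /* PALIGNR concatenates op1:op2 into a 256-bit intermediate, shifts it
     right by imm8 bytes and keeps the low 128 bits. Example: imm8 = 4
     yields bytes op2[4..15] followed by op1[0..3]. The cases below
     implement the byte shift on the four 64-bit halves. */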
  if(shift == 0) {
    result.xmm64u(0) = op2.xmm64u(0);
    result.xmm64u(1) = op2.xmm64u(1);
  }
  else if(shift < 64) {
    result.xmm64u(0) = (op2.xmm64u(0) >> shift) | (op2.xmm64u(1) << (64-shift));
    result.xmm64u(1) = (op2.xmm64u(1) >> shift) | (op1.xmm64u(0) << (64-shift));
  }
  else if(shift == 64) {
    result.xmm64u(0) = op2.xmm64u(1);
    result.xmm64u(1) = op1.xmm64u(0);
  }
  else if(shift < 128) {
    shift -= 64;
    result.xmm64u(0) = (op2.xmm64u(1) >> shift) | (op1.xmm64u(0) << (64-shift));
    result.xmm64u(1) = (op1.xmm64u(0) >> shift) | (op1.xmm64u(1) << (64-shift));
  }
  else if(shift == 128) {
    result.xmm64u(0) = op1.xmm64u(0);
    result.xmm64u(1) = op1.xmm64u(1);
  }
  else if(shift < 192) {
    shift -= 128;
    result.xmm64u(0) = (op1.xmm64u(0) >> shift) | (op1.xmm64u(1) << (64-shift));
    result.xmm64u(1) = (op1.xmm64u(1) >> shift);
  }
  else if(shift < 256) {
    result.xmm64u(0) = op1.xmm64u(1) >> (shift - 192);
    result.xmm64u(1) = 0;
  }
  else {
    result.xmm64u(0) = 0;
    result.xmm64u(1) = 0;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PALIGNR_VdqWdqIb: requires SSE3E, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
#endif // BX_SUPPORT_SSE >= 4 || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)