1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse_move.cc,v 1.68 2007/12/01 16:45:17 sshwarts Exp $
3 /////////////////////////////////////////////////////////////////////////
5 // Copyright (c) 2003 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 /////////////////////////////////////////////////////////////////////////
25 #define NEED_CPU_REG_SHORTCUTS 1
28 #define LOG_THIS BX_CPU_THIS_PTR
// Precondition check called at the top of the SSE handlers in this file.
// Calls exception() with:
//   - BX_NM_EXCEPTION when CR0.TS is set,
//   - BX_UD_EXCEPTION when CR0.EM is set,
//   - BX_UD_EXCEPTION when CR4.OSFXSR is clear.
// The checks are evaluated in that order.
33 void BX_CPU_C::prepareSSE(void)
35 if(BX_CPU_THIS_PTR cr0
.get_TS())
36 exception(BX_NM_EXCEPTION
, 0, 0);
38 if(BX_CPU_THIS_PTR cr0
.get_EM())
39 exception(BX_UD_EXCEPTION
, 0, 0);
41 if(! (BX_CPU_THIS_PTR cr4
.get_OSFXSR()))
42 exception(BX_UD_EXCEPTION
, 0, 0);
45 #define BX_MXCSR_REGISTER (BX_CPU_THIS_PTR mxcsr.mxcsr)
// Debug dump of the SSE state through BX_DEBUG: first MXCSR, then each of
// the BX_XMM_REGISTERS XMM registers, printed as four 32-bit hex words
// from dword 3 down to dword 0.
47 void BX_CPU_C::print_state_SSE(void)
49 BX_DEBUG(("MXCSR: 0x%08x\n", BX_MXCSR_REGISTER
));
50 for(unsigned i
=0;i
<BX_XMM_REGISTERS
;i
++) {
// Work on a local copy of register i.
51 BxPackedXmmRegister xmm
= BX_READ_XMM_REG(i
);
52 BX_DEBUG(("XMM%02u: %08x%08x:%08x%08x\n", i
,
53 xmm
.xmm32u(3), xmm
.xmm32u(2), xmm
.xmm32u(1), xmm
.xmm32u(0)));
59 /* ************************************ */
60 /* SSE: SAVE/RESTORE FPU/MMX/SSEx STATE */
61 /* ************************************ */
// LDMXCSR: loads MXCSR from a 32-bit memory operand.
// After prepareSSE(), the dword at i->seg():RMAddr(i) is read into
// new_mxcsr. If any bit outside MXCSR_MASK is set, exception() is called
// with BX_GP_EXCEPTION; otherwise the value is stored into MXCSR.
// The declaration of new_mxcsr is not visible in this excerpt.
64 void BX_CPU_C::LDMXCSR(bxInstruction_c
*i
)
66 #if BX_SUPPORT_SSE >= 1
67 BX_CPU_THIS_PTR
prepareSSE();
71 read_virtual_dword(i
->seg(), RMAddr(i
), &new_mxcsr
);
// Reject values that set bits not covered by MXCSR_MASK.
72 if(new_mxcsr
& ~MXCSR_MASK
)
73 exception(BX_GP_EXCEPTION
, 0, 0);
75 BX_MXCSR_REGISTER
= new_mxcsr
;
// NOTE(review): presumably the branch taken when SSE support is compiled
// out; the #else line is not visible here.
77 BX_INFO(("LDMXCSR: required SSE, use --enable-sse option"));
// STMXCSR: stores MXCSR to a 32-bit memory operand.
// After prepareSSE(), the current MXCSR value masked with MXCSR_MASK is
// written as a dword to i->seg():RMAddr(i).
83 void BX_CPU_C::STMXCSR(bxInstruction_c
*i
)
85 #if BX_SUPPORT_SSE >= 1
86 BX_CPU_THIS_PTR
prepareSSE();
// Only the bits covered by MXCSR_MASK are stored.
88 Bit32u mxcsr
= BX_MXCSR_REGISTER
& MXCSR_MASK
;
89 write_virtual_dword(i
->seg(), RMAddr(i
), &mxcsr
);
// NOTE(review): presumably the branch taken when SSE support is compiled
// out; the #else line is not visible here.
91 BX_INFO(("STMXCSR: required SSE, use --enable-sse option"));
// FXSAVE: writes the x87/MMX/SSE state to the memory image at
// i->seg():RMAddr(i) using 16-byte aligned writes.
//
// Layout produced by the visible code (byte offsets from RMAddr(i)):
//   +0   control word, status word, abridged tag byte, FPU opcode (foo),
//        then FIP (64-bit) or FIP low 32 bits + FCS
//   +16  FDP (64-bit) or FDP low 32 bits + FDS, then MXCSR and MXCSR_MASK
//        (the last two only when BX_SUPPORT_SSE >= 1)
//   +32  eight x87 registers, 16 bytes apart (64-bit fraction + 16-bit exp)
//   +160 XMM registers, 16 bytes apart; indices 8 and above are written
//        only when Is64BitMode() is true
//
// exception() is called with BX_NM_EXCEPTION when CR0.TS is set and with
// BX_UD_EXCEPTION when CR0.EM is set. On BX_SUPPORT_X86_64 builds the
// function returns before the XMM area when efer.ffxsr is set, CPL is 0
// and Is64BitMode() is true.
// The declaration of 'index' is not visible in this excerpt.
97 void BX_CPU_C::FXSAVE(bxInstruction_c
*i
)
101 BxPackedXmmRegister xmm
;
103 BX_DEBUG(("FXSAVE: save FPU/MMX/SSE state"));
106 if(BX_CPU_THIS_PTR cr0
.get_TS())
107 exception(BX_NM_EXCEPTION
, 0, 0);
109 if(BX_CPU_THIS_PTR cr0
.get_EM())
110 exception(BX_UD_EXCEPTION
, 0, 0);
// First 16-byte row: words 0 and 1 hold the control and status words.
113 xmm
.xmm16u(0) = BX_CPU_THIS_PTR the_i387
.get_control_word();
114 xmm
.xmm16u(1) = BX_CPU_THIS_PTR the_i387
.get_status_word ();
// Compress the 16-bit tag word into one bit per register: bit n of
// tag_byte is set when 2-bit field n of twd is anything other than 11b.
116 Bit16u twd
= BX_CPU_THIS_PTR the_i387
.get_tag_word(), tag_byte
= 0;
118 if((twd
& 0x0003) != 0x0003) tag_byte
|= 0x01;
119 if((twd
& 0x000c) != 0x000c) tag_byte
|= 0x02;
120 if((twd
& 0x0030) != 0x0030) tag_byte
|= 0x04;
121 if((twd
& 0x00c0) != 0x00c0) tag_byte
|= 0x08;
122 if((twd
& 0x0300) != 0x0300) tag_byte
|= 0x10;
123 if((twd
& 0x0c00) != 0x0c00) tag_byte
|= 0x20;
124 if((twd
& 0x3000) != 0x3000) tag_byte
|= 0x40;
125 if((twd
& 0xc000) != 0xc000) tag_byte
|= 0x80;
127 xmm
.xmm16u(2) = tag_byte
;
129 /* x87 FPU Opcode (16 bits) */
130 /* The lower 11 bits contain the FPU opcode, upper 5 bits are reserved */
131 xmm
.xmm16u(3) = BX_CPU_THIS_PTR the_i387
.foo
;
134 * x87 FPU IP Offset (32/64 bits)
135 * The contents of this field differ depending on the current
136 * addressing mode (16/32/64 bit) when the FXSAVE instruction was executed:
137 * + 64-bit mode - 64-bit IP offset
138 * + 32-bit mode - 32-bit IP offset
139 * + 16-bit mode - low 16 bits are IP offset; high 16 bits are reserved.
140 * x87 CS FPU IP Selector
141 * + 16 bit, in 16/32 bit mode only
143 #if BX_SUPPORT_X86_64
144 if (i
->os64L()) /* 64 bit operand size mode */
146 xmm
.xmm64u(1) = (BX_CPU_THIS_PTR the_i387
.fip
);
151 xmm
.xmm32u(2) = (BX_CPU_THIS_PTR the_i387
.fip
) & 0xffffffff;
152 xmm
.xmm32u(3) = (BX_CPU_THIS_PTR the_i387
.fcs
);
155 write_virtual_dqword_aligned(i
->seg(), RMAddr(i
), (Bit8u
*) &xmm
);
158 * x87 FPU Instruction Operand (Data) Pointer Offset (32/64 bits)
159 * The contents of this field differ depending on the current
160 * addressing mode (16/32 bit) when the FXSAVE instruction was executed:
161 * + 64-bit mode - 64-bit offset
162 * + 32-bit mode - 32-bit offset
163 * + 16-bit mode - low 16 bits are offset; high 16 bits are reserved.
164 * x87 DS FPU Instruction Operand (Data) Pointer Selector
165 * + 16 bit, in 16/32 bit mode only
167 #if BX_SUPPORT_X86_64
168 if (i
->os64L()) /* 64 bit operand size mode */
170 xmm
.xmm64u(0) = (BX_CPU_THIS_PTR the_i387
.fdp
);
175 xmm
.xmm32u(0) = (BX_CPU_THIS_PTR the_i387
.fdp
) & 0xffffffff;
176 xmm
.xmm32u(1) = (BX_CPU_THIS_PTR the_i387
.fds
);
179 #if BX_SUPPORT_SSE >= 1
180 xmm
.xmm32u(2) = BX_MXCSR_REGISTER
;
181 xmm
.xmm32u(3) = MXCSR_MASK
;
187 write_virtual_dqword_aligned(i
->seg(), RMAddr(i
) + 16, (Bit8u
*) &xmm
);
189 /* store i387 register file */
190 for(index
=0; index
< 8; index
++)
192 const floatx80
&fp
= BX_FPU_REG(index
);
194 xmm
.xmm64u(0) = fp
.fraction
;
196 xmm
.xmm16u(4) = fp
.exp
;
198 write_virtual_dqword_aligned(i
->seg(), RMAddr(i
)+index
*16+32, (Bit8u
*) &xmm
);
201 #if BX_SUPPORT_X86_64
202 if (BX_CPU_THIS_PTR efer
.ffxsr
&& CPL
== 0 && Is64BitMode())
203 return; // skip saving of the XMM state
206 #if BX_SUPPORT_SSE >= 1
207 /* store XMM register file */
208 for(index
=0; index
< BX_XMM_REGISTERS
; index
++)
210 // save XMM8-XMM15 only in 64-bit mode
211 if (index
< 8 || Is64BitMode()) {
212 write_virtual_dqword_aligned(i
->seg(),
213 RMAddr(i
)+index
*16+160, (Bit8u
*) &(BX_CPU_THIS_PTR xmm
[index
]));
218 /* do not touch reserved fields */
220 BX_INFO(("FXSAVE: required P6 support, use --enable-cpu-level=6 option"));
225 /* 0F AE Grp15 001 */
// FXRSTOR: reloads the x87/MMX/SSE state from the memory image at
// i->seg():RMAddr(i).
//
// Steps performed by the visible code:
//   1. exception() with BX_NM_EXCEPTION when CR0.TS is set, and with
//      BX_UD_EXCEPTION when CR0.EM is set.
//   2. Row at +0: control word, status word (tos is taken from status
//      bits 11..13), FPU opcode masked to 11 bits, FIP/FCS.
//   3. Row at +16: FDP/FDS. When CR4.OSFXSR is set, MXCSR is also loaded;
//      a value with bits outside MXCSR_MASK calls exception() with
//      BX_GP_EXCEPTION.
//   4. Eight x87 registers are read with read_virtual_tword from
//      +32 + 16*index.
//   5. The full tag word is rebuilt from the abridged tag byte.
//   6. When CR4.OSFXSR is set, the XMM registers are read from
//      +160 + 16*index; indices 8 and above only when Is64BitMode().
// On BX_SUPPORT_X86_64 builds the function returns before step 6 when
// efer.ffxsr is set, CPL is 0 and Is64BitMode() is true.
// NOTE(review): the MXCSR check in step 3 runs after cwd/swd/tos/foo/fip/
// fcs/fdp/fds were already written, so those stay modified if the #GP
// path is taken - confirm this ordering is intended.
// The declaration of 'index' is not visible in this excerpt.
226 void BX_CPU_C::FXRSTOR(bxInstruction_c
*i
)
228 #if BX_CPU_LEVEL >= 6
229 BxPackedXmmRegister xmm
;
232 BX_DEBUG(("FXRSTOR: restore FPU/MMX/SSE state"));
235 if(BX_CPU_THIS_PTR cr0
.get_TS())
236 exception(BX_NM_EXCEPTION
, 0, 0);
238 if(BX_CPU_THIS_PTR cr0
.get_EM())
239 exception(BX_UD_EXCEPTION
, 0, 0);
242 read_virtual_dqword_aligned(i
->seg(), RMAddr(i
), (Bit8u
*) &xmm
);
244 BX_CPU_THIS_PTR the_i387
.cwd
= xmm
.xmm16u(0);
245 BX_CPU_THIS_PTR the_i387
.swd
= xmm
.xmm16u(1);
// Top-of-stack index is taken from bits 11..13 of the status word.
246 BX_CPU_THIS_PTR the_i387
.tos
= (xmm
.xmm16u(1) >> 11) & 0x07;
248 /* Restore x87 FPU Opcode */
249 /* The lower 11 bits contain the FPU opcode, upper 5 bits are reserved */
250 BX_CPU_THIS_PTR the_i387
.foo
= xmm
.xmm16u(3) & 0x7FF;
252 /* Restore x87 FPU IP */
253 #if BX_SUPPORT_X86_64
254 if (i
->os64L()) /* 64 bit operand size mode */
256 BX_CPU_THIS_PTR the_i387
.fip
= xmm
.xmm64u(1);
261 BX_CPU_THIS_PTR the_i387
.fip
= xmm
.xmm32u(2);
262 BX_CPU_THIS_PTR the_i387
.fcs
= xmm
.xmm16u(5);
// Capture the abridged tag byte now: xmm is overwritten by the next read.
265 Bit32u twd
= 0, tag_byte
= xmm
.xmm16u(2);
267 /* Restore x87 FPU DP */
268 read_virtual_dqword_aligned(i
->seg(), RMAddr(i
) + 16, (Bit8u
*) &xmm
);
270 #if BX_SUPPORT_X86_64
271 if (i
->os64L()) /* 64 bit operand size mode */
273 BX_CPU_THIS_PTR the_i387
.fdp
= xmm
.xmm64u(0);
278 BX_CPU_THIS_PTR the_i387
.fdp
= xmm
.xmm32u(0);
279 BX_CPU_THIS_PTR the_i387
.fds
= xmm
.xmm16u(2);
282 #if BX_SUPPORT_SSE >= 1
283 /* If the OSFXSR bit in CR4 is not set, the FXRSTOR instruction does
284 not restore the states of the XMM and MXCSR registers. */
285 if(BX_CPU_THIS_PTR cr4
.get_OSFXSR())
287 Bit32u new_mxcsr
= xmm
.xmm32u(2);
288 if(new_mxcsr
& ~MXCSR_MASK
)
289 exception(BX_GP_EXCEPTION
, 0, 0);
291 BX_MXCSR_REGISTER
= new_mxcsr
;
295 /* load i387 register file */
296 for(index
=0; index
< 8; index
++)
298 read_virtual_tword(i
->seg(), RMAddr(i
)+index
*16+32, &(BX_FPU_REG(index
)));
303 * Note that the original format for FTW can be recreated from the stored
304 * FTW valid bits and the stored 80-bit FP data (assuming the stored data
305 * was not the contents of MMX registers) using the following table:
307 | Exponent | Exponent | Fraction | J,M bits | FTW valid | x87 FTW |
308 | all 1s | all 0s | all 0s | | | |
309 -------------------------------------------------------------------
310 | 0 | 0 | 0 | 0x | 1 | S 10 |
311 | 0 | 0 | 0 | 1x | 1 | V 00 |
312 -------------------------------------------------------------------
313 | 0 | 0 | 1 | 00 | 1 | S 10 |
314 | 0 | 0 | 1 | 10 | 1 | V 00 |
315 -------------------------------------------------------------------
316 | 0 | 1 | 0 | 0x | 1 | S 10 |
317 | 0 | 1 | 0 | 1x | 1 | S 10 |
318 -------------------------------------------------------------------
319 | 0 | 1 | 1 | 00 | 1 | Z 01 |
320 | 0 | 1 | 1 | 10 | 1 | S 10 |
321 -------------------------------------------------------------------
322 | 1 | 0 | 0 | 1x | 1 | S 10 |
323 | 1 | 0 | 0 | 1x | 1 | S 10 |
324 -------------------------------------------------------------------
325 | 1 | 0 | 1 | 00 | 1 | S 10 |
326 | 1 | 0 | 1 | 10 | 1 | S 10 |
327 -------------------------------------------------------------------
328 | all combinations above | 0 | E 11 |
// NOTE(review): the two rows for "exponent all 1s, fraction not all 0s"
// both show J,M = 1x; every other pair in this table is 0x then 1x, so
// the first of the two is presumably meant to read 0x - confirm against
// the Intel SDM FXSAVE tag-word table.
331 * The J-bit is defined to be the 1-bit binary integer to the left of
332 * the decimal place in the significand.
334 * The M-bit is defined to be the most significant bit of the fractional
335 * portion of the significand (i.e., the bit immediately to the right of
336 * the decimal place). When the M-bit is the most significant bit of the
337 * fractional portion of the significand, it must be 0 if the fraction
// Rebuild the 16-bit tag word from register 7 down to register 0: each
// pass contributes one 2-bit field and then shifts twd left by 2 and
// tag_byte left by 1. The final ">> 2" undoes the extra shift applied
// after the last iteration.
341 for(index
= 7;index
>= 0; index
--, twd
<<= 2, tag_byte
<<= 1)
343 if(tag_byte
& 0x80) {
344 const floatx80
&fpu_reg
= BX_FPU_REG(index
);
345 twd
|= FPU_tagof(fpu_reg
);
348 twd
|= FPU_Tag_Empty
;
352 BX_CPU_THIS_PTR the_i387
.twd
= (twd
>> 2);
354 #if BX_SUPPORT_X86_64
355 if (BX_CPU_THIS_PTR efer
.ffxsr
&& CPL
== 0 && Is64BitMode())
356 return; // skip restore of the XMM state
359 #if BX_SUPPORT_SSE >= 1
360 /* If the OSFXSR bit in CR4 is not set, the FXRSTOR instruction does
361 not restore the states of the XMM and MXCSR registers. */
362 if(BX_CPU_THIS_PTR cr4
.get_OSFXSR())
364 /* load XMM register file */
365 for(index
=0; index
< BX_XMM_REGISTERS
; index
++)
367 // restore XMM8-XMM15 only in 64-bit mode
368 if (index
< 8 || Is64BitMode()) {
369 read_virtual_dqword_aligned(i
->seg(),
370 RMAddr(i
)+index
*16+160, (Bit8u
*) &(BX_CPU_THIS_PTR xmm
[index
]));
377 BX_INFO(("FXRSTOR: required P6 support, use --enable-cpu-level=6 option"));
382 /* *************************** */
383 /* SSE: MEMORY MOVE OPERATIONS */
384 /* *************************** */
386 /* None of these opcodes ever generates a SIMD floating point exception */
389 /* MOVUPD: 66 0F 10 */
390 /* MOVDQU: F3 0F 6F */
// Unaligned 128-bit load into an XMM register.
// After prepareSSE(), the source is either XMM register i->rm() or the
// 16 bytes at i->seg():RMAddr(i) read with read_virtual_dqword (the
// non-"_aligned" variant); the value is written to XMM register i->nnn().
// The condition selecting register vs. memory is not visible in this
// excerpt.
391 void BX_CPU_C::MOVUPS_VpsWps(bxInstruction_c
*i
)
393 #if BX_SUPPORT_SSE >= 1
394 BX_CPU_THIS_PTR
prepareSSE();
396 BxPackedXmmRegister op
;
398 /* op is a register or memory reference */
400 op
= BX_READ_XMM_REG(i
->rm());
403 /* pointer, segment address pair */
404 read_virtual_dqword(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
407 /* now write result back to destination */
408 BX_WRITE_XMM_REG(i
->nnn(), op
);
410 BX_INFO(("MOVUPS_VpsWps: required SSE, use --enable-sse option"));
416 /* MOVUPD: 66 0F 11 */
417 /* MOVDQU: F3 0F 7F */
// Unaligned 128-bit store from an XMM register.
// After prepareSSE(), XMM register i->nnn() is copied either into XMM
// register i->rm() or to the 16 bytes at i->seg():RMAddr(i) using
// write_virtual_dqword (the non-"_aligned" variant).
// The condition selecting register vs. memory is not visible in this
// excerpt.
418 void BX_CPU_C::MOVUPS_WpsVps(bxInstruction_c
*i
)
420 #if BX_SUPPORT_SSE >= 1
421 BX_CPU_THIS_PTR
prepareSSE();
423 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->nnn());
425 /* op is a register or memory reference */
427 BX_WRITE_XMM_REG(i
->rm(), op
);
430 write_virtual_dqword(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
433 BX_INFO(("MOVUPS_WpsVps: required SSE, use --enable-sse option"));
439 /* MOVAPD: 66 0F 28 */
440 /* MOVDQA: 66 0F 6F */
// Aligned 128-bit load into an XMM register.
// Same shape as MOVUPS_VpsWps, except the memory form reads through
// read_virtual_dqword_aligned. The value is written to XMM register
// i->nnn().
// The condition selecting register vs. memory is not visible in this
// excerpt.
441 void BX_CPU_C::MOVAPS_VpsWps(bxInstruction_c
*i
)
443 #if BX_SUPPORT_SSE >= 1
444 BX_CPU_THIS_PTR
prepareSSE();
446 BxPackedXmmRegister op
;
448 /* op is a register or memory reference */
450 op
= BX_READ_XMM_REG(i
->rm());
453 /* pointer, segment address pair */
454 read_virtual_dqword_aligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
457 /* now write result back to destination */
458 BX_WRITE_XMM_REG(i
->nnn(), op
);
460 BX_INFO(("MOVAPS_VpsWps: required SSE, use --enable-sse option"));
466 /* MOVAPD: 66 0F 29 */
467 /* MOVDQA: 66 0F 7F */
// Aligned 128-bit store from an XMM register.
// After prepareSSE(), XMM register i->nnn() is copied either into XMM
// register i->rm() or to memory at i->seg():RMAddr(i) through
// write_virtual_dqword_aligned.
// The condition selecting register vs. memory is not visible in this
// excerpt.
468 void BX_CPU_C::MOVAPS_WpsVps(bxInstruction_c
*i
)
470 #if BX_SUPPORT_SSE >= 1
471 BX_CPU_THIS_PTR
prepareSSE();
473 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->nnn());
475 /* op is a register or memory reference */
477 BX_WRITE_XMM_REG(i
->rm(), op
);
480 write_virtual_dqword_aligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
483 BX_INFO(("MOVAPS_WpsVps: required SSE, use --enable-sse option"));
// Scalar single-precision load into an XMM register.
// Starts from the current contents of XMM register i->nnn() and replaces
// dword 0 with either the low dword of XMM register i->rm() or a dword
// read from i->seg():RMAddr(i); the result is written back to i->nnn().
// NOTE(review): the memory-form comment says the upper 96 bits are
// cleared, but the statements doing so (and the val32 declaration and the
// register/memory condition) are not visible in this excerpt - verify
// they are present in the full file.
489 void BX_CPU_C::MOVSS_VssWss(bxInstruction_c
*i
)
491 #if BX_SUPPORT_SSE >= 1
492 BX_CPU_THIS_PTR
prepareSSE();
494 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->nnn());
497 /* op2 is a register or memory reference */
500 /* If the source operand is an XMM register, the high-order
501 96 bits of the destination XMM register are not modified. */
502 op
.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i
->rm());
505 /* pointer, segment address pair */
506 read_virtual_dword(i
->seg(), RMAddr(i
), &val32
);
508 /* If the source operand is a memory location, the high-order
509 96 bits of the destination XMM register are cleared to 0s */
510 op
.xmm32u(0) = val32
;
515 /* now write result back to destination */
516 BX_WRITE_XMM_REG(i
->nnn(), op
);
518 BX_INFO(("MOVSS_VssWss: required SSE, use --enable-sse option"));
// Scalar single-precision store from an XMM register.
// After prepareSSE(), the low dword of XMM register i->nnn() is written
// either to the low dword of XMM register i->rm() (other bits of that
// register are left alone by BX_WRITE_XMM_REG_LO_DWORD, per the comment
// below) or as a dword to i->seg():RMAddr(i).
// The condition selecting register vs. memory is not visible in this
// excerpt.
524 void BX_CPU_C::MOVSS_WssVss(bxInstruction_c
*i
)
526 #if BX_SUPPORT_SSE >= 1
527 BX_CPU_THIS_PTR
prepareSSE();
529 Bit32u val32
= BX_READ_XMM_REG_LO_DWORD(i
->nnn());
531 /* destination is a register or memory reference */
534 /* If the source operand is an XMM register, the high-order
535 96 bits of the destination XMM register are not modified. */
536 BX_WRITE_XMM_REG_LO_DWORD(i
->rm(), val32
);
539 /* pointer, segment address pair */
540 write_virtual_dword(i
->seg(), RMAddr(i
), &val32
);
543 BX_INFO(("MOVSS_WssVss: required SSE, use --enable-sse option"));
// Scalar double-precision load into an XMM register (needs
// BX_SUPPORT_SSE >= 2).
// Starts from the current contents of XMM register i->nnn() and replaces
// qword 0 with either the low qword of XMM register i->rm() or a qword
// read from i->seg():RMAddr(i); the result is written back to i->nnn().
// NOTE(review): the memory-form comment says the upper 64 bits are
// cleared, but the statement doing so (and the val64 declaration and the
// register/memory condition) is not visible in this excerpt - verify.
549 void BX_CPU_C::MOVSD_VsdWsd(bxInstruction_c
*i
)
551 #if BX_SUPPORT_SSE >= 2
552 BX_CPU_THIS_PTR
prepareSSE();
554 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->nnn());
557 /* op2 is a register or memory reference */
560 /* If the source operand is an XMM register, the high-order
561 64 bits of the destination XMM register are not modified. */
562 op
.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i
->rm());
565 /* pointer, segment address pair */
566 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
568 /* If the source operand is a memory location, the high-order
569 64 bits of the destination XMM register are cleared to 0s */
570 op
.xmm64u(0) = val64
;
574 /* now write result back to destination */
575 BX_WRITE_XMM_REG(i
->nnn(), op
);
577 BX_INFO(("MOVSD_VsdWsd: required SSE2, use --enable-sse option"));
// Scalar double-precision store from an XMM register (needs
// BX_SUPPORT_SSE >= 2).
// After prepareSSE(), the low qword of XMM register i->nnn() is written
// either to the low qword of XMM register i->rm() or as a qword to
// i->seg():RMAddr(i).
// The condition selecting register vs. memory is not visible in this
// excerpt.
583 void BX_CPU_C::MOVSD_WsdVsd(bxInstruction_c
*i
)
585 #if BX_SUPPORT_SSE >= 2
586 BX_CPU_THIS_PTR
prepareSSE();
588 Bit64u val64
= BX_READ_XMM_REG_LO_QWORD(i
->nnn());
590 /* destination is a register or memory reference */
593 /* If the source operand is an XMM register, the high-order
594 64 bits of the destination XMM register are not modified. */
595 BX_WRITE_XMM_REG_LO_QWORD(i
->rm(), val64
);
598 /* pointer, segment address pair */
599 write_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
602 BX_INFO(("MOVSD_WsdVsd: required SSE2, use --enable-sse option"));
608 /* MOVLPD: 66 0F 12 */
// Loads the low qword of XMM register i->nnn().
// When i->modC0() is true (the MOVHLPS encoding, per the comment below)
// the value is the HIGH qword of XMM register i->rm(); otherwise it is a
// qword read from i->seg():RMAddr(i). Only the low qword of the
// destination is written.
// The declaration of val64 is not visible in this excerpt.
609 void BX_CPU_C::MOVLPS_VpsMq(bxInstruction_c
*i
)
611 #if BX_SUPPORT_SSE >= 1
612 BX_CPU_THIS_PTR
prepareSSE();
615 if (i
->modC0()) /* MOVHLPS xmm1, xmm2 opcode */
617 val64
= BX_READ_XMM_REG_HI_QWORD(i
->rm());
620 /* pointer, segment address pair */
621 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
624 /* now write result back to destination */
625 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), val64
);
627 BX_INFO(("MOVLPS_VpsMq: required SSE, use --enable-sse option"));
// MOVDDUP (needs BX_SUPPORT_SSE >= 3): duplicates one 64-bit value into
// both halves of XMM register i->nnn().
// The 64-bit source is the low qword of XMM register i->rm() or a qword
// read from i->seg():RMAddr(i).
// The val64 declaration and the register/memory condition are not visible
// in this excerpt.
633 void BX_CPU_C::MOVDDUP_VpdWq(bxInstruction_c
*i
)
635 #if BX_SUPPORT_SSE >= 3
636 BX_CPU_THIS_PTR
prepareSSE();
638 BxPackedXmmRegister op
;
642 val64
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
645 /* pointer, segment address pair */
646 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
// Same value goes into both qwords of the result.
649 op
.xmm64u(0) = val64
;
650 op
.xmm64u(1) = val64
;
652 /* now write result back to destination */
653 BX_WRITE_XMM_REG(i
->nnn(), op
);
655 BX_INFO(("MOVDDUP_VpdWq: required SSE3, use --enable-sse option"));
// MOVSLDUP (needs BX_SUPPORT_SSE >= 3): duplicates the even-indexed
// dwords of the 128-bit source.
// Result dwords {0,1,2,3} = source dwords {0,0,2,2}, written to XMM
// register i->nnn(). The source is XMM register i->rm() or 16 bytes read
// from i->seg():RMAddr(i).
// NOTE(review): the memory read uses readVirtualDQwordAligned, whereas the
// other aligned loads in this file call read_virtual_dqword_aligned -
// confirm both names exist and behave the same.
661 void BX_CPU_C::MOVSLDUP_VpsWps(bxInstruction_c
*i
)
663 #if BX_SUPPORT_SSE >= 3
664 BX_CPU_THIS_PTR
prepareSSE();
665 BxPackedXmmRegister op
, result
;
667 /* op is a register or memory reference */
669 op
= BX_READ_XMM_REG(i
->rm());
672 /* pointer, segment address pair */
673 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
676 result
.xmm32u(0) = op
.xmm32u(0);
677 result
.xmm32u(1) = op
.xmm32u(0);
678 result
.xmm32u(2) = op
.xmm32u(2);
679 result
.xmm32u(3) = op
.xmm32u(2);
681 /* now write result back to destination */
682 BX_WRITE_XMM_REG(i
->nnn(), result
);
684 BX_INFO(("MOVSLDUP_VpsWps: required SSE3, use --enable-sse option"));
// MOVSHDUP (needs BX_SUPPORT_SSE >= 3): duplicates the odd-indexed dwords
// of the 128-bit source.
// Result dwords {0,1,2,3} = source dwords {1,1,3,3}, written to XMM
// register i->nnn(). The source is XMM register i->rm() or 16 bytes read
// from i->seg():RMAddr(i).
// NOTE(review): the memory read uses readVirtualDQwordAligned, whereas the
// other aligned loads in this file call read_virtual_dqword_aligned -
// confirm both names exist and behave the same.
690 void BX_CPU_C::MOVSHDUP_VpsWps(bxInstruction_c
*i
)
692 #if BX_SUPPORT_SSE >= 3
693 BX_CPU_THIS_PTR
prepareSSE();
694 BxPackedXmmRegister op
, result
;
696 /* op is a register or memory reference */
698 op
= BX_READ_XMM_REG(i
->rm());
701 /* pointer, segment address pair */
702 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
705 result
.xmm32u(0) = op
.xmm32u(1);
706 result
.xmm32u(1) = op
.xmm32u(1);
707 result
.xmm32u(2) = op
.xmm32u(3);
708 result
.xmm32u(3) = op
.xmm32u(3);
710 /* now write result back to destination */
711 BX_WRITE_XMM_REG(i
->nnn(), result
);
// The message names this handler (it previously read "MOVHLDUP_VpsWps",
// which matches no function in this file).
713 BX_INFO(("MOVSHDUP_VpsWps: required SSE3, use --enable-sse option"));
719 /* MOVLPD: 66 0F 13 */
// Stores the low qword of XMM register i->nnn() to memory at
// i->seg():RMAddr(i), after prepareSSE(). No register-destination form is
// handled in the visible code.
720 void BX_CPU_C::MOVLPS_MqVps(bxInstruction_c
*i
)
722 #if BX_SUPPORT_SSE >= 1
723 BX_CPU_THIS_PTR
prepareSSE();
724 write_virtual_qword(i
->seg(), RMAddr(i
), &BX_XMM_REG_LO_QWORD(i
->nnn()));
726 BX_INFO(("MOVLPS_MqVps: required SSE, use --enable-sse option"));
732 /* MOVHPD: 66 0F 16 */
// Loads the high qword of XMM register i->nnn().
// When i->modC0() is true (the MOVLHPS encoding, per the comment below)
// the value is the LOW qword of XMM register i->rm(); otherwise it is a
// qword read from i->seg():RMAddr(i). Only the high qword of the
// destination is written.
// The declaration of val64 is not visible in this excerpt.
733 void BX_CPU_C::MOVHPS_VpsMq(bxInstruction_c
*i
)
735 #if BX_SUPPORT_SSE >= 1
736 BX_CPU_THIS_PTR
prepareSSE();
739 if (i
->modC0()) /* MOVLHPS xmm1, xmm2 opcode */
741 val64
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
744 /* pointer, segment address pair */
745 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
748 /* now write result back to destination */
749 BX_WRITE_XMM_REG_HI_QWORD(i
->nnn(), val64
);
751 BX_INFO(("MOVHPS_VpsMq: required SSE, use --enable-sse option"));
757 /* MOVHPD: 66 0F 17 */
// Stores the high qword of XMM register i->nnn() to memory at
// i->seg():RMAddr(i), after prepareSSE().
758 void BX_CPU_C::MOVHPS_MqVps(bxInstruction_c
*i
)
760 #if BX_SUPPORT_SSE >= 1
761 BX_CPU_THIS_PTR
prepareSSE();
762 write_virtual_qword(i
->seg(), RMAddr(i
), &BX_XMM_REG_HI_QWORD(i
->nnn()));
764 BX_INFO(("MOVHPS_MqVps: required SSE, use --enable-sse option"));
// LDDQU (needs BX_SUPPORT_SSE >= 3): reads 16 bytes from
// i->seg():RMAddr(i) with read_virtual_dqword (the non-"_aligned"
// variant) and writes them to XMM register i->nnn().
770 void BX_CPU_C::LDDQU_VdqMdq(bxInstruction_c
*i
)
772 #if BX_SUPPORT_SSE >= 3
773 BX_CPU_THIS_PTR
prepareSSE();
775 BxPackedXmmRegister op
;
776 read_virtual_dqword(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
778 /* now write result back to destination */
779 BX_WRITE_XMM_REG(i
->nnn(), op
);
782 BX_INFO(("LDDQU_VdqMdq: required SSE3, use --enable-sse option"));
// MASKMOVDQU (needs BX_SUPPORT_SSE >= 2): byte-masked store of an XMM
// register.
// Data comes from XMM register i->nnn(), the mask from XMM register
// i->rm(). For each of the 16 bytes, byte j of the data is written with
// write_virtual_byte to BX_SEG_REG_DS at offset rdi+j when bit 7 of mask
// byte j is set.
// NOTE(review): the declaration of 'rdi' and the address-size branches
// that assign it are not visible in this excerpt, and neither is the
// condition guarding the "unexpected memory reference" message.
788 void BX_CPU_C::MASKMOVDQU_VdqUdq(bxInstruction_c
*i
)
790 #if BX_SUPPORT_SSE >= 2
791 BX_CPU_THIS_PTR
prepareSSE();
794 BX_INFO(("MASKMOVDQU_VdqUdq: unexpected memory reference"));
799 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->nnn()),
800 mask
= BX_READ_XMM_REG(i
->rm());
802 #if BX_SUPPORT_X86_64
803 if (i
->as64L()) { /* 64 bit address mode */
811 else { /* 16 bit address mode */
815 /* partial write, no data will be written to memory if mask is all 0s */
816 for(unsigned j
=0; j
<16; j
++)
818 if(mask
.xmmubyte(j
) & 0x80)
819 write_virtual_byte(BX_SEG_REG_DS
, rdi
+j
, &op
.xmmubyte(j
));
823 BX_INFO(("MASKMOVDQU_VdqUdq: required SSE2, use --enable-sse option"));
// MOVMSKPS: collects the sign bits (bit 31) of the four dwords of an XMM
// register into bits 0..3 of a 32-bit value, then stores it with
// BX_WRITE_32BIT_REGZ.
// NOTE(review): this handler reads the XMM source from i->nnn() and
// writes the general register i->rm(), the opposite of PMOVMSKB_GdUdq in
// this file (source i->rm(), destination i->nnn()) - confirm against the
// decoder's field assignment for this opcode.
// The declaration/initialisation of val32 is not visible in this excerpt.
829 void BX_CPU_C::MOVMSKPS_GdVRps(bxInstruction_c
*i
)
831 #if BX_SUPPORT_SSE >= 1
832 BX_CPU_THIS_PTR
prepareSSE();
834 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->nnn());
837 if(op
.xmm32u(0) & 0x80000000) val32
|= 0x1;
838 if(op
.xmm32u(1) & 0x80000000) val32
|= 0x2;
839 if(op
.xmm32u(2) & 0x80000000) val32
|= 0x4;
840 if(op
.xmm32u(3) & 0x80000000) val32
|= 0x8;
842 BX_WRITE_32BIT_REGZ(i
->rm(), val32
);
844 BX_INFO(("MOVMSKPS_GdVRps: required SSE, use --enable-sse option"));
// MOVMSKPD (needs BX_SUPPORT_SSE >= 2): collects the sign bits of the two
// qwords of an XMM register into bits 0..1 of a 32-bit value. The sign of
// each qword is tested as bit 31 of its upper dword (dwords 1 and 3).
// NOTE(review): like MOVMSKPS_GdVRps, the XMM source is i->nnn() and the
// general-register destination is i->rm() - confirm against the decoder.
// The declaration/initialisation of val32 is not visible in this excerpt.
850 void BX_CPU_C::MOVMSKPD_GdVRpd(bxInstruction_c
*i
)
852 #if BX_SUPPORT_SSE >= 2
853 BX_CPU_THIS_PTR
prepareSSE();
855 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->nnn());
858 if(op
.xmm32u(1) & 0x80000000) val32
|= 0x1;
859 if(op
.xmm32u(3) & 0x80000000) val32
|= 0x2;
861 BX_WRITE_32BIT_REGZ(i
->rm(), val32
);
863 BX_INFO(("MOVMSKPD_GdVRpd: required SSE2, use --enable-sse option"));
// MOVD xmm, r/m32 (needs BX_SUPPORT_SSE >= 2): moves a 32-bit value into
// XMM register i->nnn().
// The source is 32-bit general register i->rm() or a dword read from
// i->seg():RMAddr(i); it is widened to 64 bits and stored in qword 0.
// NOTE(review): the op2 declaration, the register/memory condition and
// any assignment to qword 1 of op1 are not visible in this excerpt.
869 void BX_CPU_C::MOVD_VdqEd(bxInstruction_c
*i
)
871 #if BX_SUPPORT_SSE >= 2
872 BX_CPU_THIS_PTR
prepareSSE();
874 BxPackedXmmRegister op1
;
877 /* op2 is a register or memory reference */
879 op2
= BX_READ_32BIT_REG(i
->rm());
882 /* pointer, segment address pair */
883 read_virtual_dword(i
->seg(), RMAddr(i
), &op2
);
886 op1
.xmm64u(0) = (Bit64u
)(op2
);
889 /* now write result back to destination */
890 BX_WRITE_XMM_REG(i
->nnn(), op1
);
892 BX_INFO(("MOVD_VdqEd: required SSE2, use --enable-sse option"));
897 #if BX_SUPPORT_X86_64
// MOVQ xmm, r/m64 (needs BX_SUPPORT_SSE >= 2): moves a 64-bit value into
// XMM register i->nnn().
// The source is 64-bit general register i->rm() or a qword read from
// i->seg():RMAddr(i).
// NOTE(review): the statements copying op2 into op1 (and the op2
// declaration) are not visible in this excerpt - verify they are present
// in the full file.
900 void BX_CPU_C::MOVQ_VdqEq(bxInstruction_c
*i
)
902 #if BX_SUPPORT_SSE >= 2
903 BX_CPU_THIS_PTR
prepareSSE();
905 BxPackedXmmRegister op1
;
908 /* op2 is a register or memory reference */
910 op2
= BX_READ_64BIT_REG(i
->rm());
913 /* pointer, segment address pair */
914 read_virtual_qword(i
->seg(), RMAddr(i
), &op2
);
920 /* now write result back to destination */
921 BX_WRITE_XMM_REG(i
->nnn(), op1
);
923 BX_INFO(("MOVQ_VdqEq: required SSE2, use --enable-sse option"));
// MOVD r/m32, xmm (needs BX_SUPPORT_SSE >= 2): moves the low dword of XMM
// register i->nnn() to 32-bit general register i->rm() (through
// BX_WRITE_32BIT_REGZ) or to memory at i->seg():RMAddr(i).
// The condition selecting register vs. memory is not visible in this
// excerpt.
931 void BX_CPU_C::MOVD_EdVd(bxInstruction_c
*i
)
933 #if BX_SUPPORT_SSE >= 2
934 BX_CPU_THIS_PTR
prepareSSE();
936 Bit32u op2
= BX_READ_XMM_REG_LO_DWORD(i
->nnn());
938 /* destination is a register or memory reference */
940 BX_WRITE_32BIT_REGZ(i
->rm(), op2
);
943 /* pointer, segment address pair */
944 write_virtual_dword(i
->seg(), RMAddr(i
), &op2
);
947 BX_INFO(("MOVD_EdVd: required SSE2, use --enable-sse option"));
952 #if BX_SUPPORT_X86_64
// MOVQ r/m64, xmm (needs BX_SUPPORT_SSE >= 2): moves the low qword of XMM
// register i->nnn() to 64-bit general register i->rm() or to memory at
// i->seg():RMAddr(i).
// The condition selecting register vs. memory is not visible in this
// excerpt.
955 void BX_CPU_C::MOVQ_EqVq(bxInstruction_c
*i
)
957 #if BX_SUPPORT_SSE >= 2
958 BX_CPU_THIS_PTR
prepareSSE();
960 Bit64u op2
= BX_READ_XMM_REG_LO_QWORD(i
->nnn());
962 /* destination is a register or memory reference */
964 BX_WRITE_64BIT_REG(i
->rm(), op2
);
967 /* pointer, segment address pair */
968 write_virtual_qword(i
->seg(), RMAddr(i
), &op2
);
971 BX_INFO(("MOVQ_EqVq: required SSE2, use --enable-sse option"));
// MOVQ xmm, xmm/m64 (needs BX_SUPPORT_SSE >= 2): loads a 64-bit value
// into qword 0 of XMM register i->nnn().
// The source is the low qword of XMM register i->rm() or a qword read
// from i->seg():RMAddr(i).
// NOTE(review): the comment below announces zero-extension to 128 bits,
// but the statement clearing qword 1 (and the val64 declaration and the
// register/memory condition) is not visible in this excerpt - verify.
979 void BX_CPU_C::MOVQ_VqWq(bxInstruction_c
*i
)
981 #if BX_SUPPORT_SSE >= 2
982 BX_CPU_THIS_PTR
prepareSSE();
984 BxPackedXmmRegister op
;
988 op
.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i
->rm());
991 /* pointer, segment address pair */
992 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
993 op
.xmm64u(0) = val64
;
996 /* zero-extension to 128 bit */
999 /* now write result back to destination */
1000 BX_WRITE_XMM_REG(i
->nnn(), op
);
1002 BX_INFO(("MOVQ_VqWq: required SSE2, use --enable-sse option"));
// MOVQ xmm/m64, xmm (needs BX_SUPPORT_SSE >= 2): stores the low qword of
// XMM register i->nnn().
// Register form: qword 1 of the copy is zeroed and the whole 128-bit
// value is written to XMM register i->rm().
// Memory form: only qword 0 is written to i->seg():RMAddr(i).
// The condition selecting register vs. memory is not visible in this
// excerpt.
1008 void BX_CPU_C::MOVQ_WqVq(bxInstruction_c
*i
)
1010 #if BX_SUPPORT_SSE >= 2
1011 BX_CPU_THIS_PTR
prepareSSE();
1013 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->nnn());
1017 op
.xmm64u(1) = 0; /* zero-extension to 128 bits */
1018 BX_WRITE_XMM_REG(i
->rm(), op
);
1021 write_virtual_qword(i
->seg(), RMAddr(i
), &(op
.xmm64u(0)));
1024 BX_INFO(("MOVQ_WqVq: required SSE2, use --enable-sse option"));
// MOVDQ2Q (needs BX_SUPPORT_SSE >= 2): copies the low qword of an XMM
// register into an MMX register.
// Calls prepareSSE() and then prepareFPU2MMX() before touching MMX state.
// NOTE(review): the XMM source is i->nnn() and the MMX destination is
// i->rm() - confirm against the decoder's field assignment.
1030 void BX_CPU_C::MOVDQ2Q_PqVRq(bxInstruction_c
*i
)
1032 #if BX_SUPPORT_SSE >= 2
1033 BX_CPU_THIS_PTR
prepareSSE();
1034 BX_CPU_THIS_PTR
prepareFPU2MMX();
1036 BxPackedMmxRegister mm
;
1037 MMXUQ(mm
) = BX_READ_XMM_REG_LO_QWORD(i
->nnn());
1039 BX_WRITE_MMX_REG(i
->rm(), mm
);
1041 BX_INFO(("MOVDQ2Q_PqVRq: required SSE2, use --enable-sse option"));
// MOVQ2DQ (needs BX_SUPPORT_SSE >= 2): copies an MMX register into the
// low qword of an XMM register.
// Calls prepareSSE() and then prepareFPU2MMX() before touching MMX state.
// NOTE(review): the MMX source is i->nnn() and the XMM destination is
// i->rm() - confirm against the decoder. Any assignment to qword 1 of
// 'op' is not visible in this excerpt.
1047 void BX_CPU_C::MOVQ2DQ_VdqQq(bxInstruction_c
*i
)
1049 #if BX_SUPPORT_SSE >= 2
1050 BX_CPU_THIS_PTR
prepareSSE();
1051 BX_CPU_THIS_PTR
prepareFPU2MMX();
1053 BxPackedXmmRegister op
;
1054 BxPackedMmxRegister mm
= BX_READ_MMX_REG(i
->nnn());
1056 op
.xmm64u(0) = MMXUQ(mm
);
1059 BX_WRITE_XMM_REG(i
->rm(), op
);
1061 BX_INFO(("MOVQ2DQ_VdqQq: required SSE2, use --enable-sse option"));
// PMOVMSKB (needs BX_SUPPORT_SSE >= 2): builds a 16-bit mask from the top
// bit of each byte of XMM register i->rm(); bit n of the mask is set when
// bit 7 of byte n is set. The mask is stored into general register
// i->nnn() with BX_WRITE_32BIT_REGZ.
// The declaration/initialisation of 'result' is not visible in this
// excerpt.
1067 void BX_CPU_C::PMOVMSKB_GdUdq(bxInstruction_c
*i
)
1069 #if BX_SUPPORT_SSE >= 2
1070 BX_CPU_THIS_PTR
prepareSSE();
1072 BxPackedXmmRegister op
= BX_READ_XMM_REG(i
->rm());
1075 if(op
.xmmubyte(0x0) & 0x80) result
|= 0x0001;
1076 if(op
.xmmubyte(0x1) & 0x80) result
|= 0x0002;
1077 if(op
.xmmubyte(0x2) & 0x80) result
|= 0x0004;
1078 if(op
.xmmubyte(0x3) & 0x80) result
|= 0x0008;
1079 if(op
.xmmubyte(0x4) & 0x80) result
|= 0x0010;
1080 if(op
.xmmubyte(0x5) & 0x80) result
|= 0x0020;
1081 if(op
.xmmubyte(0x6) & 0x80) result
|= 0x0040;
1082 if(op
.xmmubyte(0x7) & 0x80) result
|= 0x0080;
1083 if(op
.xmmubyte(0x8) & 0x80) result
|= 0x0100;
1084 if(op
.xmmubyte(0x9) & 0x80) result
|= 0x0200;
1085 if(op
.xmmubyte(0xA) & 0x80) result
|= 0x0400;
1086 if(op
.xmmubyte(0xB) & 0x80) result
|= 0x0800;
1087 if(op
.xmmubyte(0xC) & 0x80) result
|= 0x1000;
1088 if(op
.xmmubyte(0xD) & 0x80) result
|= 0x2000;
1089 if(op
.xmmubyte(0xE) & 0x80) result
|= 0x4000;
1090 if(op
.xmmubyte(0xF) & 0x80) result
|= 0x8000;
1092 /* now write result back to destination */
1093 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
1096 BX_INFO(("PMOVMSKB_GdUdq: required SSE2, use --enable-sse option"));
1101 /* **************************** */
1102 /* SSE: STORE DATA NON-TEMPORAL */
1103 /* **************************** */
// MOVNTI m32, r32 (needs BX_SUPPORT_SSE >= 2): writes 32-bit general
// register i->nnn() to memory at i->seg():RMAddr(i) with an ordinary
// write_virtual_dword. prepareSSE() is not called in the visible code.
1106 void BX_CPU_C::MOVNTI_MdGd(bxInstruction_c
*i
)
1108 #if BX_SUPPORT_SSE >= 2
1109 Bit32u val32
= BX_READ_32BIT_REG(i
->nnn());
1110 write_virtual_dword(i
->seg(), RMAddr(i
), &val32
);
1112 BX_INFO(("MOVNTI_MdGd: required SSE2, use --enable-sse option"));
1117 #if BX_SUPPORT_X86_64
// MOVNTI m64, r64 (needs BX_SUPPORT_SSE >= 2): writes 64-bit general
// register i->nnn() to memory at i->seg():RMAddr(i) with an ordinary
// write_virtual_qword. prepareSSE() is not called in the visible code.
1120 void BX_CPU_C::MOVNTI_MqGq(bxInstruction_c
*i
)
1122 #if BX_SUPPORT_SSE >= 2
1123 Bit64u val64
= BX_READ_64BIT_REG(i
->nnn());
1124 write_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
1126 BX_INFO(("MOVNTI_MqGq: required SSE2, use --enable-sse option"));
1133 /* MOVNTPS: 0F 2B */
1134 /* MOVNTPD: 66 0F 2B */
1135 /* MOVNTDQ: 66 0F E7 */
// Non-temporal 128-bit store: after prepareSSE(), writes XMM register
// i->nnn() to memory at i->seg():RMAddr(i) through
// write_virtual_dqword_aligned, passing the address of the register
// itself rather than a local copy.
1136 void BX_CPU_C::MOVNTPS_MpsVps(bxInstruction_c
*i
)
1138 #if BX_SUPPORT_SSE >= 1
1139 BX_CPU_THIS_PTR
prepareSSE();
1140 write_virtual_dqword_aligned(i
->seg(), RMAddr(i
), (Bit8u
*)(&BX_READ_XMM_REG(i
->nnn())));
1142 BX_INFO(("MOVNTPS_MpsVps: required SSE, use --enable-sse option"));
1147 /* ************************** */
1148 /* 3-BYTE-OPCODE INSTRUCTIONS */
1149 /* ************************** */
1151 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
// PMOVSXBW (needs BX_SUPPORT_SSE >= 4): widens eight bytes to eight
// 16-bit words in XMM register i->nnn().
// The 64-bit source is the low qword of XMM register i->rm() or a qword
// read from i->seg():RMAddr(i). Each byte goes through a (Bit8s) cast
// before being stored in the 16-bit lane, which is what carries its sign.
// The val64 declaration and the register/memory condition are not visible
// in this excerpt.
1154 void BX_CPU_C::PMOVSXBW_VdqWq(bxInstruction_c
*i
)
1156 #if BX_SUPPORT_SSE >= 4
1157 BX_CPU_THIS_PTR
prepareSSE();
1158 BxPackedXmmRegister result
;
1163 val64
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1166 /* pointer, segment address pair */
1167 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
1170 result
.xmm16u(0) = (Bit8s
) (val64
& 0xFF);
1171 result
.xmm16u(1) = (Bit8s
) ((val64
>> 8) & 0xFF);
1172 result
.xmm16u(2) = (Bit8s
) ((val64
>> 16) & 0xFF);
1173 result
.xmm16u(3) = (Bit8s
) ((val64
>> 24) & 0xFF);
1174 result
.xmm16u(4) = (Bit8s
) ((val64
>> 32) & 0xFF);
1175 result
.xmm16u(5) = (Bit8s
) ((val64
>> 40) & 0xFF);
1176 result
.xmm16u(6) = (Bit8s
) ((val64
>> 48) & 0xFF);
1177 result
.xmm16u(7) = (Bit8s
) (val64
>> 56);
1179 /* now write result back to destination */
1180 BX_WRITE_XMM_REG(i
->nnn(), result
);
1182 BX_INFO(("PMOVSXBW_VdqWq: required SSE4, use --enable-sse option"));
// PMOVSXBD (needs BX_SUPPORT_SSE >= 4): widens four bytes to four 32-bit
// dwords in XMM register i->nnn().
// The 32-bit source is the low dword of XMM register i->rm() or a dword
// read from i->seg():RMAddr(i). Each byte goes through a (Bit8s) cast
// before being stored in the 32-bit lane.
// The val32 declaration and the register/memory condition are not visible
// in this excerpt.
1188 void BX_CPU_C::PMOVSXBD_VdqWd(bxInstruction_c
*i
)
1190 #if BX_SUPPORT_SSE >= 4
1191 BX_CPU_THIS_PTR
prepareSSE();
1192 BxPackedXmmRegister result
;
1197 val32
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1200 /* pointer, segment address pair */
1201 read_virtual_dword(i
->seg(), RMAddr(i
), &val32
);
1204 result
.xmm32u(0) = (Bit8s
) (val32
& 0xFF);
1205 result
.xmm32u(1) = (Bit8s
) ((val32
>> 8) & 0xFF);
1206 result
.xmm32u(2) = (Bit8s
) ((val32
>> 16) & 0xFF);
1207 result
.xmm32u(3) = (Bit8s
) (val32
>> 24);
1209 /* now write result back to destination */
1210 BX_WRITE_XMM_REG(i
->nnn(), result
);
1212 BX_INFO(("PMOVSXBD_VdqWd: required SSE4, use --enable-sse option"));
// PMOVSXBQ (needs BX_SUPPORT_SSE >= 4): widens two bytes to two 64-bit
// qwords in XMM register i->nnn().
// The 16-bit source is the low word of XMM register i->rm() or a word
// read from i->seg():RMAddr(i). Each byte goes through a (Bit8s) cast
// before being stored in the 64-bit lane.
// The val16 declaration and the register/memory condition are not visible
// in this excerpt.
1218 void BX_CPU_C::PMOVSXBQ_VdqWw(bxInstruction_c
*i
)
1220 #if BX_SUPPORT_SSE >= 4
1221 BX_CPU_THIS_PTR
prepareSSE();
1222 BxPackedXmmRegister result
;
1227 val16
= BX_READ_XMM_REG_LO_WORD(i
->rm());
1230 /* pointer, segment address pair */
1231 read_virtual_word(i
->seg(), RMAddr(i
), &val16
);
1234 result
.xmm64u(0) = (Bit8s
) (val16
& 0xFF);
1235 result
.xmm64u(1) = (Bit8s
) (val16
>> 8);
1237 /* now write result back to destination */
1238 BX_WRITE_XMM_REG(i
->nnn(), result
);
1240 BX_INFO(("PMOVSXBQ_VdqWw: required SSE4, use --enable-sse option"));
// PMOVSXWD (needs BX_SUPPORT_SSE >= 4): widens four 16-bit words to four
// 32-bit dwords in XMM register i->nnn().
// The 64-bit source is the low qword of XMM register i->rm() or a qword
// read from i->seg():RMAddr(i). Each word goes through a (Bit16s) cast
// before being stored in the 32-bit lane.
// The val64 declaration and the register/memory condition are not visible
// in this excerpt.
1246 void BX_CPU_C::PMOVSXWD_VdqWq(bxInstruction_c
*i
)
1248 #if BX_SUPPORT_SSE >= 4
1249 BX_CPU_THIS_PTR
prepareSSE();
1250 BxPackedXmmRegister result
;
1255 val64
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1258 /* pointer, segment address pair */
1259 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
1262 result
.xmm32u(0) = (Bit16s
) (val64
& 0xFFFF);
1263 result
.xmm32u(1) = (Bit16s
) ((val64
>> 16) & 0xFFFF);
1264 result
.xmm32u(2) = (Bit16s
) ((val64
>> 32) & 0xFFFF);
1265 result
.xmm32u(3) = (Bit16s
) (val64
>> 48);
1267 /* now write result back to destination */
1268 BX_WRITE_XMM_REG(i
->nnn(), result
);
1270 BX_INFO(("PMOVSXWD_VdqWq: required SSE4, use --enable-sse option"));
// PMOVSXWQ (needs BX_SUPPORT_SSE >= 4): widens two 16-bit words to two
// 64-bit qwords in XMM register i->nnn().
// The 32-bit source is the low dword of XMM register i->rm() or a dword
// read from i->seg():RMAddr(i). Each word goes through a (Bit16s) cast
// before being stored in the 64-bit lane.
// The val32 declaration and the register/memory condition are not visible
// in this excerpt.
1276 void BX_CPU_C::PMOVSXWQ_VdqWd(bxInstruction_c
*i
)
1278 #if BX_SUPPORT_SSE >= 4
1279 BX_CPU_THIS_PTR
prepareSSE();
1280 BxPackedXmmRegister result
;
1285 val32
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1288 /* pointer, segment address pair */
1289 read_virtual_dword(i
->seg(), RMAddr(i
), &val32
);
1292 result
.xmm64u(0) = (Bit16s
) (val32
& 0xFFFF);
1293 result
.xmm64u(1) = (Bit16s
) (val32
>> 16);
1295 /* now write result back to destination */
1296 BX_WRITE_XMM_REG(i
->nnn(), result
);
1298 BX_INFO(("PMOVSXWQ_VdqWd: required SSE4, use --enable-sse option"));
// PMOVSXDQ (needs BX_SUPPORT_SSE >= 4): widens two 32-bit dwords to two
// 64-bit qwords in XMM register i->nnn().
// The 64-bit source is the low qword of XMM register i->rm() or a qword
// read from i->seg():RMAddr(i). Each dword goes through a (Bit32s) cast
// before being stored in the 64-bit lane.
// The val64 declaration and the register/memory condition are not visible
// in this excerpt.
1304 void BX_CPU_C::PMOVSXDQ_VdqWq(bxInstruction_c
*i
)
1306 #if BX_SUPPORT_SSE >= 4
1307 BX_CPU_THIS_PTR
prepareSSE();
1308 BxPackedXmmRegister result
;
1313 val64
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1316 /* pointer, segment address pair */
1317 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
1320 result
.xmm64u(0) = (Bit32s
) (val64
& 0xFFFFFFFF);
1321 result
.xmm64u(1) = (Bit32s
) (val64
>> 32);
1323 /* now write result back to destination */
1324 BX_WRITE_XMM_REG(i
->nnn(), result
);
1326 BX_INFO(("PMOVSXDQ_VdqWq: required SSE4, use --enable-sse option"));
// MOVNTDQA (needs BX_SUPPORT_SSE >= 4): loads 16 bytes from
// i->seg():RMAddr(i) through read_virtual_dqword_aligned into XMM
// register i->nnn().
// NOTE(review): the condition guarding the "must be memory reference"
// message, and whatever follows it, is not visible in this excerpt. In
// the visible lines that check comes before prepareSSE().
1332 void BX_CPU_C::MOVNTDQA_VdqMdq(bxInstruction_c
*i
)
1334 #if BX_SUPPORT_SSE >= 4
1335 /* source must be memory reference */
1337 BX_INFO(("MOVNTDQA_VdqMdq: must be memory reference"));
1341 BX_CPU_THIS_PTR
prepareSSE();
1343 BxPackedXmmRegister op
;
1345 read_virtual_dqword_aligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op
);
1347 /* now write result back to destination */
1348 BX_WRITE_XMM_REG(i
->nnn(), op
);
1351 BX_INFO(("MOVNTDQA_VdqMdq: required SSE4, use --enable-sse option"));
// PMOVZXBW (needs BX_SUPPORT_SSE >= 4): zero-extends eight bytes to eight
// 16-bit words in XMM register i->nnn().
// The 64-bit source is the low qword of XMM register i->rm() or a qword
// read from i->seg():RMAddr(i). Each byte is isolated with a shift and a
// 0xFF mask, with no signed cast.
// The val64 declaration and the register/memory condition are not visible
// in this excerpt.
1357 void BX_CPU_C::PMOVZXBW_VdqWq(bxInstruction_c
*i
)
1359 #if BX_SUPPORT_SSE >= 4
1360 BX_CPU_THIS_PTR
prepareSSE();
1361 BxPackedXmmRegister result
;
1366 val64
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1369 /* pointer, segment address pair */
1370 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
1373 result
.xmm16u(0) = val64
& 0xFF;
1374 result
.xmm16u(1) = (val64
>> 8) & 0xFF;
1375 result
.xmm16u(2) = (val64
>> 16) & 0xFF;
1376 result
.xmm16u(3) = (val64
>> 24) & 0xFF;
1377 result
.xmm16u(4) = (val64
>> 32) & 0xFF;
1378 result
.xmm16u(5) = (val64
>> 40) & 0xFF;
1379 result
.xmm16u(6) = (val64
>> 48) & 0xFF;
1380 result
.xmm16u(7) = val64
>> 56;
1382 /* now write result back to destination */
1383 BX_WRITE_XMM_REG(i
->nnn(), result
);
1385 BX_INFO(("PMOVZXBW_VdqWq: required SSE4, use --enable-sse option"));
1391 void BX_CPU_C::PMOVZXBD_VdqWd(bxInstruction_c
*i
)
1393 #if BX_SUPPORT_SSE >= 4
1394 BX_CPU_THIS_PTR
prepareSSE();
1395 BxPackedXmmRegister result
;
1400 val32
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1403 /* pointer, segment address pair */
1404 read_virtual_dword(i
->seg(), RMAddr(i
), &val32
);
1407 result
.xmm32u(0) = val32
& 0xFF;
1408 result
.xmm32u(1) = (val32
>> 8) & 0xFF;
1409 result
.xmm32u(2) = (val32
>> 16) & 0xFF;
1410 result
.xmm32u(3) = val32
>> 24;
1412 /* now write result back to destination */
1413 BX_WRITE_XMM_REG(i
->nnn(), result
);
1415 BX_INFO(("PMOVZXBD_VdqWd: required SSE4, use --enable-sse option"));
1421 void BX_CPU_C::PMOVZXBQ_VdqWw(bxInstruction_c
*i
)
1423 #if BX_SUPPORT_SSE >= 4
1424 BX_CPU_THIS_PTR
prepareSSE();
1425 BxPackedXmmRegister result
;
1430 val16
= BX_READ_XMM_REG_LO_WORD(i
->rm());
1433 /* pointer, segment address pair */
1434 read_virtual_word(i
->seg(), RMAddr(i
), &val16
);
1437 result
.xmm64u(0) = val16
& 0xFF;
1438 result
.xmm64u(1) = val16
>> 8;
1440 /* now write result back to destination */
1441 BX_WRITE_XMM_REG(i
->nnn(), result
);
1443 BX_INFO(("PMOVZXBQ_VdqWw: required SSE4, use --enable-sse option"));
1449 void BX_CPU_C::PMOVZXWD_VdqWq(bxInstruction_c
*i
)
1451 #if BX_SUPPORT_SSE >= 4
1452 BX_CPU_THIS_PTR
prepareSSE();
1453 BxPackedXmmRegister result
;
1458 val64
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1461 /* pointer, segment address pair */
1462 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
1465 result
.xmm32u(0) = val64
& 0xFFFF;
1466 result
.xmm32u(1) = (val64
>> 16) & 0xFFFF;
1467 result
.xmm32u(2) = (val64
>> 32) & 0xFFFF;
1468 result
.xmm32u(3) = val64
>> 48;
1470 /* now write result back to destination */
1471 BX_WRITE_XMM_REG(i
->nnn(), result
);
1473 BX_INFO(("PMOVZXWD_VdqWq: required SSE4, use --enable-sse option"));
1479 void BX_CPU_C::PMOVZXWQ_VdqWd(bxInstruction_c
*i
)
1481 #if BX_SUPPORT_SSE >= 4
1482 BX_CPU_THIS_PTR
prepareSSE();
1483 BxPackedXmmRegister result
;
1488 val32
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1491 /* pointer, segment address pair */
1492 read_virtual_dword(i
->seg(), RMAddr(i
), &val32
);
1495 result
.xmm64u(0) = val32
& 0xFFFF;
1496 result
.xmm64u(1) = val32
>> 16;
1498 /* now write result back to destination */
1499 BX_WRITE_XMM_REG(i
->nnn(), result
);
1501 BX_INFO(("PMOVZXWQ_VdqWd: required SSE4, use --enable-sse option"));
1507 void BX_CPU_C::PMOVZXDQ_VdqWq(bxInstruction_c
*i
)
1509 #if BX_SUPPORT_SSE >= 4
1510 BX_CPU_THIS_PTR
prepareSSE();
1511 BxPackedXmmRegister result
;
1516 val64
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1519 /* pointer, segment address pair */
1520 read_virtual_qword(i
->seg(), RMAddr(i
), &val64
);
1523 result
.xmm64u(0) = val64
& 0xFFFFFFFF;
1524 result
.xmm64u(1) = val64
>> 32;
1526 /* now write result back to destination */
1527 BX_WRITE_XMM_REG(i
->nnn(), result
);
1529 BX_INFO(("PMOVZXDQ_VdqWq: required SSE4, use --enable-sse option"));
1535 void BX_CPU_C::PALIGNR_VdqWdqIb(bxInstruction_c
*i
)
1537 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
1538 BX_CPU_THIS_PTR
prepareSSE();
1540 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
1542 /* op2 is a register or memory reference */
1544 op2
= BX_READ_XMM_REG(i
->rm());
1547 /* pointer, segment address pair */
1548 readVirtualDQwordAligned(i
->seg(), RMAddr(i
), (Bit8u
*) &op2
);
1551 unsigned shift
= i
->Ib() * 8;
1554 result
.xmm64u(0) = op2
.xmm64u(0);
1555 result
.xmm64u(1) = op2
.xmm64u(1);
1557 else if(shift
< 64) {
1558 result
.xmm64u(0) = (op2
.xmm64u(0) >> shift
) | (op2
.xmm64u(1) << (64-shift
));
1559 result
.xmm64u(1) = (op2
.xmm64u(1) >> shift
) | (op1
.xmm64u(0) << (64-shift
));
1561 else if(shift
== 64) {
1562 result
.xmm64u(0) = op2
.xmm64u(1);
1563 result
.xmm64u(1) = op1
.xmm64u(0);
1565 else if(shift
< 128) {
1567 result
.xmm64u(0) = (op2
.xmm64u(1) >> shift
) | (op1
.xmm64u(0) << (64-shift
));
1568 result
.xmm64u(1) = (op1
.xmm64u(0) >> shift
) | (op1
.xmm64u(1) << (64-shift
));
1570 else if(shift
== 128) {
1571 result
.xmm64u(0) = op1
.xmm64u(0);
1572 result
.xmm64u(1) = op1
.xmm64u(1);
1574 else if(shift
< 192) {
1576 result
.xmm64u(0) = (op1
.xmm64u(0) >> shift
) | (op1
.xmm64u(1) << (64-shift
));
1577 result
.xmm64u(1) = (op1
.xmm64u(1) >> shift
);
1579 else if(shift
< 256) {
1580 result
.xmm64u(0) = op1
.xmm64u(1) >> (shift
- 192);
1581 result
.xmm64u(1) = 0;
1584 result
.xmm64u(0) = 0;
1585 result
.xmm64u(1) = 0;
1588 /* now write result back to destination */
1589 BX_WRITE_XMM_REG(i
->nnn(), result
);
1591 BX_INFO(("PALIGNR_VdqWdqIb: required SSE3E, use --enable-sse and --enable-sse-extension options"));
1596 #endif // BX_SUPPORT_SSE >= 4 || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)