1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse_pfp.cc,v 1.51 2008/10/08 10:51:38 sshwarts Exp $
3 /////////////////////////////////////////////////////////////////////////
5 // Copyright (c) 2003 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 /////////////////////////////////////////////////////////////////////////
24 #define NEED_CPU_REG_SHORTCUTS 1
27 #define LOG_THIS BX_CPU_THIS_PTR
31 #include "fpu/softfloat-specialize.h"
33 void BX_CPU_C::check_exceptionsSSE(int exceptions_flags
)
35 int unmasked
= ~(MXCSR
.get_exceptions_masks()) & exceptions_flags
;
36 MXCSR
.set_exceptions(exceptions_flags
);
40 if(BX_CPU_THIS_PTR cr4
.get_OSXMMEXCPT())
41 exception(BX_XM_EXCEPTION
, 0, 0);
43 exception(BX_UD_EXCEPTION
, 0, 0);
47 BX_CPP_INLINE
void mxcsr_to_softfloat_status_word(float_status_t
&status
, bx_mxcsr_t mxcsr
)
49 status
.float_exception_flags
= 0; // clear exceptions before execution
50 status
.float_nan_handling_mode
= float_first_operand_nan
;
51 status
.float_rounding_mode
= mxcsr
.get_rounding_mode();
52 // if underflow is masked and FUZ is 1, set it to 1, else to 0
53 status
.flush_underflow_to_zero
=
54 (mxcsr
.get_flush_masked_underflow() && mxcsr
.get_UM()) ? 1 : 0;
57 /* Comparison predicate for CMPSS/CMPPS instructions */
58 static float32_compare_method compare32
[4] = {
65 #if BX_SUPPORT_SSE >= 2
66 /* Comparison predicate for CMPSD/CMPPD instructions */
67 static float64_compare_method compare64
[4] = {
75 #endif // BX_SUPPORT_SSE
79 * Convert two 32bit signed integers from MMX/MEM to two single precision FP
80 * When a conversion is inexact, the value returned is rounded according
81 * to rounding control bits in MXCSR register.
82 * Possible floating point exceptions: #P
84 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PS_VpsQq(bxInstruction_c
*i
)
86 #if BX_SUPPORT_SSE >= 1
87 BX_CPU_THIS_PTR
prepareSSE();
89 /* check floating point status word for a pending FPU exceptions */
90 FPU_check_pending_exceptions();
92 BxPackedMmxRegister op
;
93 BxPackedXmmRegister result
;
95 /* op is a register or memory reference */
97 op
= BX_READ_MMX_REG(i
->rm());
100 // do not cause transition to MMX state if no MMX register touched
101 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
102 /* pointer, segment address pair */
103 MMXUQ(op
) = read_virtual_qword(i
->seg(), eaddr
);
106 BX_CPU_THIS_PTR
prepareFPU2MMX(); /* FPU2MMX state transition */
108 float_status_t status_word
;
109 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
111 result
.xmm32u(0) = int32_to_float32(MMXUD0(op
), status_word
);
112 result
.xmm32u(1) = int32_to_float32(MMXUD1(op
), status_word
);
114 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
115 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), result
.xmm64u(0));
117 BX_INFO(("CVTPI2PS_VpsQq: required SSE, use --enable-sse option"));
118 exception(BX_UD_EXCEPTION
, 0, 0);
124 * Convert two 32bit signed integers from MMX/MEM to two double precision FP
125 * Possible floating point exceptions: -
127 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PD_VpdQq(bxInstruction_c
*i
)
129 #if BX_SUPPORT_SSE >= 2
130 BX_CPU_THIS_PTR
prepareSSE();
132 /* check floating point status word for a pending FPU exceptions */
133 FPU_check_pending_exceptions();
135 BxPackedMmxRegister op
;
136 BxPackedXmmRegister result
;
138 /* op is a register or memory reference */
140 op
= BX_READ_MMX_REG(i
->rm());
143 // do not cause transition to MMX state if no MMX register touched
144 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
145 /* pointer, segment address pair */
146 MMXUQ(op
) = read_virtual_qword(i
->seg(), eaddr
);
149 BX_CPU_THIS_PTR
prepareFPU2MMX(); /* FPU2MMX state transition */
151 result
.xmm64u(0) = int32_to_float64(MMXUD0(op
));
152 result
.xmm64u(1) = int32_to_float64(MMXUD1(op
));
154 BX_WRITE_XMM_REG(i
->nnn(), result
);
156 BX_INFO(("CVTPI2PD_VpdQd: required SSE2, use --enable-sse option"));
157 exception(BX_UD_EXCEPTION
, 0, 0);
163 * Convert one 32bit signed integer to one double precision FP
164 * Possible floating point exceptions: -
166 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SD_VsdEd(bxInstruction_c
*i
)
168 #if BX_SUPPORT_SSE >= 2
169 BX_CPU_THIS_PTR
prepareSSE();
171 float_status_t status_word
;
172 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
175 #if BX_SUPPORT_X86_64
176 if (i
->os64L()) /* 64 bit operand size mode */
180 /* op is a register or memory reference */
182 op
= BX_READ_64BIT_REG(i
->rm());
185 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
186 /* pointer, segment address pair */
187 op
= read_virtual_qword_64(i
->seg(), eaddr
);
190 result
= int64_to_float64(op
, status_word
);
197 /* op is a register or memory reference */
199 op
= BX_READ_32BIT_REG(i
->rm());
202 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
203 /* pointer, segment address pair */
204 op
= read_virtual_dword(i
->seg(), eaddr
);
207 result
= int32_to_float64(op
);
210 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
211 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), result
);
213 BX_INFO(("CVTSI2SD_VsdEd: required SSE2, use --enable-sse option"));
214 exception(BX_UD_EXCEPTION
, 0, 0);
220 * Convert one 32bit signed integer to one single precision FP
221 * When a conversion is inexact, the value returned is rounded according
222 * to rounding control bits in MXCSR register.
223 * Possible floating point exceptions: #P
225 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SS_VssEd(bxInstruction_c
*i
)
227 #if BX_SUPPORT_SSE >= 1
228 BX_CPU_THIS_PTR
prepareSSE();
230 float_status_t status_word
;
231 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
234 #if BX_SUPPORT_X86_64
235 if (i
->os64L()) /* 64 bit operand size mode */
239 /* op is a register or memory reference */
241 op
= BX_READ_64BIT_REG(i
->rm());
244 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
245 /* pointer, segment address pair */
246 op
= read_virtual_qword_64(i
->seg(), eaddr
);
249 result
= int64_to_float32(op
, status_word
);
256 /* op is a register or memory reference */
258 op
= BX_READ_32BIT_REG(i
->rm());
261 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
262 /* pointer, segment address pair */
263 op
= read_virtual_dword(i
->seg(), eaddr
);
266 result
= int32_to_float32(op
, status_word
);
269 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
270 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(), result
);
272 BX_INFO(("CVTSI2SS_VssEd: required SSE, use --enable-sse option"));
273 exception(BX_UD_EXCEPTION
, 0, 0);
279 * Convert two single precision FP numbers to two signed doubleword integers
280 * in MMX using truncation if the conversion is inexact
281 * Possible floating point exceptions: #I, #P
283 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2PI_PqWps(bxInstruction_c
*i
)
285 #if BX_SUPPORT_SSE >= 1
286 BX_CPU_THIS_PTR
prepareSSE();
288 /* check floating point status word for a pending FPU exceptions */
289 FPU_check_pending_exceptions();
292 BxPackedMmxRegister result
;
294 /* op is a register or memory reference */
296 op
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
299 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
300 /* pointer, segment address pair */
301 op
= read_virtual_qword(i
->seg(), eaddr
);
304 BX_CPU_THIS_PTR
prepareFPU2MMX(); /* FPU2MMX state transition */
306 float_status_t status_word
;
307 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
309 float32 r0
= (float32
)(op
& 0xFFFFFFFF);
310 float32 r1
= (float32
)(op
>> 32);
312 if (MXCSR
.get_DAZ()) {
313 r0
= float32_denormal_to_zero(r0
);
314 r1
= float32_denormal_to_zero(r1
);
317 MMXUD0(result
) = float32_to_int32_round_to_zero(r0
, status_word
);
318 MMXUD1(result
) = float32_to_int32_round_to_zero(r1
, status_word
);
320 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
321 BX_WRITE_MMX_REG(i
->nnn(), result
);
323 BX_INFO(("CVTTPS2PI_PqWps: required SSE, use --enable-sse option"));
324 exception(BX_UD_EXCEPTION
, 0, 0);
330 * Convert two double precision FP numbers to two signed doubleword integers
331 * in MMX using truncation if the conversion is inexact
332 * Possible floating point exceptions: #I, #P
334 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2PI_PqWpd(bxInstruction_c
*i
)
336 #if BX_SUPPORT_SSE >= 2
337 BX_CPU_THIS_PTR
prepareSSE();
339 /* check floating point status word for a pending FPU exceptions */
340 FPU_check_pending_exceptions();
342 BxPackedXmmRegister op
;
343 BxPackedMmxRegister result
;
345 /* op is a register or memory reference */
347 op
= BX_READ_XMM_REG(i
->rm());
350 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
351 /* pointer, segment address pair */
352 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
355 BX_CPU_THIS_PTR
prepareFPU2MMX(); /* FPU2MMX state transition */
357 float_status_t status_word
;
358 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
360 if (MXCSR
.get_DAZ()) {
361 op
.xmm64u(0) = float64_denormal_to_zero(op
.xmm64u(0));
362 op
.xmm64u(1) = float64_denormal_to_zero(op
.xmm64u(1));
365 MMXUD0(result
) = float64_to_int32_round_to_zero(op
.xmm64u(0), status_word
);
366 MMXUD1(result
) = float64_to_int32_round_to_zero(op
.xmm64u(1), status_word
);
368 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
369 BX_WRITE_MMX_REG(i
->nnn(), result
);
371 BX_INFO(("CVTTPD2PI_PqWpd: required SSE2, use --enable-sse option"));
372 exception(BX_UD_EXCEPTION
, 0, 0);
378 * Convert one double precision FP number to doubleword integer using
379 * truncation if the conversion is inexact
380 * Possible floating point exceptions: #I, #P
382 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSD2SI_GdWsd(bxInstruction_c
*i
)
384 #if BX_SUPPORT_SSE >= 2
385 BX_CPU_THIS_PTR
prepareSSE();
389 /* op is a register or memory reference */
391 op
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
394 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
395 /* pointer, segment address pair */
396 op
= read_virtual_qword(i
->seg(), eaddr
);
399 float_status_t status_word
;
400 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
402 if (MXCSR
.get_DAZ()) op
= float64_denormal_to_zero(op
);
404 #if BX_SUPPORT_X86_64
405 if (i
->os64L()) /* 64 bit operand size mode */
407 Bit64u result
= float64_to_int64_round_to_zero(op
, status_word
);
408 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
409 BX_WRITE_64BIT_REG(i
->nnn(), result
);
414 Bit32u result
= float64_to_int32_round_to_zero(op
, status_word
);
415 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
416 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
420 BX_INFO(("CVTTSD2SI_GdWsd: required SSE2, use --enable-sse option"));
421 exception(BX_UD_EXCEPTION
, 0, 0);
427 * Convert one single precision FP number to doubleword integer using
428 * truncation if the conversion is inexact
429 * Possible floating point exceptions: #I, #P
431 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSS2SI_GdWss(bxInstruction_c
*i
)
433 #if BX_SUPPORT_SSE >= 1
434 BX_CPU_THIS_PTR
prepareSSE();
438 /* op is a register or memory reference */
440 op
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
443 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
444 /* pointer, segment address pair */
445 op
= read_virtual_dword(i
->seg(), eaddr
);
448 float_status_t status_word
;
449 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
451 if (MXCSR
.get_DAZ()) op
= float32_denormal_to_zero(op
);
453 #if BX_SUPPORT_X86_64
454 if (i
->os64L()) /* 64 bit operand size mode */
456 Bit64u result
= float32_to_int64_round_to_zero(op
, status_word
);
457 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
458 BX_WRITE_64BIT_REG(i
->nnn(), result
);
463 Bit32u result
= float32_to_int32_round_to_zero(op
, status_word
);
464 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
465 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
469 BX_INFO(("CVTTSS2SI_GdWss: required SSE, use --enable-sse option"));
470 exception(BX_UD_EXCEPTION
, 0, 0);
476 * Convert two single precision FP numbers to two signed doubleword integers
477 * in MMX register. When a conversion is inexact, the value returned is
478 * rounded according to rounding control bits in MXCSR register.
479 * Possible floating point exceptions: #I, #P
481 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2PI_PqWps(bxInstruction_c
*i
)
483 #if BX_SUPPORT_SSE >= 1
484 BX_CPU_THIS_PTR
prepareSSE();
486 /* check floating point status word for a pending FPU exceptions */
487 FPU_check_pending_exceptions();
490 BxPackedMmxRegister result
;
492 /* op is a register or memory reference */
494 op
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
497 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
498 /* pointer, segment address pair */
499 op
= read_virtual_qword(i
->seg(), eaddr
);
502 BX_CPU_THIS_PTR
prepareFPU2MMX(); /* FPU2MMX state transition */
504 float_status_t status_word
;
505 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
507 float32 r0
= (float32
)(op
& 0xFFFFFFFF);
508 float32 r1
= (float32
)(op
>> 32);
510 if (MXCSR
.get_DAZ()) {
511 r0
= float32_denormal_to_zero(r0
);
512 r1
= float32_denormal_to_zero(r1
);
515 MMXUD0(result
) = float32_to_int32(r0
, status_word
);
516 MMXUD1(result
) = float32_to_int32(r1
, status_word
);
518 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
519 BX_WRITE_MMX_REG(i
->nnn(), result
);
521 BX_INFO(("CVTPS2PI_PqWps: required SSE, use --enable-sse option"));
522 exception(BX_UD_EXCEPTION
, 0, 0);
528 * Convert two double precision FP numbers to two signed doubleword integers
529 * in MMX register. When a conversion is inexact, the value returned is
530 * rounded according to rounding control bits in MXCSR register.
531 * Possible floating point exceptions: #I, #P
533 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2PI_PqWpd(bxInstruction_c
*i
)
535 #if BX_SUPPORT_SSE >= 2
536 BX_CPU_THIS_PTR
prepareSSE();
538 /* check floating point status word for a pending FPU exceptions */
539 FPU_check_pending_exceptions();
541 BxPackedXmmRegister op
;
542 BxPackedMmxRegister result
;
544 /* op is a register or memory reference */
546 op
= BX_READ_XMM_REG(i
->rm());
549 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
550 /* pointer, segment address pair */
551 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
554 BX_CPU_THIS_PTR
prepareFPU2MMX(); /* FPU2MMX state transition */
556 float_status_t status_word
;
557 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
559 if (MXCSR
.get_DAZ()) {
560 op
.xmm64u(0) = float64_denormal_to_zero(op
.xmm64u(0));
561 op
.xmm64u(1) = float64_denormal_to_zero(op
.xmm64u(1));
564 MMXUD0(result
) = float64_to_int32(op
.xmm64u(0), status_word
);
565 MMXUD1(result
) = float64_to_int32(op
.xmm64u(1), status_word
);
567 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
568 BX_WRITE_MMX_REG(i
->nnn(), result
);
570 BX_INFO(("CVTPD2PI_PqWpd: required SSE2, use --enable-sse option"));
571 exception(BX_UD_EXCEPTION
, 0, 0);
577 * Convert one double precision FP number to doubleword integer
578 * When a conversion is inexact, the value returned is rounded according
579 * to rounding control bits in MXCSR register.
580 * Possible floating point exceptions: #I, #P
582 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SI_GdWsd(bxInstruction_c
*i
)
584 #if BX_SUPPORT_SSE >= 2
585 BX_CPU_THIS_PTR
prepareSSE();
589 /* op is a register or memory reference */
591 op
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
594 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
595 /* pointer, segment address pair */
596 op
= read_virtual_qword(i
->seg(), eaddr
);
599 float_status_t status_word
;
600 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
601 if (MXCSR
.get_DAZ()) op
= float64_denormal_to_zero(op
);
603 #if BX_SUPPORT_X86_64
604 if (i
->os64L()) /* 64 bit operand size mode */
606 Bit64u result
= float64_to_int64(op
, status_word
);
607 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
608 BX_WRITE_64BIT_REG(i
->nnn(), result
);
613 Bit32u result
= float64_to_int32(op
, status_word
);
614 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
615 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
619 BX_INFO(("CVTSD2SI_GdWsd: required SSE2, use --enable-sse option"));
620 exception(BX_UD_EXCEPTION
, 0, 0);
626 * Convert one single precision FP number to doubleword integer.
627 * When a conversion is inexact, the value returned is rounded according
628 * to rounding control bits in MXCSR register.
629 * Possible floating point exceptions: #I, #P
631 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SI_GdWss(bxInstruction_c
*i
)
633 #if BX_SUPPORT_SSE >= 1
634 BX_CPU_THIS_PTR
prepareSSE();
638 /* op is a register or memory reference */
640 op
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
643 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
644 /* pointer, segment address pair */
645 op
= read_virtual_dword(i
->seg(), eaddr
);
648 float_status_t status_word
;
649 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
650 if (MXCSR
.get_DAZ()) op
= float32_denormal_to_zero(op
);
652 #if BX_SUPPORT_X86_64
653 if (i
->os64L()) /* 64 bit operand size mode */
655 Bit64u result
= float32_to_int64(op
, status_word
);
656 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
657 BX_WRITE_64BIT_REG(i
->nnn(), result
);
662 Bit32u result
= float32_to_int32(op
, status_word
);
663 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
664 BX_WRITE_32BIT_REGZ(i
->nnn(), result
);
668 BX_INFO(("CVTSS2SI_GdWss: required SSE, use --enable-sse option"));
669 exception(BX_UD_EXCEPTION
, 0, 0);
675 * Convert two single precision FP numbers to two double precision FP numbers
676 * Possible floating point exceptions: #I, #D
678 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2PD_VpsWps(bxInstruction_c
*i
)
680 #if BX_SUPPORT_SSE >= 2
681 BX_CPU_THIS_PTR
prepareSSE();
684 BxPackedXmmRegister result
;
686 /* op is a register or memory reference */
688 op
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
691 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
692 /* pointer, segment address pair */
693 op
= read_virtual_qword(i
->seg(), eaddr
);
696 float_status_t status_word
;
697 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
699 float32 r0
= (float32
)(op
& 0xFFFFFFFF);
700 float32 r1
= (float32
)(op
>> 32);
702 if (MXCSR
.get_DAZ()) {
703 r0
= float32_denormal_to_zero(r0
);
704 r1
= float32_denormal_to_zero(r1
);
707 result
.xmm64u(0) = float32_to_float64(r0
, status_word
);
708 result
.xmm64u(1) = float32_to_float64(r1
, status_word
);
710 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
711 BX_WRITE_XMM_REG(i
->nnn(), result
);
714 BX_INFO(("CVTPS2PD_VpsWps: required SSE2, use --enable-sse option"));
715 exception(BX_UD_EXCEPTION
, 0, 0);
721 * Convert two double precision FP numbers to two single precision FP.
722 * When a conversion is inexact, the value returned is rounded according
723 * to rounding control bits in MXCSR register.
724 * Possible floating point exceptions: #I, #D, #O, #I, #P
726 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2PS_VpdWpd(bxInstruction_c
*i
)
728 #if BX_SUPPORT_SSE >= 2
729 BX_CPU_THIS_PTR
prepareSSE();
731 BxPackedXmmRegister op
, result
;
733 /* op is a register or memory reference */
735 op
= BX_READ_XMM_REG(i
->rm());
738 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
739 /* pointer, segment address pair */
740 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
743 float_status_t status_word
;
744 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
748 op
.xmm64u(0) = float64_denormal_to_zero(op
.xmm64u(0));
749 op
.xmm64u(1) = float64_denormal_to_zero(op
.xmm64u(1));
752 result
.xmm32u(0) = float64_to_float32(op
.xmm64u(0), status_word
);
753 result
.xmm32u(1) = float64_to_float32(op
.xmm64u(1), status_word
);
754 result
.xmm64u(1) = 0;
756 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
757 BX_WRITE_XMM_REG(i
->nnn(), result
);
760 BX_INFO(("CVTPD2PS_VpdWpd: required SSE2, use --enable-sse option"));
761 exception(BX_UD_EXCEPTION
, 0, 0);
767 * Convert one double precision FP number to one single precision FP.
768 * When a conversion is inexact, the value returned is rounded according
769 * to rounding control bits in MXCSR register.
770 * Possible floating point exceptions: #I, #D, #O, #I, #P
772 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SS_VsdWsd(bxInstruction_c
*i
)
774 #if BX_SUPPORT_SSE >= 2
775 BX_CPU_THIS_PTR
prepareSSE();
780 /* op is a register or memory reference */
782 op
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
785 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
786 /* pointer, segment address pair */
787 op
= read_virtual_qword(i
->seg(), eaddr
);
790 float_status_t status_word
;
791 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
792 if (MXCSR
.get_DAZ()) op
= float64_denormal_to_zero(op
);
793 result
= float64_to_float32(op
, status_word
);
794 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
795 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(), result
);
798 BX_INFO(("CVTSD2SS_VsdWsd: required SSE2, use --enable-sse option"));
799 exception(BX_UD_EXCEPTION
, 0, 0);
805 * Convert one single precision FP number to one double precision FP.
806 * Possible floating point exceptions: #I, #D
808 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SD_VssWss(bxInstruction_c
*i
)
810 #if BX_SUPPORT_SSE >= 2
811 BX_CPU_THIS_PTR
prepareSSE();
816 /* op is a register or memory reference */
818 op
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
821 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
822 /* pointer, segment address pair */
823 op
= read_virtual_dword(i
->seg(), eaddr
);
826 float_status_t status_word
;
827 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
828 if (MXCSR
.get_DAZ()) op
= float32_denormal_to_zero(op
);
829 result
= float32_to_float64(op
, status_word
);
830 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
831 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), result
);
834 BX_INFO(("CVTSS2SD_VssWss: required SSE2, use --enable-sse option"));
835 exception(BX_UD_EXCEPTION
, 0, 0);
841 * Convert four signed integers to four single precision FP numbers.
842 * When a conversion is inexact, the value returned is rounded according
843 * to rounding control bits in MXCSR register.
844 * Possible floating point exceptions: #P
846 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTDQ2PS_VpsWdq(bxInstruction_c
*i
)
848 #if BX_SUPPORT_SSE >= 2
849 BX_CPU_THIS_PTR
prepareSSE();
851 BxPackedXmmRegister op
;
853 /* op is a register or memory reference */
855 op
= BX_READ_XMM_REG(i
->rm());
858 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
859 /* pointer, segment address pair */
860 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
863 BX_CPU_THIS_PTR
prepareFPU2MMX(); /* FPU2MMX state transition */
865 float_status_t status_word
;
866 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
868 op
.xmm32u(0) = int32_to_float32(op
.xmm32u(0), status_word
);
869 op
.xmm32u(1) = int32_to_float32(op
.xmm32u(1), status_word
);
870 op
.xmm32u(2) = int32_to_float32(op
.xmm32u(2), status_word
);
871 op
.xmm32u(3) = int32_to_float32(op
.xmm32u(3), status_word
);
873 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
874 BX_WRITE_XMM_REG(i
->nnn(), op
);
876 BX_INFO(("CVTDQ2PS_VpsWdq: required SSE2, use --enable-sse option"));
877 exception(BX_UD_EXCEPTION
, 0, 0);
883 * Convert four single precision FP to four doubleword integers.
884 * When a conversion is inexact, the value returned is rounded according
885 * to rounding control bits in MXCSR register.
886 * Possible floating point exceptions: #I, #P
888 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2DQ_VdqWps(bxInstruction_c
*i
)
890 #if BX_SUPPORT_SSE >= 2
891 BX_CPU_THIS_PTR
prepareSSE();
893 BxPackedXmmRegister op
;
895 /* op is a register or memory reference */
897 op
= BX_READ_XMM_REG(i
->rm());
900 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
901 /* pointer, segment address pair */
902 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
905 float_status_t status_word
;
906 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
908 if (MXCSR
.get_DAZ()) {
909 op
.xmm32u(0) = float32_denormal_to_zero(op
.xmm32u(0));
910 op
.xmm32u(1) = float32_denormal_to_zero(op
.xmm32u(1));
911 op
.xmm32u(2) = float32_denormal_to_zero(op
.xmm32u(2));
912 op
.xmm32u(3) = float32_denormal_to_zero(op
.xmm32u(3));
915 op
.xmm32u(0) = float32_to_int32(op
.xmm32u(0), status_word
);
916 op
.xmm32u(1) = float32_to_int32(op
.xmm32u(1), status_word
);
917 op
.xmm32u(2) = float32_to_int32(op
.xmm32u(2), status_word
);
918 op
.xmm32u(3) = float32_to_int32(op
.xmm32u(3), status_word
);
920 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
921 BX_WRITE_XMM_REG(i
->nnn(), op
);
923 BX_INFO(("CVTPS2DQ_VdqWps: required SSE2, use --enable-sse option"));
924 exception(BX_UD_EXCEPTION
, 0, 0);
930 * Convert four single precision FP to four doubleword integers using
931 * truncation if the conversion is inexact.
932 * Possible floating point exceptions: #I, #P
934 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2DQ_VdqWps(bxInstruction_c
*i
)
936 #if BX_SUPPORT_SSE >= 2
937 BX_CPU_THIS_PTR
prepareSSE();
939 BxPackedXmmRegister op
;
941 /* op is a register or memory reference */
943 op
= BX_READ_XMM_REG(i
->rm());
946 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
947 /* pointer, segment address pair */
948 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
951 float_status_t status_word
;
952 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
954 if (MXCSR
.get_DAZ()) {
955 op
.xmm32u(0) = float32_denormal_to_zero(op
.xmm32u(0));
956 op
.xmm32u(1) = float32_denormal_to_zero(op
.xmm32u(1));
957 op
.xmm32u(2) = float32_denormal_to_zero(op
.xmm32u(2));
958 op
.xmm32u(3) = float32_denormal_to_zero(op
.xmm32u(3));
961 op
.xmm32u(0) = float32_to_int32_round_to_zero(op
.xmm32u(0), status_word
);
962 op
.xmm32u(1) = float32_to_int32_round_to_zero(op
.xmm32u(1), status_word
);
963 op
.xmm32u(2) = float32_to_int32_round_to_zero(op
.xmm32u(2), status_word
);
964 op
.xmm32u(3) = float32_to_int32_round_to_zero(op
.xmm32u(3), status_word
);
966 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
967 BX_WRITE_XMM_REG(i
->nnn(), op
);
969 BX_INFO(("CVTTPS2DQ_VdqWps: required SSE2, use --enable-sse option"));
970 exception(BX_UD_EXCEPTION
, 0, 0);
976 * Convert two double precision FP to two signed doubleword integers using
977 * truncation if the conversion is inexact.
978 * Possible floating point exceptions: #I, #P
980 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2DQ_VqWpd(bxInstruction_c
*i
)
982 #if BX_SUPPORT_SSE >= 2
983 BX_CPU_THIS_PTR
prepareSSE();
985 BxPackedXmmRegister op
, result
;
987 /* op is a register or memory reference */
989 op
= BX_READ_XMM_REG(i
->rm());
992 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
993 /* pointer, segment address pair */
994 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
997 float_status_t status_word
;
998 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1000 if (MXCSR
.get_DAZ()) {
1001 op
.xmm64u(0) = float64_denormal_to_zero(op
.xmm64u(0));
1002 op
.xmm64u(1) = float64_denormal_to_zero(op
.xmm64u(1));
1005 result
.xmm32u(0) = float64_to_int32_round_to_zero(op
.xmm64u(0), status_word
);
1006 result
.xmm32u(1) = float64_to_int32_round_to_zero(op
.xmm64u(1), status_word
);
1007 result
.xmm64u(1) = 0;
1009 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1010 BX_WRITE_XMM_REG(i
->nnn(), result
);
1012 BX_INFO(("CVTTPD2DQ_VqWpd: required SSE2, use --enable-sse option"));
1013 exception(BX_UD_EXCEPTION
, 0, 0);
1019 * Convert two double precision FP to two signed doubleword integers.
1020 * When a conversion is inexact, the value returned is rounded according
1021 * to rounding control bits in MXCSR register.
1022 * Possible floating point exceptions: #I, #P
1024 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2DQ_VqWpd(bxInstruction_c
*i
)
1026 #if BX_SUPPORT_SSE >= 2
1027 BX_CPU_THIS_PTR
prepareSSE();
1029 BxPackedXmmRegister op
, result
;
1031 /* op is a register or memory reference */
1033 op
= BX_READ_XMM_REG(i
->rm());
1036 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1037 /* pointer, segment address pair */
1038 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
1041 float_status_t status_word
;
1042 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1044 if (MXCSR
.get_DAZ()) {
1045 op
.xmm64u(0) = float64_denormal_to_zero(op
.xmm64u(0));
1046 op
.xmm64u(1) = float64_denormal_to_zero(op
.xmm64u(1));
1049 result
.xmm32u(0) = float64_to_int32(op
.xmm64u(0), status_word
);
1050 result
.xmm32u(1) = float64_to_int32(op
.xmm64u(1), status_word
);
1051 result
.xmm64u(1) = 0;
1053 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1054 BX_WRITE_XMM_REG(i
->nnn(), result
);
1056 BX_INFO(("CVTPD2DQ_VqWpd: required SSE2, use --enable-sse option"));
1057 exception(BX_UD_EXCEPTION
, 0, 0);
1063 * Convert two 32bit signed integers from XMM/MEM to two double precision FP
1064 * Possible floating point exceptions: -
1066 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTDQ2PD_VpdWq(bxInstruction_c
*i
)
1068 #if BX_SUPPORT_SSE >= 2
1069 BX_CPU_THIS_PTR
prepareSSE();
1072 BxPackedXmmRegister result
;
1074 /* op is a register or memory reference */
1076 op
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1079 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1080 /* pointer, segment address pair */
1081 op
= read_virtual_qword(i
->seg(), eaddr
);
1084 Bit32u r0
= (Bit32u
)(op
& 0xFFFFFFFF);
1085 Bit32u r1
= (Bit32u
)(op
>> 32);
1087 result
.xmm64u(0) = int32_to_float64(r0
);
1088 result
.xmm64u(1) = int32_to_float64(r1
);
1090 BX_WRITE_XMM_REG(i
->nnn(), result
);
1092 BX_INFO(("CVTDQ2PD_VpdWq: required SSE2, use --enable-sse option"));
1093 exception(BX_UD_EXCEPTION
, 0, 0);
1099 * Compare two single precision FP numbers and set EFLAGS accordintly.
1100 * Possible floating point exceptions: #I, #D
1102 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UCOMISS_VssWss(bxInstruction_c
*i
)
1104 #if BX_SUPPORT_SSE >= 1
1105 BX_CPU_THIS_PTR
prepareSSE();
1107 float32 op1
= BX_READ_XMM_REG_LO_DWORD(i
->nnn()), op2
;
1109 /* op2 is a register or memory reference */
1111 op2
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1114 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1115 /* pointer, segment address pair */
1116 op2
= read_virtual_dword(i
->seg(), eaddr
);
1119 float_status_t status_word
;
1120 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1122 if (MXCSR
.get_DAZ())
1124 op1
= float32_denormal_to_zero(op1
);
1125 op2
= float32_denormal_to_zero(op2
);
1128 int rc
= float32_compare_quiet(op1
, op2
, status_word
);
1129 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1130 BX_CPU_THIS_PTR
write_eflags_fpu_compare(rc
);
1132 BX_INFO(("UCOMISS_VssWss: required SSE, use --enable-sse option"));
1133 exception(BX_UD_EXCEPTION
, 0, 0);
1139 * Compare two double precision FP numbers and set EFLAGS accordintly.
1140 * Possible floating point exceptions: #I, #D
1142 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UCOMISD_VsdWsd(bxInstruction_c
*i
)
1144 #if BX_SUPPORT_SSE >= 2
1145 BX_CPU_THIS_PTR
prepareSSE();
1147 float64 op1
= BX_READ_XMM_REG_LO_QWORD(i
->nnn()), op2
;
1149 /* op2 is a register or memory reference */
1151 op2
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1154 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1155 /* pointer, segment address pair */
1156 op2
= read_virtual_qword(i
->seg(), eaddr
);
1159 float_status_t status_word
;
1160 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1162 if (MXCSR
.get_DAZ())
1164 op1
= float64_denormal_to_zero(op1
);
1165 op2
= float64_denormal_to_zero(op2
);
1168 int rc
= float64_compare_quiet(op1
, op2
, status_word
);
1169 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1170 BX_CPU_THIS_PTR
write_eflags_fpu_compare(rc
);
1172 BX_INFO(("UCOMISD_VsdWsd: required SSE2, use --enable-sse option"));
1173 exception(BX_UD_EXCEPTION
, 0, 0);
1179 * Compare two single precision FP numbers and set EFLAGS accordintly.
1180 * Possible floating point exceptions: #I, #D
1182 void BX_CPP_AttrRegparmN(1) BX_CPU_C::COMISS_VpsWps(bxInstruction_c
*i
)
1184 #if BX_SUPPORT_SSE >= 1
1185 BX_CPU_THIS_PTR
prepareSSE();
1187 float32 op1
= BX_READ_XMM_REG_LO_DWORD(i
->nnn()), op2
;
1189 /* op2 is a register or memory reference */
1191 op2
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1194 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1195 /* pointer, segment address pair */
1196 op2
= read_virtual_dword(i
->seg(), eaddr
);
1199 float_status_t status_word
;
1200 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1202 if (MXCSR
.get_DAZ())
1204 op1
= float32_denormal_to_zero(op1
);
1205 op2
= float32_denormal_to_zero(op2
);
1208 int rc
= float32_compare(op1
, op2
, status_word
);
1209 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1210 BX_CPU_THIS_PTR
write_eflags_fpu_compare(rc
);
1212 BX_INFO(("COMISS_VpsWps: required SSE, use --enable-sse option"));
1213 exception(BX_UD_EXCEPTION
, 0, 0);
1219 * Compare two double precision FP numbers and set EFLAGS accordintly.
1220 * Possible floating point exceptions: #I, #D
1222 void BX_CPP_AttrRegparmN(1) BX_CPU_C::COMISD_VpdWpd(bxInstruction_c
*i
)
1224 #if BX_SUPPORT_SSE >= 2
1225 BX_CPU_THIS_PTR
prepareSSE();
1227 float64 op1
= BX_READ_XMM_REG_LO_QWORD(i
->nnn()), op2
;
1229 /* op2 is a register or memory reference */
1231 op2
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1234 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1235 /* pointer, segment address pair */
1236 op2
= read_virtual_qword(i
->seg(), eaddr
);
1239 float_status_t status_word
;
1240 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1242 if (MXCSR
.get_DAZ())
1244 op1
= float64_denormal_to_zero(op1
);
1245 op2
= float64_denormal_to_zero(op2
);
1248 int rc
= float64_compare(op1
, op2
, status_word
);
1249 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1250 BX_CPU_THIS_PTR
write_eflags_fpu_compare(rc
);
1252 BX_INFO(("COMISD_VpdWpd: required SSE2, use --enable-sse option"));
1253 exception(BX_UD_EXCEPTION
, 0, 0);
1259 * Square Root packed single precision.
1260 * Possible floating point exceptions: #I, #D, #P
1262 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTPS_VpsWps(bxInstruction_c
*i
)
1264 #if BX_SUPPORT_SSE >= 1
1265 BX_CPU_THIS_PTR
prepareSSE();
1267 BxPackedXmmRegister op
;
1269 /* op is a register or memory reference */
1271 op
= BX_READ_XMM_REG(i
->rm());
1274 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1275 /* pointer, segment address pair */
1276 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
1279 float_status_t status_word
;
1280 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1282 if (MXCSR
.get_DAZ())
1284 op
.xmm32u(0) = float32_denormal_to_zero(op
.xmm32u(0));
1285 op
.xmm32u(1) = float32_denormal_to_zero(op
.xmm32u(1));
1286 op
.xmm32u(2) = float32_denormal_to_zero(op
.xmm32u(2));
1287 op
.xmm32u(3) = float32_denormal_to_zero(op
.xmm32u(3));
1290 op
.xmm32u(0) = float32_sqrt(op
.xmm32u(0), status_word
);
1291 op
.xmm32u(1) = float32_sqrt(op
.xmm32u(1), status_word
);
1292 op
.xmm32u(2) = float32_sqrt(op
.xmm32u(2), status_word
);
1293 op
.xmm32u(3) = float32_sqrt(op
.xmm32u(3), status_word
);
1295 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1296 BX_WRITE_XMM_REG(i
->nnn(), op
);
1299 BX_INFO(("SQRTPS_VpsWps: required SSE, use --enable-sse option"));
1300 exception(BX_UD_EXCEPTION
, 0, 0);
1306 * Square Root packed double precision.
1307 * Possible floating point exceptions: #I, #D, #P
1309 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTPD_VpdWpd(bxInstruction_c
*i
)
1311 #if BX_SUPPORT_SSE >= 2
1312 BX_CPU_THIS_PTR
prepareSSE();
1314 BxPackedXmmRegister op
;
1316 /* op is a register or memory reference */
1318 op
= BX_READ_XMM_REG(i
->rm());
1321 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1322 /* pointer, segment address pair */
1323 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
1326 float_status_t status_word
;
1327 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1329 if (MXCSR
.get_DAZ())
1331 op
.xmm64u(0) = float64_denormal_to_zero(op
.xmm64u(0));
1332 op
.xmm64u(1) = float64_denormal_to_zero(op
.xmm64u(1));
1335 op
.xmm64u(0) = float64_sqrt(op
.xmm64u(0), status_word
);
1336 op
.xmm64u(1) = float64_sqrt(op
.xmm64u(1), status_word
);
1338 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1339 BX_WRITE_XMM_REG(i
->nnn(), op
);
1342 BX_INFO(("SQRTPD_VpdWpd: required SSE2, use --enable-sse option"));
1343 exception(BX_UD_EXCEPTION
, 0, 0);
1349 * Square Root scalar double precision.
1350 * Possible floating point exceptions: #I, #D, #P
1352 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTSD_VsdWsd(bxInstruction_c
*i
)
1354 #if BX_SUPPORT_SSE >= 2
1355 BX_CPU_THIS_PTR
prepareSSE();
1359 /* op is a register or memory reference */
1361 op
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1364 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1365 /* pointer, segment address pair */
1366 op
= read_virtual_qword(i
->seg(), eaddr
);
1369 float_status_t status_word
;
1370 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1371 if (MXCSR
.get_DAZ()) op
= float64_denormal_to_zero(op
);
1372 op
= float64_sqrt(op
, status_word
);
1373 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1374 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), op
);
1377 BX_INFO(("SQRTSD_VsdWsd: required SSE2, use --enable-sse option"));
1378 exception(BX_UD_EXCEPTION
, 0, 0);
1384 * Square Root scalar single precision.
1385 * Possible floating point exceptions: #I, #D, #P
1387 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTSS_VssWss(bxInstruction_c
*i
)
1389 #if BX_SUPPORT_SSE >= 1
1390 BX_CPU_THIS_PTR
prepareSSE();
1394 /* op is a register or memory reference */
1396 op
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1399 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1400 /* pointer, segment address pair */
1401 op
= read_virtual_dword(i
->seg(), eaddr
);
1404 float_status_t status_word
;
1405 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1406 if (MXCSR
.get_DAZ()) op
= float32_denormal_to_zero(op
);
1407 op
= float32_sqrt(op
, status_word
);
1408 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1409 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(), op
);
1412 BX_INFO(("SQRTSS_VssWss: required SSE, use --enable-sse option"));
1413 exception(BX_UD_EXCEPTION
, 0, 0);
1419 * Add packed single precision FP numbers from XMM2/MEM to XMM1.
1420 * Possible floating point exceptions: #I, #D, #O, #U, #P
1422 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDPS_VpsWps(bxInstruction_c
*i
)
1424 #if BX_SUPPORT_SSE >= 1
1425 BX_CPU_THIS_PTR
prepareSSE();
1427 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1429 /* op2 is a register or memory reference */
1431 op2
= BX_READ_XMM_REG(i
->rm());
1434 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1435 /* pointer, segment address pair */
1436 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1439 float_status_t status_word
;
1440 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1442 if (MXCSR
.get_DAZ()) {
1443 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
1444 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
1445 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
1446 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
1448 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
1449 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
1450 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
1451 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
1454 op1
.xmm32u(0) = float32_add(op1
.xmm32u(0), op2
.xmm32u(0), status_word
);
1455 op1
.xmm32u(1) = float32_add(op1
.xmm32u(1), op2
.xmm32u(1), status_word
);
1456 op1
.xmm32u(2) = float32_add(op1
.xmm32u(2), op2
.xmm32u(2), status_word
);
1457 op1
.xmm32u(3) = float32_add(op1
.xmm32u(3), op2
.xmm32u(3), status_word
);
1459 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1460 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1463 BX_INFO(("ADDPS_VpsWps: required SSE, use --enable-sse option"));
1464 exception(BX_UD_EXCEPTION
, 0, 0);
1470 * Add packed double precision FP numbers from XMM2/MEM to XMM1.
1471 * Possible floating point exceptions: #I, #D, #O, #U, #P
1473 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDPD_VpdWpd(bxInstruction_c
*i
)
1475 #if BX_SUPPORT_SSE >= 2
1476 BX_CPU_THIS_PTR
prepareSSE();
1478 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1480 /* op2 is a register or memory reference */
1482 op2
= BX_READ_XMM_REG(i
->rm());
1485 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1486 /* pointer, segment address pair */
1487 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1490 float_status_t status_word
;
1491 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1493 if (MXCSR
.get_DAZ())
1495 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
1496 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
1497 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
1498 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
1501 op1
.xmm64u(0) = float64_add(op1
.xmm64u(0), op2
.xmm64u(0), status_word
);
1502 op1
.xmm64u(1) = float64_add(op1
.xmm64u(1), op2
.xmm64u(1), status_word
);
1504 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1505 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1508 BX_INFO(("ADDPD_VpdWpd: required SSE2, use --enable-sse option"));
1509 exception(BX_UD_EXCEPTION
, 0, 0);
1515 * Add the lower double precision FP number from XMM2/MEM to XMM1.
1516 * Possible floating point exceptions: #I, #D, #O, #U, #P
1518 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSD_VsdWsd(bxInstruction_c
*i
)
1520 #if BX_SUPPORT_SSE >= 2
1521 BX_CPU_THIS_PTR
prepareSSE();
1523 float64 op1
= BX_READ_XMM_REG_LO_QWORD(i
->nnn()), op2
;
1525 /* op2 is a register or memory reference */
1527 op2
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1530 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1531 /* pointer, segment address pair */
1532 op2
= read_virtual_qword(i
->seg(), eaddr
);
1535 float_status_t status_word
;
1536 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1538 if (MXCSR
.get_DAZ())
1540 op1
= float64_denormal_to_zero(op1
);
1541 op2
= float64_denormal_to_zero(op2
);
1544 op1
= float64_add(op1
, op2
, status_word
);
1545 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1546 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), op1
);
1549 BX_INFO(("ADDSD_VsdWsd: required SSE2, use --enable-sse option"));
1550 exception(BX_UD_EXCEPTION
, 0, 0);
1556 * Add the lower single precision FP number from XMM2/MEM to XMM1.
1557 * Possible floating point exceptions: #I, #D, #O, #U, #P
1559 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSS_VssWss(bxInstruction_c
*i
)
1561 #if BX_SUPPORT_SSE >= 1
1562 BX_CPU_THIS_PTR
prepareSSE();
1564 float32 op1
= BX_READ_XMM_REG_LO_DWORD(i
->nnn()), op2
;
1566 /* op2 is a register or memory reference */
1568 op2
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1571 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1572 /* pointer, segment address pair */
1573 op2
= read_virtual_dword(i
->seg(), eaddr
);
1576 float_status_t status_word
;
1577 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1579 if (MXCSR
.get_DAZ())
1581 op1
= float32_denormal_to_zero(op1
);
1582 op2
= float32_denormal_to_zero(op2
);
1585 op1
= float32_add(op1
, op2
, status_word
);
1586 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1587 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(), op1
);
1590 BX_INFO(("ADDSS_VssWss: required SSE, use --enable-sse option"));
1591 exception(BX_UD_EXCEPTION
, 0, 0);
1597 * Multiply packed single precision FP numbers from XMM2/MEM to XMM1.
1598 * Possible floating point exceptions: #I, #D, #O, #U, #P
1600 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULPS_VpsWps(bxInstruction_c
*i
)
1602 #if BX_SUPPORT_SSE >= 1
1603 BX_CPU_THIS_PTR
prepareSSE();
1605 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1607 /* op2 is a register or memory reference */
1609 op2
= BX_READ_XMM_REG(i
->rm());
1612 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1613 /* pointer, segment address pair */
1614 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1617 float_status_t status_word
;
1618 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1620 if (MXCSR
.get_DAZ()) {
1621 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
1622 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
1623 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
1624 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
1626 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
1627 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
1628 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
1629 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
1632 op1
.xmm32u(0) = float32_mul(op1
.xmm32u(0), op2
.xmm32u(0), status_word
);
1633 op1
.xmm32u(1) = float32_mul(op1
.xmm32u(1), op2
.xmm32u(1), status_word
);
1634 op1
.xmm32u(2) = float32_mul(op1
.xmm32u(2), op2
.xmm32u(2), status_word
);
1635 op1
.xmm32u(3) = float32_mul(op1
.xmm32u(3), op2
.xmm32u(3), status_word
);
1637 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1638 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1641 BX_INFO(("MULPS_VpsWps: required SSE, use --enable-sse option"));
1642 exception(BX_UD_EXCEPTION
, 0, 0);
1648 * Multiply packed double precision FP numbers from XMM2/MEM to XMM1.
1649 * Possible floating point exceptions: #I, #D, #O, #U, #P
1651 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULPD_VpdWpd(bxInstruction_c
*i
)
1653 #if BX_SUPPORT_SSE >= 2
1654 BX_CPU_THIS_PTR
prepareSSE();
1656 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1658 /* op2 is a register or memory reference */
1660 op2
= BX_READ_XMM_REG(i
->rm());
1663 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1664 /* pointer, segment address pair */
1665 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1668 float_status_t status_word
;
1669 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1671 if (MXCSR
.get_DAZ())
1673 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
1674 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
1675 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
1676 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
1679 op1
.xmm64u(0) = float64_mul(op1
.xmm64u(0), op2
.xmm64u(0), status_word
);
1680 op1
.xmm64u(1) = float64_mul(op1
.xmm64u(1), op2
.xmm64u(1), status_word
);
1682 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1683 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1686 BX_INFO(("MULPD_VpdWpd: required SSE2, use --enable-sse option"));
1687 exception(BX_UD_EXCEPTION
, 0, 0);
1693 * Multiply the lower double precision FP number from XMM2/MEM to XMM1.
1694 * Possible floating point exceptions: #I, #D, #O, #U, #P
1696 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULSD_VsdWsd(bxInstruction_c
*i
)
1698 #if BX_SUPPORT_SSE >= 2
1699 BX_CPU_THIS_PTR
prepareSSE();
1701 float64 op1
= BX_READ_XMM_REG_LO_QWORD(i
->nnn()), op2
;
1703 /* op2 is a register or memory reference */
1705 op2
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1708 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1709 /* pointer, segment address pair */
1710 op2
= read_virtual_qword(i
->seg(), eaddr
);
1713 float_status_t status_word
;
1714 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1716 if (MXCSR
.get_DAZ())
1718 op1
= float64_denormal_to_zero(op1
);
1719 op2
= float64_denormal_to_zero(op2
);
1722 op1
= float64_mul(op1
, op2
, status_word
);
1723 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1724 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), op1
);
1727 BX_INFO(("MULSD_VsdWsd: required SSE2, use --enable-sse option"));
1728 exception(BX_UD_EXCEPTION
, 0, 0);
1734 * Multiply the lower single precision FP number from XMM2/MEM to XMM1.
1735 * Possible floating point exceptions: #I, #D, #O, #U, #P
1737 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULSS_VssWss(bxInstruction_c
*i
)
1739 #if BX_SUPPORT_SSE >= 1
1740 BX_CPU_THIS_PTR
prepareSSE();
1742 float32 op1
= BX_READ_XMM_REG_LO_DWORD(i
->nnn()), op2
;
1744 /* op2 is a register or memory reference */
1746 op2
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1749 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1750 /* pointer, segment address pair */
1751 op2
= read_virtual_dword(i
->seg(), eaddr
);
1754 float_status_t status_word
;
1755 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1757 if (MXCSR
.get_DAZ())
1759 op1
= float32_denormal_to_zero(op1
);
1760 op2
= float32_denormal_to_zero(op2
);
1763 op1
= float32_mul(op1
, op2
, status_word
);
1764 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1765 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(), op1
);
1768 BX_INFO(("MULSS_VssWss: required SSE, use --enable-sse option"));
1769 exception(BX_UD_EXCEPTION
, 0, 0);
1775 * Subtract packed single precision FP numbers from XMM2/MEM to XMM1.
1776 * Possible floating point exceptions: #I, #D, #O, #U, #P
1778 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBPS_VpsWps(bxInstruction_c
*i
)
1780 #if BX_SUPPORT_SSE >= 1
1781 BX_CPU_THIS_PTR
prepareSSE();
1783 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1785 /* op2 is a register or memory reference */
1787 op2
= BX_READ_XMM_REG(i
->rm());
1790 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1791 /* pointer, segment address pair */
1792 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1795 float_status_t status_word
;
1796 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1798 if (MXCSR
.get_DAZ()) {
1799 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
1800 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
1801 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
1802 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
1804 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
1805 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
1806 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
1807 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
1810 op1
.xmm32u(0) = float32_sub(op1
.xmm32u(0), op2
.xmm32u(0), status_word
);
1811 op1
.xmm32u(1) = float32_sub(op1
.xmm32u(1), op2
.xmm32u(1), status_word
);
1812 op1
.xmm32u(2) = float32_sub(op1
.xmm32u(2), op2
.xmm32u(2), status_word
);
1813 op1
.xmm32u(3) = float32_sub(op1
.xmm32u(3), op2
.xmm32u(3), status_word
);
1815 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1816 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1819 BX_INFO(("SUBPS_VpsWps: required SSE, use --enable-sse option"));
1820 exception(BX_UD_EXCEPTION
, 0, 0);
1826 * Subtract packed double precision FP numbers from XMM2/MEM to XMM1.
1827 * Possible floating point exceptions: #I, #D, #O, #U, #P
1829 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBPD_VpdWpd(bxInstruction_c
*i
)
1831 #if BX_SUPPORT_SSE >= 2
1832 BX_CPU_THIS_PTR
prepareSSE();
1834 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1836 /* op2 is a register or memory reference */
1838 op2
= BX_READ_XMM_REG(i
->rm());
1841 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1842 /* pointer, segment address pair */
1843 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1846 float_status_t status_word
;
1847 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1849 if (MXCSR
.get_DAZ())
1851 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
1852 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
1853 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
1854 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
1857 op1
.xmm64u(0) = float64_sub(op1
.xmm64u(0), op2
.xmm64u(0), status_word
);
1858 op1
.xmm64u(1) = float64_sub(op1
.xmm64u(1), op2
.xmm64u(1), status_word
);
1860 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1861 BX_WRITE_XMM_REG(i
->nnn(), op1
);
1864 BX_INFO(("SUBPD_VpdWpd: required SSE2, use --enable-sse option"));
1865 exception(BX_UD_EXCEPTION
, 0, 0);
1871 * Subtract the lower double precision FP number from XMM2/MEM to XMM1.
1872 * Possible floating point exceptions: #I, #D, #O, #U, #P
1874 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBSD_VsdWsd(bxInstruction_c
*i
)
1876 #if BX_SUPPORT_SSE >= 2
1877 BX_CPU_THIS_PTR
prepareSSE();
1879 float64 op1
= BX_READ_XMM_REG_LO_QWORD(i
->nnn()), op2
;
1881 /* op2 is a register or memory reference */
1883 op2
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
1886 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1887 /* pointer, segment address pair */
1888 op2
= read_virtual_qword(i
->seg(), eaddr
);
1891 float_status_t status_word
;
1892 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1894 if (MXCSR
.get_DAZ())
1896 op1
= float64_denormal_to_zero(op1
);
1897 op2
= float64_denormal_to_zero(op2
);
1900 op1
= float64_sub(op1
, op2
, status_word
);
1901 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1902 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), op1
);
1905 BX_INFO(("SUBSD_VsdWsd: required SSE2, use --enable-sse option"));
1906 exception(BX_UD_EXCEPTION
, 0, 0);
1912 * Subtract the lower single precision FP number from XMM2/MEM to XMM1.
1913 * Possible floating point exceptions: #I, #D, #O, #U, #P
1915 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBSS_VssWss(bxInstruction_c
*i
)
1917 #if BX_SUPPORT_SSE >= 1
1918 BX_CPU_THIS_PTR
prepareSSE();
1920 float32 op1
= BX_READ_XMM_REG_LO_DWORD(i
->nnn()), op2
;
1922 /* op2 is a register or memory reference */
1924 op2
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
1927 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1928 /* pointer, segment address pair */
1929 op2
= read_virtual_dword(i
->seg(), eaddr
);
1932 float_status_t status_word
;
1933 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1935 if (MXCSR
.get_DAZ())
1937 op1
= float32_denormal_to_zero(op1
);
1938 op2
= float32_denormal_to_zero(op2
);
1941 op1
= float32_sub(op1
, op2
, status_word
);
1942 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1943 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(), op1
);
1946 BX_INFO(("SUBSS_VssWss: required SSE, use --enable-sse option"));
1947 exception(BX_UD_EXCEPTION
, 0, 0);
1953 * Calculate the minimum single precision FP between XMM2/MEM to XMM1.
1954 * Possible floating point exceptions: #I, #D
1956 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINPS_VpsWps(bxInstruction_c
*i
)
1958 #if BX_SUPPORT_SSE >= 1
1959 BX_CPU_THIS_PTR
prepareSSE();
1961 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
1963 /* op2 is a register or memory reference */
1965 op2
= BX_READ_XMM_REG(i
->rm());
1968 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
1969 /* pointer, segment address pair */
1970 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
1973 float_status_t status_word
;
1974 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
1977 if (MXCSR
.get_DAZ()) {
1978 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
1979 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
1980 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
1981 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
1983 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
1984 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
1985 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
1986 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
1989 rc
= float32_compare(op1
.xmm32u(0), op2
.xmm32u(0), status_word
);
1990 op1
.xmm32u(0) = (rc
== float_relation_less
) ? op1
.xmm32u(0) : op2
.xmm32u(0);
1991 rc
= float32_compare(op1
.xmm32u(1), op2
.xmm32u(1), status_word
);
1992 op1
.xmm32u(1) = (rc
== float_relation_less
) ? op1
.xmm32u(1) : op2
.xmm32u(1);
1993 rc
= float32_compare(op1
.xmm32u(2), op2
.xmm32u(2), status_word
);
1994 op1
.xmm32u(2) = (rc
== float_relation_less
) ? op1
.xmm32u(2) : op2
.xmm32u(2);
1995 rc
= float32_compare(op1
.xmm32u(3), op2
.xmm32u(3), status_word
);
1996 op1
.xmm32u(3) = (rc
== float_relation_less
) ? op1
.xmm32u(3) : op2
.xmm32u(3);
1998 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
1999 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2002 BX_INFO(("MINPS_VpsWps: required SSE, use --enable-sse option"));
2003 exception(BX_UD_EXCEPTION
, 0, 0);
2009 * Calculate the minimum double precision FP between XMM2/MEM to XMM1.
2010 * Possible floating point exceptions: #I, #D
2012 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINPD_VpdWpd(bxInstruction_c
*i
)
2014 #if BX_SUPPORT_SSE >= 2
2015 BX_CPU_THIS_PTR
prepareSSE();
2017 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2019 /* op2 is a register or memory reference */
2021 op2
= BX_READ_XMM_REG(i
->rm());
2024 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2025 /* pointer, segment address pair */
2026 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2029 float_status_t status_word
;
2030 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2033 if (MXCSR
.get_DAZ())
2035 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
2036 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
2037 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
2038 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
2041 rc
= float64_compare(op1
.xmm64u(0), op2
.xmm64u(0), status_word
);
2042 op1
.xmm64u(0) = (rc
== float_relation_less
) ? op1
.xmm64u(0) : op2
.xmm64u(0);
2043 rc
= float64_compare(op1
.xmm64u(1), op2
.xmm64u(1), status_word
);
2044 op1
.xmm64u(1) = (rc
== float_relation_less
) ? op1
.xmm64u(1) : op2
.xmm64u(1);
2046 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2047 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2050 BX_INFO(("MINPD_VpdWpd: required SSE2, use --enable-sse option"));
2051 exception(BX_UD_EXCEPTION
, 0, 0);
2057 * Calculate the minimum scalar double precision FP between XMM2/MEM to XMM1.
2058 * Possible floating point exceptions: #I, #D
2060 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINSD_VsdWsd(bxInstruction_c
*i
)
2062 #if BX_SUPPORT_SSE >= 2
2063 BX_CPU_THIS_PTR
prepareSSE();
2065 float64 op1
= BX_READ_XMM_REG_LO_QWORD(i
->nnn()), op2
;
2067 /* op2 is a register or memory reference */
2069 op2
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
2072 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2073 /* pointer, segment address pair */
2074 op2
= read_virtual_qword(i
->seg(), eaddr
);
2077 float_status_t status_word
;
2078 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2080 if (MXCSR
.get_DAZ())
2082 op1
= float64_denormal_to_zero(op1
);
2083 op2
= float64_denormal_to_zero(op2
);
2086 int rc
= float64_compare(op1
, op2
, status_word
);
2087 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2088 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(),
2089 (rc
== float_relation_less
) ? op1
: op2
);
2092 BX_INFO(("MINSD_VsdWsd: required SSE2, use --enable-sse option"));
2093 exception(BX_UD_EXCEPTION
, 0, 0);
2099 * Calculate the minimum scalar single precision FP between XMM2/MEM to XMM1.
2100 * Possible floating point exceptions: #I, #D
2102 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINSS_VssWss(bxInstruction_c
*i
)
2104 #if BX_SUPPORT_SSE >= 1
2105 BX_CPU_THIS_PTR
prepareSSE();
2107 float32 op1
= BX_READ_XMM_REG_LO_DWORD(i
->nnn()), op2
;
2109 /* op2 is a register or memory reference */
2111 op2
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
2114 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2115 /* pointer, segment address pair */
2116 op2
= read_virtual_dword(i
->seg(), eaddr
);
2119 float_status_t status_word
;
2120 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2122 if (MXCSR
.get_DAZ())
2124 op1
= float32_denormal_to_zero(op1
);
2125 op2
= float32_denormal_to_zero(op2
);
2128 int rc
= float32_compare(op1
, op2
, status_word
);
2129 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2130 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(),
2131 (rc
== float_relation_less
) ? op1
: op2
);
2134 BX_INFO(("MINSS_VssWss: required SSE, use --enable-sse option"));
2135 exception(BX_UD_EXCEPTION
, 0, 0);
2141 * Divide packed single precision FP numbers from XMM2/MEM to XMM1.
2142 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
2144 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVPS_VpsWps(bxInstruction_c
*i
)
2146 #if BX_SUPPORT_SSE >= 1
2147 BX_CPU_THIS_PTR
prepareSSE();
2149 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2151 /* op2 is a register or memory reference */
2153 op2
= BX_READ_XMM_REG(i
->rm());
2156 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2157 /* pointer, segment address pair */
2158 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2161 float_status_t status_word
;
2162 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2164 if (MXCSR
.get_DAZ()) {
2165 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
2166 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
2167 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
2168 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
2170 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
2171 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
2172 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
2173 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
2176 op1
.xmm32u(0) = float32_div(op1
.xmm32u(0), op2
.xmm32u(0), status_word
);
2177 op1
.xmm32u(1) = float32_div(op1
.xmm32u(1), op2
.xmm32u(1), status_word
);
2178 op1
.xmm32u(2) = float32_div(op1
.xmm32u(2), op2
.xmm32u(2), status_word
);
2179 op1
.xmm32u(3) = float32_div(op1
.xmm32u(3), op2
.xmm32u(3), status_word
);
2181 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2182 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2185 BX_INFO(("DIVPS_VpsWps: required SSE, use --enable-sse option"));
2186 exception(BX_UD_EXCEPTION
, 0, 0);
2192 * Divide packed double precision FP numbers from XMM2/MEM to XMM1.
2193 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
2195 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVPD_VpdWpd(bxInstruction_c
*i
)
2197 #if BX_SUPPORT_SSE >= 2
2198 BX_CPU_THIS_PTR
prepareSSE();
2200 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2202 /* op2 is a register or memory reference */
2204 op2
= BX_READ_XMM_REG(i
->rm());
2207 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2208 /* pointer, segment address pair */
2209 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2212 float_status_t status_word
;
2213 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2215 if (MXCSR
.get_DAZ())
2217 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
2218 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
2219 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
2220 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
2223 op1
.xmm64u(0) = float64_div(op1
.xmm64u(0), op2
.xmm64u(0), status_word
);
2224 op1
.xmm64u(1) = float64_div(op1
.xmm64u(1), op2
.xmm64u(1), status_word
);
2226 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2227 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2230 BX_INFO(("DIVPD_VpdWpd: required SSE2, use --enable-sse option"));
2231 exception(BX_UD_EXCEPTION
, 0, 0);
2237 * Divide the lower double precision FP number from XMM2/MEM to XMM1.
2238 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
2240 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVSD_VsdWsd(bxInstruction_c
*i
)
2242 #if BX_SUPPORT_SSE >= 2
2243 BX_CPU_THIS_PTR
prepareSSE();
2245 float64 op1
= BX_READ_XMM_REG_LO_QWORD(i
->nnn()), op2
;
2247 /* op2 is a register or memory reference */
2249 op2
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
2252 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2253 /* pointer, segment address pair */
2254 op2
= read_virtual_qword(i
->seg(), eaddr
);
2257 float_status_t status_word
;
2258 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2260 if (MXCSR
.get_DAZ())
2262 op1
= float64_denormal_to_zero(op1
);
2263 op2
= float64_denormal_to_zero(op2
);
2266 op1
= float64_div(op1
, op2
, status_word
);
2267 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2268 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), op1
);
2271 BX_INFO(("DIVSD_VsdWsd: required SSE2, use --enable-sse option"));
2272 exception(BX_UD_EXCEPTION
, 0, 0);
2278 * Divide the lower single precision FP number from XMM2/MEM to XMM1.
2279 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
2281 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVSS_VssWss(bxInstruction_c
*i
)
2283 #if BX_SUPPORT_SSE >= 1
2284 BX_CPU_THIS_PTR
prepareSSE();
2286 float32 op1
= BX_READ_XMM_REG_LO_DWORD(i
->nnn()), op2
;
2288 /* op2 is a register or memory reference */
2290 op2
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
2293 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2294 /* pointer, segment address pair */
2295 op2
= read_virtual_dword(i
->seg(), eaddr
);
2298 float_status_t status_word
;
2299 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2301 if (MXCSR
.get_DAZ())
2303 op1
= float32_denormal_to_zero(op1
);
2304 op2
= float32_denormal_to_zero(op2
);
2307 op1
= float32_div(op1
, op2
, status_word
);
2308 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2309 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(), op1
);
2312 BX_INFO(("DIVSS_VssWss: required SSE, use --enable-sse option"));
2313 exception(BX_UD_EXCEPTION
, 0, 0);
2319 * Calculate the maximum single precision FP between XMM2/MEM to XMM1.
2320 * Possible floating point exceptions: #I, #D
2322 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXPS_VpsWps(bxInstruction_c
*i
)
2324 #if BX_SUPPORT_SSE >= 1
2325 BX_CPU_THIS_PTR
prepareSSE();
2327 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2329 /* op2 is a register or memory reference */
2331 op2
= BX_READ_XMM_REG(i
->rm());
2334 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2335 /* pointer, segment address pair */
2336 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2339 float_status_t status_word
;
2340 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2343 if (MXCSR
.get_DAZ()) {
2344 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
2345 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
2346 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
2347 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
2349 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
2350 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
2351 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
2352 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
2355 rc
= float32_compare(op1
.xmm32u(0), op2
.xmm32u(0), status_word
);
2356 op1
.xmm32u(0) = (rc
== float_relation_greater
) ? op1
.xmm32u(0) : op2
.xmm32u(0);
2357 rc
= float32_compare(op1
.xmm32u(1), op2
.xmm32u(1), status_word
);
2358 op1
.xmm32u(1) = (rc
== float_relation_greater
) ? op1
.xmm32u(1) : op2
.xmm32u(1);
2359 rc
= float32_compare(op1
.xmm32u(2), op2
.xmm32u(2), status_word
);
2360 op1
.xmm32u(2) = (rc
== float_relation_greater
) ? op1
.xmm32u(2) : op2
.xmm32u(2);
2361 rc
= float32_compare(op1
.xmm32u(3), op2
.xmm32u(3), status_word
);
2362 op1
.xmm32u(3) = (rc
== float_relation_greater
) ? op1
.xmm32u(3) : op2
.xmm32u(3);
2364 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2365 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2368 BX_INFO(("MAXPS_VpsWps: required SSE, use --enable-sse option"));
2369 exception(BX_UD_EXCEPTION
, 0, 0);
2375 * Calculate the maximum double precision FP between XMM2/MEM to XMM1.
2376 * Possible floating point exceptions: #I, #D
2378 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXPD_VpdWpd(bxInstruction_c
*i
)
2380 #if BX_SUPPORT_SSE >= 2
2381 BX_CPU_THIS_PTR
prepareSSE();
2383 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2385 /* op2 is a register or memory reference */
2387 op2
= BX_READ_XMM_REG(i
->rm());
2390 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2391 /* pointer, segment address pair */
2392 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2395 float_status_t status_word
;
2396 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2399 if (MXCSR
.get_DAZ())
2401 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
2402 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
2403 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
2404 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
2407 rc
= float64_compare(op1
.xmm64u(0), op2
.xmm64u(0), status_word
);
2408 op1
.xmm64u(0) = (rc
== float_relation_greater
) ? op1
.xmm64u(0) : op2
.xmm64u(0);
2409 rc
= float64_compare(op1
.xmm64u(1), op2
.xmm64u(1), status_word
);
2410 op1
.xmm64u(1) = (rc
== float_relation_greater
) ? op1
.xmm64u(1) : op2
.xmm64u(1);
2412 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2413 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2416 BX_INFO(("MAXPD_VpdWpd: required SSE2, use --enable-sse option"));
2417 exception(BX_UD_EXCEPTION
, 0, 0);
2423 * Calculate the maximum scalar double precision FP between XMM2/MEM to XMM1.
2424 * Possible floating point exceptions: #I, #D
2426 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXSD_VsdWsd(bxInstruction_c
*i
)
2428 #if BX_SUPPORT_SSE >= 2
2429 BX_CPU_THIS_PTR
prepareSSE();
2431 float64 op1
= BX_READ_XMM_REG_LO_QWORD(i
->nnn()), op2
;
2433 /* op2 is a register or memory reference */
2435 op2
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
2438 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2439 /* pointer, segment address pair */
2440 op2
= read_virtual_qword(i
->seg(), eaddr
);
2443 float_status_t status_word
;
2444 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2446 if (MXCSR
.get_DAZ())
2448 op1
= float64_denormal_to_zero(op1
);
2449 op2
= float64_denormal_to_zero(op2
);
2452 int rc
= float64_compare(op1
, op2
, status_word
);
2453 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2454 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(),
2455 (rc
== float_relation_greater
) ? op1
: op2
);
2458 BX_INFO(("MAXSD_VsdWsd: required SSE2, use --enable-sse option"));
2459 exception(BX_UD_EXCEPTION
, 0, 0);
2465 * Calculate the maxumim scalar single precision FP between XMM2/MEM to XMM1.
2466 * Possible floating point exceptions: #I, #D
2468 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXSS_VssWss(bxInstruction_c
*i
)
2470 #if BX_SUPPORT_SSE >= 1
2471 BX_CPU_THIS_PTR
prepareSSE();
2473 float32 op1
= BX_READ_XMM_REG_LO_DWORD(i
->nnn()), op2
;
2475 /* op2 is a register or memory reference */
2477 op2
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
2480 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2481 /* pointer, segment address pair */
2482 op2
= read_virtual_dword(i
->seg(), eaddr
);
2485 float_status_t status_word
;
2486 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2488 if (MXCSR
.get_DAZ())
2490 op1
= float32_denormal_to_zero(op1
);
2491 op2
= float32_denormal_to_zero(op2
);
2494 int rc
= float32_compare(op1
, op2
, status_word
);
2495 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2496 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(),
2497 (rc
== float_relation_greater
) ? op1
: op2
);
2500 BX_INFO(("MAXSS_VssWss: required SSE, use --enable-sse option"));
2501 exception(BX_UD_EXCEPTION
, 0, 0);
2507 * Add horizontally packed double precision FP in XMM2/MEM from XMM1.
2508 * Possible floating point exceptions: #I, #D, #O, #U, #P
2510 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HADDPD_VpdWpd(bxInstruction_c
*i
)
2512 #if BX_SUPPORT_SSE >= 3
2513 BX_CPU_THIS_PTR
prepareSSE();
2515 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2517 /* op2 is a register or memory reference */
2519 op2
= BX_READ_XMM_REG(i
->rm());
2522 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2523 /* pointer, segment address pair */
2524 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2527 float_status_t status_word
;
2528 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2530 if (MXCSR
.get_DAZ())
2532 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
2533 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
2534 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
2535 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
2538 op1
.xmm64u(0) = float64_add(op1
.xmm64u(0), op1
.xmm64u(1), status_word
);
2539 op1
.xmm64u(1) = float64_add(op2
.xmm64u(0), op2
.xmm64u(1), status_word
);
2541 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2542 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2545 BX_INFO(("HADDPD_VpdWpd: required SSE3, use --enable-sse option"));
2546 exception(BX_UD_EXCEPTION
, 0, 0);
2552 * Add horizontally packed single precision FP in XMM2/MEM from XMM1.
2553 * Possible floating point exceptions: #I, #D, #O, #U, #P
2555 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HADDPS_VpsWps(bxInstruction_c
*i
)
2557 #if BX_SUPPORT_SSE >= 3
2558 BX_CPU_THIS_PTR
prepareSSE();
2560 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2562 /* op2 is a register or memory reference */
2564 op2
= BX_READ_XMM_REG(i
->rm());
2567 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2568 /* pointer, segment address pair */
2569 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2572 float_status_t status_word
;
2573 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2575 if (MXCSR
.get_DAZ()) {
2576 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
2577 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
2578 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
2579 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
2581 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
2582 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
2583 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
2584 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
2587 op1
.xmm32u(0) = float32_add(op1
.xmm32u(0), op1
.xmm32u(1), status_word
);
2588 op1
.xmm32u(1) = float32_add(op1
.xmm32u(2), op1
.xmm32u(3), status_word
);
2589 op1
.xmm32u(2) = float32_add(op2
.xmm32u(0), op2
.xmm32u(1), status_word
);
2590 op1
.xmm32u(3) = float32_add(op2
.xmm32u(2), op2
.xmm32u(3), status_word
);
2592 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2593 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2596 BX_INFO(("HADDPS_VpsWps: required SSE3, use --enable-sse option"));
2597 exception(BX_UD_EXCEPTION
, 0, 0);
2603 * Subtract horizontally packed double precision FP in XMM2/MEM from XMM1.
2604 * Possible floating point exceptions: #I, #D, #O, #U, #P
2606 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HSUBPD_VpdWpd(bxInstruction_c
*i
)
2608 #if BX_SUPPORT_SSE >= 3
2609 BX_CPU_THIS_PTR
prepareSSE();
2611 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2613 /* op2 is a register or memory reference */
2615 op2
= BX_READ_XMM_REG(i
->rm());
2618 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2619 /* pointer, segment address pair */
2620 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2623 float_status_t status_word
;
2624 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2626 if (MXCSR
.get_DAZ())
2628 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
2629 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
2630 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
2631 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
2634 op1
.xmm64u(0) = float64_sub(op1
.xmm64u(0), op1
.xmm64u(1), status_word
);
2635 op1
.xmm64u(1) = float64_sub(op2
.xmm64u(0), op2
.xmm64u(1), status_word
);
2637 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2638 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2641 BX_INFO(("HSUBPD_VpdWpd: required SSE3, use --enable-sse option"));
2642 exception(BX_UD_EXCEPTION
, 0, 0);
2648 * Subtract horizontally packed single precision FP in XMM2/MEM from XMM1.
2649 * Possible floating point exceptions: #I, #D, #O, #U, #P
2651 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HSUBPS_VpsWps(bxInstruction_c
*i
)
2653 #if BX_SUPPORT_SSE >= 3
2654 BX_CPU_THIS_PTR
prepareSSE();
2656 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2658 /* op2 is a register or memory reference */
2660 op2
= BX_READ_XMM_REG(i
->rm());
2663 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2664 /* pointer, segment address pair */
2665 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2668 float_status_t status_word
;
2669 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2671 if (MXCSR
.get_DAZ()) {
2672 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
2673 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
2674 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
2675 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
2677 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
2678 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
2679 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
2680 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
2683 op1
.xmm32u(0) = float32_sub(op1
.xmm32u(0), op1
.xmm32u(1), status_word
);
2684 op1
.xmm32u(1) = float32_sub(op1
.xmm32u(2), op1
.xmm32u(3), status_word
);
2685 op1
.xmm32u(2) = float32_sub(op2
.xmm32u(0), op2
.xmm32u(1), status_word
);
2686 op1
.xmm32u(3) = float32_sub(op2
.xmm32u(2), op2
.xmm32u(3), status_word
);
2688 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2689 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2692 BX_INFO(("HSUBPS_VpsWps: required SSE3, use --enable-sse option"));
2693 exception(BX_UD_EXCEPTION
, 0, 0);
2699 * Compare packed single precision FP values using Ib as comparison predicate.
2700 * Possible floating point exceptions: #I, #D
2702 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPS_VpsWpsIb(bxInstruction_c
*i
)
2704 #if BX_SUPPORT_SSE >= 1
2705 BX_CPU_THIS_PTR
prepareSSE();
2707 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2709 /* op2 is a register or memory reference */
2711 op2
= BX_READ_XMM_REG(i
->rm());
2714 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2715 /* pointer, segment address pair */
2716 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2719 float_status_t status
;
2720 mxcsr_to_softfloat_status_word(status
, MXCSR
);
2723 /* mask used bits, ignore reserved */
2725 BX_ERROR(("CMPPS_VpsWpsIb: unrecognized predicate %u", i
->Ib()));
2729 if (MXCSR
.get_DAZ()) {
2730 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
2731 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
2732 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
2733 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
2735 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
2736 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
2737 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
2738 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
2744 compare32
[ib
](op1
.xmm32u(0), op2
.xmm32u(0), status
) ? 0xFFFFFFFF : 0;
2746 compare32
[ib
](op1
.xmm32u(1), op2
.xmm32u(1), status
) ? 0xFFFFFFFF : 0;
2748 compare32
[ib
](op1
.xmm32u(2), op2
.xmm32u(2), status
) ? 0xFFFFFFFF : 0;
2750 compare32
[ib
](op1
.xmm32u(3), op2
.xmm32u(3), status
) ? 0xFFFFFFFF : 0;
2757 compare32
[ib
](op1
.xmm32u(0), op2
.xmm32u(0), status
) ? 0 : 0xFFFFFFFF;
2759 compare32
[ib
](op1
.xmm32u(1), op2
.xmm32u(1), status
) ? 0 : 0xFFFFFFFF;
2761 compare32
[ib
](op1
.xmm32u(2), op2
.xmm32u(2), status
) ? 0 : 0xFFFFFFFF;
2763 compare32
[ib
](op1
.xmm32u(3), op2
.xmm32u(3), status
) ? 0 : 0xFFFFFFFF;
2766 BX_CPU_THIS_PTR
check_exceptionsSSE(status
.float_exception_flags
);
2767 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2770 BX_INFO(("CMPPS_VpsWpsIb: required SSE, use --enable-sse option"));
2771 exception(BX_UD_EXCEPTION
, 0, 0);
2777 * Compare packed double precision FP values using Ib as comparison predicate.
2778 * Possible floating point exceptions: #I, #D
2780 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPD_VpdWpdIb(bxInstruction_c
*i
)
2782 #if BX_SUPPORT_SSE >= 2
2783 BX_CPU_THIS_PTR
prepareSSE();
2785 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2787 /* op2 is a register or memory reference */
2789 op2
= BX_READ_XMM_REG(i
->rm());
2792 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2793 /* pointer, segment address pair */
2794 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2797 float_status_t status
;
2798 mxcsr_to_softfloat_status_word(status
, MXCSR
);
2801 /* mask used bits, ignore reserved */
2803 BX_ERROR(("CMPPD_VpdWpdIb: unrecognized predicate %u", i
->Ib()));
2807 if (MXCSR
.get_DAZ())
2809 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
2810 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
2811 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
2812 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
2817 op1
.xmm64u(0) = compare64
[ib
](op1
.xmm64u(0), op2
.xmm64u(0), status
) ?
2818 BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
2819 op1
.xmm64u(1) = compare64
[ib
](op1
.xmm64u(1), op2
.xmm64u(1), status
) ?
2820 BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
2826 op1
.xmm64u(0) = compare64
[ib
](op1
.xmm64u(0), op2
.xmm64u(0), status
) ?
2827 0 : BX_CONST64(0xFFFFFFFFFFFFFFFF);
2828 op1
.xmm64u(1) = compare64
[ib
](op1
.xmm64u(1), op2
.xmm64u(1), status
) ?
2829 0 : BX_CONST64(0xFFFFFFFFFFFFFFFF);
2832 BX_CPU_THIS_PTR
check_exceptionsSSE(status
.float_exception_flags
);
2833 BX_WRITE_XMM_REG(i
->nnn(), op1
);
2836 BX_INFO(("CMPPD_VpdWpdIb: required SSE2, use --enable-sse option"));
2837 exception(BX_UD_EXCEPTION
, 0, 0);
2843 * Compare double precision FP values using Ib as comparison predicate.
2844 * Possible floating point exceptions: #I, #D
2846 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSD_VsdWsdIb(bxInstruction_c
*i
)
2848 #if BX_SUPPORT_SSE >= 2
2849 BX_CPU_THIS_PTR
prepareSSE();
2851 float64 op1
= BX_READ_XMM_REG_LO_QWORD(i
->nnn()), op2
, result
= 0;
2853 /* op2 is a register or memory reference */
2855 op2
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
2858 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2859 /* pointer, segment address pair */
2860 op2
= read_virtual_qword(i
->seg(), eaddr
);
2863 float_status_t status_word
;
2864 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2867 /* mask used bits, ignore reserved */
2869 BX_ERROR(("CMPSD_VsdWsdIb: unrecognized predicate %u", i
->Ib()));
2873 if (MXCSR
.get_DAZ())
2875 op1
= float64_denormal_to_zero(op1
);
2876 op2
= float64_denormal_to_zero(op2
);
2880 if(compare64
[ib
](op1
, op2
, status_word
)) {
2881 result
= BX_CONST64(0xFFFFFFFFFFFFFFFF);
2886 if(compare64
[ib
-4](op1
, op2
, status_word
)) {
2889 result
= BX_CONST64(0xFFFFFFFFFFFFFFFF);
2893 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2894 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), result
);
2896 BX_INFO(("CMPSD_VsdWsdIb: required SSE2, use --enable-sse option"));
2897 exception(BX_UD_EXCEPTION
, 0, 0);
2903 * Compare single precision FP values using Ib as comparison predicate.
2904 * Possible floating point exceptions: #I, #D
2906 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSS_VssWssIb(bxInstruction_c
*i
)
2908 #if BX_SUPPORT_SSE >= 1
2909 BX_CPU_THIS_PTR
prepareSSE();
2911 float32 op1
= BX_READ_XMM_REG_LO_DWORD(i
->nnn()), op2
, result
= 0;
2913 /* op2 is a register or memory reference */
2915 op2
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
2918 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2919 /* pointer, segment address pair */
2920 op2
= read_virtual_dword(i
->seg(), eaddr
);
2923 float_status_t status_word
;
2924 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2927 /* mask used bits, ignore reserved */
2929 BX_ERROR(("CMPSS_VssWssIb: unrecognized predicate %u", i
->Ib()));
2933 if (MXCSR
.get_DAZ())
2935 op1
= float32_denormal_to_zero(op1
);
2936 op2
= float32_denormal_to_zero(op2
);
2940 if(compare32
[ib
](op1
, op2
, status_word
)) {
2941 result
= 0xFFFFFFFF;
2946 if(compare32
[ib
-4](op1
, op2
, status_word
)) {
2949 result
= 0xFFFFFFFF;
2953 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2954 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(), result
);
2956 BX_INFO(("CMPSS_VssWssIb: required SSE, use --enable-sse option"));
2957 exception(BX_UD_EXCEPTION
, 0, 0);
2963 * Add/Subtract packed double precision FP numbers from XMM2/MEM to XMM1.
2964 * Possible floating point exceptions: #I, #D, #O, #U, #P
2966 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSUBPD_VpdWpd(bxInstruction_c
*i
)
2968 #if BX_SUPPORT_SSE >= 3
2969 BX_CPU_THIS_PTR
prepareSSE();
2971 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
2973 /* op2 is a register or memory reference */
2975 op2
= BX_READ_XMM_REG(i
->rm());
2978 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
2979 /* pointer, segment address pair */
2980 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
2983 float_status_t status_word
;
2984 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
2986 if (MXCSR
.get_DAZ())
2988 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
2989 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
2990 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
2991 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
2994 op1
.xmm64u(0) = float64_sub(op1
.xmm64u(0), op2
.xmm64u(0), status_word
);
2995 op1
.xmm64u(1) = float64_add(op1
.xmm64u(1), op2
.xmm64u(1), status_word
);
2997 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
2998 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3001 BX_INFO(("ADDSUBPD_VpdWpd: required SSE3, use --enable-sse option"));
3002 exception(BX_UD_EXCEPTION
, 0, 0);
3008 * Add/Substract packed single precision FP numbers from XMM2/MEM to XMM1.
3009 * Possible floating point exceptions: #I, #D, #O, #U, #P
3011 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSUBPS_VpsWps(bxInstruction_c
*i
)
3013 #if BX_SUPPORT_SSE >= 3
3014 BX_CPU_THIS_PTR
prepareSSE();
3016 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
3018 /* op2 is a register or memory reference */
3020 op2
= BX_READ_XMM_REG(i
->rm());
3023 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3024 /* pointer, segment address pair */
3025 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3028 float_status_t status_word
;
3029 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
3031 if (MXCSR
.get_DAZ()) {
3032 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
3033 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
3034 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
3035 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
3037 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
3038 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
3039 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
3040 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
3043 op1
.xmm32u(0) = float32_sub(op1
.xmm32u(0), op2
.xmm32u(0), status_word
);
3044 op1
.xmm32u(1) = float32_add(op1
.xmm32u(1), op2
.xmm32u(1), status_word
);
3045 op1
.xmm32u(2) = float32_sub(op1
.xmm32u(2), op2
.xmm32u(2), status_word
);
3046 op1
.xmm32u(3) = float32_add(op1
.xmm32u(3), op2
.xmm32u(3), status_word
);
3048 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
3049 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3052 BX_INFO(("ADDSUBPS_VpsWps: required SSE3, use --enable-sse option"));
3053 exception(BX_UD_EXCEPTION
, 0, 0);
3057 // for 3-byte opcodes
3058 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
3061 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDPS_VpsWpsIb(bxInstruction_c
*i
)
3063 #if BX_SUPPORT_SSE >= 4
3064 BX_CPU_THIS_PTR
prepareSSE();
3066 BxPackedXmmRegister op
;
3068 /* op is a register or memory reference */
3070 op
= BX_READ_XMM_REG(i
->rm());
3073 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3074 /* pointer, segment address pair */
3075 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
3078 float_status_t status_word
;
3079 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
3080 Bit8u control
= i
->Ib();
3082 // override MXCSR rounding mode with control coming from imm8
3083 if ((control
& 0x4) == 0)
3084 status_word
.float_rounding_mode
= control
& 0x3;
3086 if (MXCSR
.get_DAZ()) {
3087 op
.xmm32u(0) = float32_denormal_to_zero(op
.xmm32u(0));
3088 op
.xmm32u(1) = float32_denormal_to_zero(op
.xmm32u(1));
3089 op
.xmm32u(2) = float32_denormal_to_zero(op
.xmm32u(2));
3090 op
.xmm32u(3) = float32_denormal_to_zero(op
.xmm32u(3));
3093 for (unsigned j
=0; j
< 4; j
++) {
3094 if (float32_is_nan(op
.xmm32u(j
))) {
3095 op
.xmm32u(j
) = propagateFloat32NaN(op
.xmm32u(j
), status_word
);
3098 op
.xmm32u(j
) = float32_to_int32(op
.xmm32u(j
), status_word
);
3099 op
.xmm32u(j
) = int32_to_float32(op
.xmm32u(j
), status_word
);
3103 // ignore precision exception result
3105 status_word
.float_exception_flags
&= ~float_flag_inexact
;
3107 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
3108 BX_WRITE_XMM_REG(i
->nnn(), op
);
3110 BX_INFO(("ROUNDPS_VpsWpsIb: required SSE4, use --enable-sse option"));
3111 exception(BX_UD_EXCEPTION
, 0, 0);
3116 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDPD_VpdWpdIb(bxInstruction_c
*i
)
3118 #if BX_SUPPORT_SSE >= 4
3119 BX_CPU_THIS_PTR
prepareSSE();
3121 BxPackedXmmRegister op
;
3123 /* op is a register or memory reference */
3125 op
= BX_READ_XMM_REG(i
->rm());
3128 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3129 /* pointer, segment address pair */
3130 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
3133 float_status_t status_word
;
3134 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
3135 Bit8u control
= i
->Ib();
3137 // override MXCSR rounding mode with control coming from imm8
3138 if ((control
& 0x4) == 0)
3139 status_word
.float_rounding_mode
= control
& 0x3;
3141 if (MXCSR
.get_DAZ()) {
3142 op
.xmm64u(0) = float64_denormal_to_zero(op
.xmm64u(0));
3143 op
.xmm64u(1) = float64_denormal_to_zero(op
.xmm64u(1));
3146 for (unsigned j
=0; j
< 2; j
++) {
3147 if (float64_is_nan(op
.xmm64u(j
))) {
3148 op
.xmm64u(j
) = propagateFloat64NaN(op
.xmm64u(j
), status_word
);
3151 op
.xmm64u(j
) = float64_to_int64(op
.xmm64u(j
), status_word
);
3152 op
.xmm64u(j
) = int64_to_float64(op
.xmm64u(j
), status_word
);
3156 // ignore precision exception result
3158 status_word
.float_exception_flags
&= ~float_flag_inexact
;
3160 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
3161 BX_WRITE_XMM_REG(i
->nnn(), op
);
3163 BX_INFO(("ROUNDPD_VpdWpdIb: required SSE4, use --enable-sse option"));
3164 exception(BX_UD_EXCEPTION
, 0, 0);
3169 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDSS_VssWssIb(bxInstruction_c
*i
)
3171 #if BX_SUPPORT_SSE >= 4
3172 BX_CPU_THIS_PTR
prepareSSE();
3176 /* op is a register or memory reference */
3178 op
= BX_READ_XMM_REG_LO_DWORD(i
->rm());
3181 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3182 /* pointer, segment address pair */
3183 op
= read_virtual_dword(i
->seg(), eaddr
);
3186 float_status_t status_word
;
3187 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
3188 Bit8u control
= i
->Ib();
3190 // override MXCSR rounding mode with control coming from imm8
3191 if ((control
& 0x4) == 0)
3192 status_word
.float_rounding_mode
= control
& 0x3;
3194 if (MXCSR
.get_DAZ()) op
= float32_denormal_to_zero(op
);
3196 if (float32_is_nan(op
)) {
3197 op
= propagateFloat32NaN(op
, status_word
);
3200 op
= int32_to_float32(float32_to_int32(op
, status_word
), status_word
);
3203 // ignore precision exception result
3205 status_word
.float_exception_flags
&= ~float_flag_inexact
;
3207 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
3208 BX_WRITE_XMM_REG_LO_DWORD(i
->nnn(), op
);
3210 BX_INFO(("ROUNDSS_VssWssIb: required SSE4, use --enable-sse option"));
3211 exception(BX_UD_EXCEPTION
, 0, 0);
3216 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDSD_VsdWsdIb(bxInstruction_c
*i
)
3218 #if BX_SUPPORT_SSE >= 4
3219 BX_CPU_THIS_PTR
prepareSSE();
3223 /* op is a register or memory reference */
3225 op
= BX_READ_XMM_REG_LO_QWORD(i
->rm());
3228 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3229 /* pointer, segment address pair */
3230 op
= read_virtual_qword(i
->seg(), eaddr
);
3233 float_status_t status_word
;
3234 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
3235 Bit8u control
= i
->Ib();
3237 // override MXCSR rounding mode with control coming from imm8
3238 if ((control
& 0x4) == 0)
3239 status_word
.float_rounding_mode
= control
& 0x3;
3241 if (MXCSR
.get_DAZ()) op
= float64_denormal_to_zero(op
);
3243 if (float64_is_nan(op
))
3244 op
= propagateFloat64NaN(op
, status_word
);
3246 op
= int64_to_float64(float64_to_int64(op
, status_word
), status_word
);
3248 // ignore precision exception result
3250 status_word
.float_exception_flags
&= ~float_flag_inexact
;
3252 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
3253 BX_WRITE_XMM_REG_LO_QWORD(i
->nnn(), op
);
3255 BX_INFO(("ROUNDSD_VsdWsdIb: required SSE4, use --enable-sse option"));
3256 exception(BX_UD_EXCEPTION
, 0, 0);
3260 /* Opcode: 66 0F 3A 40
3261 * Selectively multiply packed SP floating-point values from xmm1 with
3262 * packed SP floating-point values from xmm2, add and selectively
3263 * store the packed SP floating-point values or zero values to xmm1
3265 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPS_VpsWpsIb(bxInstruction_c
*i
)
3267 #if BX_SUPPORT_SSE >= 4
3268 BX_CPU_THIS_PTR
prepareSSE();
3270 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, tmp
;
3271 Bit8u mask
= i
->Ib();
3273 /* op2 is a register or memory reference */
3275 op2
= BX_READ_XMM_REG(i
->rm());
3278 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3279 /* pointer, segment address pair */
3280 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3283 float_status_t status_word
;
3284 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
3286 if (MXCSR
.get_DAZ()) {
3287 op1
.xmm32u(0) = float32_denormal_to_zero(op1
.xmm32u(0));
3288 op1
.xmm32u(1) = float32_denormal_to_zero(op1
.xmm32u(1));
3289 op1
.xmm32u(2) = float32_denormal_to_zero(op1
.xmm32u(2));
3290 op1
.xmm32u(3) = float32_denormal_to_zero(op1
.xmm32u(3));
3292 op2
.xmm32u(0) = float32_denormal_to_zero(op2
.xmm32u(0));
3293 op2
.xmm32u(1) = float32_denormal_to_zero(op2
.xmm32u(1));
3294 op2
.xmm32u(2) = float32_denormal_to_zero(op2
.xmm32u(2));
3295 op2
.xmm32u(3) = float32_denormal_to_zero(op2
.xmm32u(3));
3298 tmp
.xmm64u(0) = tmp
.xmm64u(1) = 0;
3301 tmp
.xmm32u(0) = float32_mul(op1
.xmm32u(0), op2
.xmm32u(0), status_word
);
3303 tmp
.xmm32u(1) = float32_mul(op1
.xmm32u(1), op2
.xmm32u(1), status_word
);
3305 tmp
.xmm32u(2) = float32_mul(op1
.xmm32u(2), op2
.xmm32u(2), status_word
);
3307 tmp
.xmm32u(3) = float32_mul(op1
.xmm32u(3), op2
.xmm32u(3), status_word
);
3309 float32 r1
= float32_add(tmp
.xmm32u(0), tmp
.xmm32u(1), status_word
);
3310 float32 r2
= float32_add(tmp
.xmm32u(2), tmp
.xmm32u(3), status_word
);
3311 float32 r
= float32_add(r1
, r2
, status_word
);
3313 op1
.xmm64u(0) = op1
.xmm64u(1) = 0;
3315 if (mask
& 0x01) op1
.xmm32u(0) = r
;
3316 if (mask
& 0x02) op1
.xmm32u(1) = r
;
3317 if (mask
& 0x04) op1
.xmm32u(2) = r
;
3318 if (mask
& 0x08) op1
.xmm32u(3) = r
;
3320 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
3321 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3323 BX_INFO(("DPPS_VpsWpsIb: required SSE4, use --enable-sse option"));
3324 exception(BX_UD_EXCEPTION
, 0, 0);
3328 /* Opcode: 66 0F 3A 41
3329 * Selectively multiply packed DP floating-point values from xmm1 with
3330 * packed DP floating-point values from xmm2, add and selectively
3331 * store the packed DP floating-point values or zero values to xmm1
3333 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPD_VpdWpdIb(bxInstruction_c
*i
)
3335 #if BX_SUPPORT_SSE >= 4
3336 BX_CPU_THIS_PTR
prepareSSE();
3338 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, tmp
;
3339 Bit8u mask
= i
->Ib();
3341 /* op2 is a register or memory reference */
3343 op2
= BX_READ_XMM_REG(i
->rm());
3346 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
3347 /* pointer, segment address pair */
3348 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
3351 float_status_t status_word
;
3352 mxcsr_to_softfloat_status_word(status_word
, MXCSR
);
3354 if (MXCSR
.get_DAZ()) {
3355 op1
.xmm64u(0) = float64_denormal_to_zero(op1
.xmm64u(0));
3356 op1
.xmm64u(1) = float64_denormal_to_zero(op1
.xmm64u(1));
3358 op2
.xmm64u(0) = float64_denormal_to_zero(op2
.xmm64u(0));
3359 op2
.xmm64u(1) = float64_denormal_to_zero(op2
.xmm64u(1));
3362 tmp
.xmm64u(0) = tmp
.xmm64u(1) = 0;
3365 tmp
.xmm64u(0) = float64_mul(op1
.xmm64u(0), op2
.xmm64u(0), status_word
);
3367 tmp
.xmm64u(1) = float64_mul(op1
.xmm64u(1), op2
.xmm64u(1), status_word
);
3369 float64 result
= float64_add(tmp
.xmm64u(0), tmp
.xmm64u(1), status_word
);
3371 op1
.xmm64u(0) = op1
.xmm64u(1) = 0;
3373 if (mask
& 0x01) op1
.xmm64u(0) = result
;
3374 if (mask
& 0x02) op1
.xmm64u(1) = result
;
3376 BX_CPU_THIS_PTR
check_exceptionsSSE(status_word
.float_exception_flags
);
3377 BX_WRITE_XMM_REG(i
->nnn(), op1
);
3379 BX_INFO(("DPPD_VpdWpdIb: required SSE4, use --enable-sse option"));
3380 exception(BX_UD_EXCEPTION
, 0, 0);
3384 #endif // BX_SUPPORT_SSE >= 4 || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)