- added instructions how to update the online documentation
[bochs-mirror.git] / cpu / sse_pfp.cc
blob451014efc191b35cdf6f4a34c19e57c77f34d4fd
1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse_pfp.cc,v 1.51 2008/10/08 10:51:38 sshwarts Exp $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 // Copyright (c) 2003 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 /////////////////////////////////////////////////////////////////////////
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
29 #if BX_SUPPORT_SSE
31 #include "fpu/softfloat-specialize.h"
33 void BX_CPU_C::check_exceptionsSSE(int exceptions_flags)
35 int unmasked = ~(MXCSR.get_exceptions_masks()) & exceptions_flags;
36 MXCSR.set_exceptions(exceptions_flags);
38 if (unmasked)
40 if(BX_CPU_THIS_PTR cr4.get_OSXMMEXCPT())
41 exception(BX_XM_EXCEPTION, 0, 0);
42 else
43 exception(BX_UD_EXCEPTION, 0, 0);
47 BX_CPP_INLINE void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mxcsr)
49 status.float_exception_flags = 0; // clear exceptions before execution
50 status.float_nan_handling_mode = float_first_operand_nan;
51 status.float_rounding_mode = mxcsr.get_rounding_mode();
52 // if underflow is masked and FUZ is 1, set it to 1, else to 0
53 status.flush_underflow_to_zero =
54 (mxcsr.get_flush_masked_underflow() && mxcsr.get_UM()) ? 1 : 0;
57 /* Comparison predicate for CMPSS/CMPPS instructions */
58 static float32_compare_method compare32[4] = {
59 float32_eq,
60 float32_lt,
61 float32_le,
62 float32_unordered
65 #if BX_SUPPORT_SSE >= 2
66 /* Comparison predicate for CMPSD/CMPPD instructions */
67 static float64_compare_method compare64[4] = {
68 float64_eq,
69 float64_lt,
70 float64_le,
71 float64_unordered
73 #endif
75 #endif // BX_SUPPORT_SSE
78 * Opcode: 0F 2A
79 * Convert two 32bit signed integers from MMX/MEM to two single precision FP
80 * When a conversion is inexact, the value returned is rounded according
81 * to rounding control bits in MXCSR register.
82 * Possible floating point exceptions: #P
84 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PS_VpsQq(bxInstruction_c *i)
86 #if BX_SUPPORT_SSE >= 1
87 BX_CPU_THIS_PTR prepareSSE();
89 /* check floating point status word for a pending FPU exceptions */
90 FPU_check_pending_exceptions();
92 BxPackedMmxRegister op;
93 BxPackedXmmRegister result;
95 /* op is a register or memory reference */
96 if (i->modC0()) {
97 op = BX_READ_MMX_REG(i->rm());
99 else {
100 // do not cause transition to MMX state if no MMX register touched
101 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
102 /* pointer, segment address pair */
103 MMXUQ(op) = read_virtual_qword(i->seg(), eaddr);
106 BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX state transition */
108 float_status_t status_word;
109 mxcsr_to_softfloat_status_word(status_word, MXCSR);
111 result.xmm32u(0) = int32_to_float32(MMXUD0(op), status_word);
112 result.xmm32u(1) = int32_to_float32(MMXUD1(op), status_word);
114 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
115 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), result.xmm64u(0));
116 #else
117 BX_INFO(("CVTPI2PS_VpsQq: required SSE, use --enable-sse option"));
118 exception(BX_UD_EXCEPTION, 0, 0);
119 #endif
123 * Opcode: 66 0F 2A
124 * Convert two 32bit signed integers from MMX/MEM to two double precision FP
125 * Possible floating point exceptions: -
127 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PD_VpdQq(bxInstruction_c *i)
129 #if BX_SUPPORT_SSE >= 2
130 BX_CPU_THIS_PTR prepareSSE();
132 /* check floating point status word for a pending FPU exceptions */
133 FPU_check_pending_exceptions();
135 BxPackedMmxRegister op;
136 BxPackedXmmRegister result;
138 /* op is a register or memory reference */
139 if (i->modC0()) {
140 op = BX_READ_MMX_REG(i->rm());
142 else {
143 // do not cause transition to MMX state if no MMX register touched
144 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
145 /* pointer, segment address pair */
146 MMXUQ(op) = read_virtual_qword(i->seg(), eaddr);
149 BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX state transition */
151 result.xmm64u(0) = int32_to_float64(MMXUD0(op));
152 result.xmm64u(1) = int32_to_float64(MMXUD1(op));
154 BX_WRITE_XMM_REG(i->nnn(), result);
155 #else
156 BX_INFO(("CVTPI2PD_VpdQd: required SSE2, use --enable-sse option"));
157 exception(BX_UD_EXCEPTION, 0, 0);
158 #endif
162 * Opcode: F2 0F 2A
163 * Convert one 32bit signed integer to one double precision FP
164 * Possible floating point exceptions: -
166 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SD_VsdEd(bxInstruction_c *i)
168 #if BX_SUPPORT_SSE >= 2
169 BX_CPU_THIS_PTR prepareSSE();
171 float_status_t status_word;
172 mxcsr_to_softfloat_status_word(status_word, MXCSR);
173 float64 result;
175 #if BX_SUPPORT_X86_64
176 if (i->os64L()) /* 64 bit operand size mode */
178 Bit64u op;
180 /* op is a register or memory reference */
181 if (i->modC0()) {
182 op = BX_READ_64BIT_REG(i->rm());
184 else {
185 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
186 /* pointer, segment address pair */
187 op = read_virtual_qword_64(i->seg(), eaddr);
190 result = int64_to_float64(op, status_word);
192 else
193 #endif
195 Bit32u op;
197 /* op is a register or memory reference */
198 if (i->modC0()) {
199 op = BX_READ_32BIT_REG(i->rm());
201 else {
202 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
203 /* pointer, segment address pair */
204 op = read_virtual_dword(i->seg(), eaddr);
207 result = int32_to_float64(op);
210 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
211 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), result);
212 #else
213 BX_INFO(("CVTSI2SD_VsdEd: required SSE2, use --enable-sse option"));
214 exception(BX_UD_EXCEPTION, 0, 0);
215 #endif
219 * Opcode: F3 0F 2A
220 * Convert one 32bit signed integer to one single precision FP
221 * When a conversion is inexact, the value returned is rounded according
222 * to rounding control bits in MXCSR register.
223 * Possible floating point exceptions: #P
225 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SS_VssEd(bxInstruction_c *i)
227 #if BX_SUPPORT_SSE >= 1
228 BX_CPU_THIS_PTR prepareSSE();
230 float_status_t status_word;
231 mxcsr_to_softfloat_status_word(status_word, MXCSR);
232 float32 result;
234 #if BX_SUPPORT_X86_64
235 if (i->os64L()) /* 64 bit operand size mode */
237 Bit64u op;
239 /* op is a register or memory reference */
240 if (i->modC0()) {
241 op = BX_READ_64BIT_REG(i->rm());
243 else {
244 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
245 /* pointer, segment address pair */
246 op = read_virtual_qword_64(i->seg(), eaddr);
249 result = int64_to_float32(op, status_word);
251 else
252 #endif
254 Bit32u op;
256 /* op is a register or memory reference */
257 if (i->modC0()) {
258 op = BX_READ_32BIT_REG(i->rm());
260 else {
261 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
262 /* pointer, segment address pair */
263 op = read_virtual_dword(i->seg(), eaddr);
266 result = int32_to_float32(op, status_word);
269 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
270 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), result);
271 #else
272 BX_INFO(("CVTSI2SS_VssEd: required SSE, use --enable-sse option"));
273 exception(BX_UD_EXCEPTION, 0, 0);
274 #endif
278 * Opcode: 0F 2C
279 * Convert two single precision FP numbers to two signed doubleword integers
280 * in MMX using truncation if the conversion is inexact
281 * Possible floating point exceptions: #I, #P
283 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2PI_PqWps(bxInstruction_c *i)
285 #if BX_SUPPORT_SSE >= 1
286 BX_CPU_THIS_PTR prepareSSE();
288 /* check floating point status word for a pending FPU exceptions */
289 FPU_check_pending_exceptions();
291 Bit64u op;
292 BxPackedMmxRegister result;
294 /* op is a register or memory reference */
295 if (i->modC0()) {
296 op = BX_READ_XMM_REG_LO_QWORD(i->rm());
298 else {
299 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
300 /* pointer, segment address pair */
301 op = read_virtual_qword(i->seg(), eaddr);
304 BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX state transition */
306 float_status_t status_word;
307 mxcsr_to_softfloat_status_word(status_word, MXCSR);
309 float32 r0 = (float32)(op & 0xFFFFFFFF);
310 float32 r1 = (float32)(op >> 32);
312 if (MXCSR.get_DAZ()) {
313 r0 = float32_denormal_to_zero(r0);
314 r1 = float32_denormal_to_zero(r1);
317 MMXUD0(result) = float32_to_int32_round_to_zero(r0, status_word);
318 MMXUD1(result) = float32_to_int32_round_to_zero(r1, status_word);
320 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
321 BX_WRITE_MMX_REG(i->nnn(), result);
322 #else
323 BX_INFO(("CVTTPS2PI_PqWps: required SSE, use --enable-sse option"));
324 exception(BX_UD_EXCEPTION, 0, 0);
325 #endif
329 * Opcode: 66 0F 2C
330 * Convert two double precision FP numbers to two signed doubleword integers
331 * in MMX using truncation if the conversion is inexact
332 * Possible floating point exceptions: #I, #P
334 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2PI_PqWpd(bxInstruction_c *i)
336 #if BX_SUPPORT_SSE >= 2
337 BX_CPU_THIS_PTR prepareSSE();
339 /* check floating point status word for a pending FPU exceptions */
340 FPU_check_pending_exceptions();
342 BxPackedXmmRegister op;
343 BxPackedMmxRegister result;
345 /* op is a register or memory reference */
346 if (i->modC0()) {
347 op = BX_READ_XMM_REG(i->rm());
349 else {
350 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
351 /* pointer, segment address pair */
352 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
355 BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX state transition */
357 float_status_t status_word;
358 mxcsr_to_softfloat_status_word(status_word, MXCSR);
360 if (MXCSR.get_DAZ()) {
361 op.xmm64u(0) = float64_denormal_to_zero(op.xmm64u(0));
362 op.xmm64u(1) = float64_denormal_to_zero(op.xmm64u(1));
365 MMXUD0(result) = float64_to_int32_round_to_zero(op.xmm64u(0), status_word);
366 MMXUD1(result) = float64_to_int32_round_to_zero(op.xmm64u(1), status_word);
368 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
369 BX_WRITE_MMX_REG(i->nnn(), result);
370 #else
371 BX_INFO(("CVTTPD2PI_PqWpd: required SSE2, use --enable-sse option"));
372 exception(BX_UD_EXCEPTION, 0, 0);
373 #endif
377 * Opcode: F2 0F 2C
378 * Convert one double precision FP number to doubleword integer using
379 * truncation if the conversion is inexact
380 * Possible floating point exceptions: #I, #P
382 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSD2SI_GdWsd(bxInstruction_c *i)
384 #if BX_SUPPORT_SSE >= 2
385 BX_CPU_THIS_PTR prepareSSE();
387 float64 op;
389 /* op is a register or memory reference */
390 if (i->modC0()) {
391 op = BX_READ_XMM_REG_LO_QWORD(i->rm());
393 else {
394 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
395 /* pointer, segment address pair */
396 op = read_virtual_qword(i->seg(), eaddr);
399 float_status_t status_word;
400 mxcsr_to_softfloat_status_word(status_word, MXCSR);
402 if (MXCSR.get_DAZ()) op = float64_denormal_to_zero(op);
404 #if BX_SUPPORT_X86_64
405 if (i->os64L()) /* 64 bit operand size mode */
407 Bit64u result = float64_to_int64_round_to_zero(op, status_word);
408 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
409 BX_WRITE_64BIT_REG(i->nnn(), result);
411 else
412 #endif
414 Bit32u result = float64_to_int32_round_to_zero(op, status_word);
415 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
416 BX_WRITE_32BIT_REGZ(i->nnn(), result);
419 #else
420 BX_INFO(("CVTTSD2SI_GdWsd: required SSE2, use --enable-sse option"));
421 exception(BX_UD_EXCEPTION, 0, 0);
422 #endif
426 * Opcode: F3 0F 2C
427 * Convert one single precision FP number to doubleword integer using
428 * truncation if the conversion is inexact
429 * Possible floating point exceptions: #I, #P
431 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSS2SI_GdWss(bxInstruction_c *i)
433 #if BX_SUPPORT_SSE >= 1
434 BX_CPU_THIS_PTR prepareSSE();
436 float32 op;
438 /* op is a register or memory reference */
439 if (i->modC0()) {
440 op = BX_READ_XMM_REG_LO_DWORD(i->rm());
442 else {
443 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
444 /* pointer, segment address pair */
445 op = read_virtual_dword(i->seg(), eaddr);
448 float_status_t status_word;
449 mxcsr_to_softfloat_status_word(status_word, MXCSR);
451 if (MXCSR.get_DAZ()) op = float32_denormal_to_zero(op);
453 #if BX_SUPPORT_X86_64
454 if (i->os64L()) /* 64 bit operand size mode */
456 Bit64u result = float32_to_int64_round_to_zero(op, status_word);
457 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
458 BX_WRITE_64BIT_REG(i->nnn(), result);
460 else
461 #endif
463 Bit32u result = float32_to_int32_round_to_zero(op, status_word);
464 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
465 BX_WRITE_32BIT_REGZ(i->nnn(), result);
468 #else
469 BX_INFO(("CVTTSS2SI_GdWss: required SSE, use --enable-sse option"));
470 exception(BX_UD_EXCEPTION, 0, 0);
471 #endif
475 * Opcode: 0F 2D
476 * Convert two single precision FP numbers to two signed doubleword integers
477 * in MMX register. When a conversion is inexact, the value returned is
478 * rounded according to rounding control bits in MXCSR register.
479 * Possible floating point exceptions: #I, #P
481 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2PI_PqWps(bxInstruction_c *i)
483 #if BX_SUPPORT_SSE >= 1
484 BX_CPU_THIS_PTR prepareSSE();
486 /* check floating point status word for a pending FPU exceptions */
487 FPU_check_pending_exceptions();
489 Bit64u op;
490 BxPackedMmxRegister result;
492 /* op is a register or memory reference */
493 if (i->modC0()) {
494 op = BX_READ_XMM_REG_LO_QWORD(i->rm());
496 else {
497 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
498 /* pointer, segment address pair */
499 op = read_virtual_qword(i->seg(), eaddr);
502 BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX state transition */
504 float_status_t status_word;
505 mxcsr_to_softfloat_status_word(status_word, MXCSR);
507 float32 r0 = (float32)(op & 0xFFFFFFFF);
508 float32 r1 = (float32)(op >> 32);
510 if (MXCSR.get_DAZ()) {
511 r0 = float32_denormal_to_zero(r0);
512 r1 = float32_denormal_to_zero(r1);
515 MMXUD0(result) = float32_to_int32(r0, status_word);
516 MMXUD1(result) = float32_to_int32(r1, status_word);
518 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
519 BX_WRITE_MMX_REG(i->nnn(), result);
520 #else
521 BX_INFO(("CVTPS2PI_PqWps: required SSE, use --enable-sse option"));
522 exception(BX_UD_EXCEPTION, 0, 0);
523 #endif
527 * Opcode: 66 0F 2D
528 * Convert two double precision FP numbers to two signed doubleword integers
529 * in MMX register. When a conversion is inexact, the value returned is
530 * rounded according to rounding control bits in MXCSR register.
531 * Possible floating point exceptions: #I, #P
533 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2PI_PqWpd(bxInstruction_c *i)
535 #if BX_SUPPORT_SSE >= 2
536 BX_CPU_THIS_PTR prepareSSE();
538 /* check floating point status word for a pending FPU exceptions */
539 FPU_check_pending_exceptions();
541 BxPackedXmmRegister op;
542 BxPackedMmxRegister result;
544 /* op is a register or memory reference */
545 if (i->modC0()) {
546 op = BX_READ_XMM_REG(i->rm());
548 else {
549 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
550 /* pointer, segment address pair */
551 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
554 BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX state transition */
556 float_status_t status_word;
557 mxcsr_to_softfloat_status_word(status_word, MXCSR);
559 if (MXCSR.get_DAZ()) {
560 op.xmm64u(0) = float64_denormal_to_zero(op.xmm64u(0));
561 op.xmm64u(1) = float64_denormal_to_zero(op.xmm64u(1));
564 MMXUD0(result) = float64_to_int32(op.xmm64u(0), status_word);
565 MMXUD1(result) = float64_to_int32(op.xmm64u(1), status_word);
567 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
568 BX_WRITE_MMX_REG(i->nnn(), result);
569 #else
570 BX_INFO(("CVTPD2PI_PqWpd: required SSE2, use --enable-sse option"));
571 exception(BX_UD_EXCEPTION, 0, 0);
572 #endif
576 * Opcode: F2 0F 2D
577 * Convert one double precision FP number to doubleword integer
578 * When a conversion is inexact, the value returned is rounded according
579 * to rounding control bits in MXCSR register.
580 * Possible floating point exceptions: #I, #P
582 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SI_GdWsd(bxInstruction_c *i)
584 #if BX_SUPPORT_SSE >= 2
585 BX_CPU_THIS_PTR prepareSSE();
587 float64 op;
589 /* op is a register or memory reference */
590 if (i->modC0()) {
591 op = BX_READ_XMM_REG_LO_QWORD(i->rm());
593 else {
594 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
595 /* pointer, segment address pair */
596 op = read_virtual_qword(i->seg(), eaddr);
599 float_status_t status_word;
600 mxcsr_to_softfloat_status_word(status_word, MXCSR);
601 if (MXCSR.get_DAZ()) op = float64_denormal_to_zero(op);
603 #if BX_SUPPORT_X86_64
604 if (i->os64L()) /* 64 bit operand size mode */
606 Bit64u result = float64_to_int64(op, status_word);
607 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
608 BX_WRITE_64BIT_REG(i->nnn(), result);
610 else
611 #endif
613 Bit32u result = float64_to_int32(op, status_word);
614 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
615 BX_WRITE_32BIT_REGZ(i->nnn(), result);
618 #else
619 BX_INFO(("CVTSD2SI_GdWsd: required SSE2, use --enable-sse option"));
620 exception(BX_UD_EXCEPTION, 0, 0);
621 #endif
625 * Opcode: F3 0F 2D
626 * Convert one single precision FP number to doubleword integer.
627 * When a conversion is inexact, the value returned is rounded according
628 * to rounding control bits in MXCSR register.
629 * Possible floating point exceptions: #I, #P
631 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SI_GdWss(bxInstruction_c *i)
633 #if BX_SUPPORT_SSE >= 1
634 BX_CPU_THIS_PTR prepareSSE();
636 float32 op;
638 /* op is a register or memory reference */
639 if (i->modC0()) {
640 op = BX_READ_XMM_REG_LO_DWORD(i->rm());
642 else {
643 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
644 /* pointer, segment address pair */
645 op = read_virtual_dword(i->seg(), eaddr);
648 float_status_t status_word;
649 mxcsr_to_softfloat_status_word(status_word, MXCSR);
650 if (MXCSR.get_DAZ()) op = float32_denormal_to_zero(op);
652 #if BX_SUPPORT_X86_64
653 if (i->os64L()) /* 64 bit operand size mode */
655 Bit64u result = float32_to_int64(op, status_word);
656 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
657 BX_WRITE_64BIT_REG(i->nnn(), result);
659 else
660 #endif
662 Bit32u result = float32_to_int32(op, status_word);
663 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
664 BX_WRITE_32BIT_REGZ(i->nnn(), result);
667 #else
668 BX_INFO(("CVTSS2SI_GdWss: required SSE, use --enable-sse option"));
669 exception(BX_UD_EXCEPTION, 0, 0);
670 #endif
674 * Opcode: 0F 5A
675 * Convert two single precision FP numbers to two double precision FP numbers
676 * Possible floating point exceptions: #I, #D
678 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2PD_VpsWps(bxInstruction_c *i)
680 #if BX_SUPPORT_SSE >= 2
681 BX_CPU_THIS_PTR prepareSSE();
683 Bit64u op;
684 BxPackedXmmRegister result;
686 /* op is a register or memory reference */
687 if (i->modC0()) {
688 op = BX_READ_XMM_REG_LO_QWORD(i->rm());
690 else {
691 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
692 /* pointer, segment address pair */
693 op = read_virtual_qword(i->seg(), eaddr);
696 float_status_t status_word;
697 mxcsr_to_softfloat_status_word(status_word, MXCSR);
699 float32 r0 = (float32)(op & 0xFFFFFFFF);
700 float32 r1 = (float32)(op >> 32);
702 if (MXCSR.get_DAZ()) {
703 r0 = float32_denormal_to_zero(r0);
704 r1 = float32_denormal_to_zero(r1);
707 result.xmm64u(0) = float32_to_float64(r0, status_word);
708 result.xmm64u(1) = float32_to_float64(r1, status_word);
710 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
711 BX_WRITE_XMM_REG(i->nnn(), result);
713 #else
714 BX_INFO(("CVTPS2PD_VpsWps: required SSE2, use --enable-sse option"));
715 exception(BX_UD_EXCEPTION, 0, 0);
716 #endif
720 * Opcode: 66 0F 5A
721 * Convert two double precision FP numbers to two single precision FP.
722 * When a conversion is inexact, the value returned is rounded according
723 * to rounding control bits in MXCSR register.
724 * Possible floating point exceptions: #I, #D, #O, #I, #P
726 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2PS_VpdWpd(bxInstruction_c *i)
728 #if BX_SUPPORT_SSE >= 2
729 BX_CPU_THIS_PTR prepareSSE();
731 BxPackedXmmRegister op, result;
733 /* op is a register or memory reference */
734 if (i->modC0()) {
735 op = BX_READ_XMM_REG(i->rm());
737 else {
738 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
739 /* pointer, segment address pair */
740 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
743 float_status_t status_word;
744 mxcsr_to_softfloat_status_word(status_word, MXCSR);
746 if (MXCSR.get_DAZ())
748 op.xmm64u(0) = float64_denormal_to_zero(op.xmm64u(0));
749 op.xmm64u(1) = float64_denormal_to_zero(op.xmm64u(1));
752 result.xmm32u(0) = float64_to_float32(op.xmm64u(0), status_word);
753 result.xmm32u(1) = float64_to_float32(op.xmm64u(1), status_word);
754 result.xmm64u(1) = 0;
756 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
757 BX_WRITE_XMM_REG(i->nnn(), result);
759 #else
760 BX_INFO(("CVTPD2PS_VpdWpd: required SSE2, use --enable-sse option"));
761 exception(BX_UD_EXCEPTION, 0, 0);
762 #endif
766 * Opcode: F2 0F 5A
767 * Convert one double precision FP number to one single precision FP.
768 * When a conversion is inexact, the value returned is rounded according
769 * to rounding control bits in MXCSR register.
770 * Possible floating point exceptions: #I, #D, #O, #I, #P
772 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SS_VsdWsd(bxInstruction_c *i)
774 #if BX_SUPPORT_SSE >= 2
775 BX_CPU_THIS_PTR prepareSSE();
777 float64 op;
778 float32 result;
780 /* op is a register or memory reference */
781 if (i->modC0()) {
782 op = BX_READ_XMM_REG_LO_QWORD(i->rm());
784 else {
785 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
786 /* pointer, segment address pair */
787 op = read_virtual_qword(i->seg(), eaddr);
790 float_status_t status_word;
791 mxcsr_to_softfloat_status_word(status_word, MXCSR);
792 if (MXCSR.get_DAZ()) op = float64_denormal_to_zero(op);
793 result = float64_to_float32(op, status_word);
794 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
795 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), result);
797 #else
798 BX_INFO(("CVTSD2SS_VsdWsd: required SSE2, use --enable-sse option"));
799 exception(BX_UD_EXCEPTION, 0, 0);
800 #endif
804 * Opcode: F3 0F 5A
805 * Convert one single precision FP number to one double precision FP.
806 * Possible floating point exceptions: #I, #D
808 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SD_VssWss(bxInstruction_c *i)
810 #if BX_SUPPORT_SSE >= 2
811 BX_CPU_THIS_PTR prepareSSE();
813 float32 op;
814 float64 result;
816 /* op is a register or memory reference */
817 if (i->modC0()) {
818 op = BX_READ_XMM_REG_LO_DWORD(i->rm());
820 else {
821 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
822 /* pointer, segment address pair */
823 op = read_virtual_dword(i->seg(), eaddr);
826 float_status_t status_word;
827 mxcsr_to_softfloat_status_word(status_word, MXCSR);
828 if (MXCSR.get_DAZ()) op = float32_denormal_to_zero(op);
829 result = float32_to_float64(op, status_word);
830 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
831 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), result);
833 #else
834 BX_INFO(("CVTSS2SD_VssWss: required SSE2, use --enable-sse option"));
835 exception(BX_UD_EXCEPTION, 0, 0);
836 #endif
840 * Opcode: 0F 5B
841 * Convert four signed integers to four single precision FP numbers.
842 * When a conversion is inexact, the value returned is rounded according
843 * to rounding control bits in MXCSR register.
844 * Possible floating point exceptions: #P
846 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTDQ2PS_VpsWdq(bxInstruction_c *i)
848 #if BX_SUPPORT_SSE >= 2
849 BX_CPU_THIS_PTR prepareSSE();
851 BxPackedXmmRegister op;
853 /* op is a register or memory reference */
854 if (i->modC0()) {
855 op = BX_READ_XMM_REG(i->rm());
857 else {
858 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
859 /* pointer, segment address pair */
860 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
863 BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX state transition */
865 float_status_t status_word;
866 mxcsr_to_softfloat_status_word(status_word, MXCSR);
868 op.xmm32u(0) = int32_to_float32(op.xmm32u(0), status_word);
869 op.xmm32u(1) = int32_to_float32(op.xmm32u(1), status_word);
870 op.xmm32u(2) = int32_to_float32(op.xmm32u(2), status_word);
871 op.xmm32u(3) = int32_to_float32(op.xmm32u(3), status_word);
873 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
874 BX_WRITE_XMM_REG(i->nnn(), op);
875 #else
876 BX_INFO(("CVTDQ2PS_VpsWdq: required SSE2, use --enable-sse option"));
877 exception(BX_UD_EXCEPTION, 0, 0);
878 #endif
882 * Opcode: 66 0F 5B
883 * Convert four single precision FP to four doubleword integers.
884 * When a conversion is inexact, the value returned is rounded according
885 * to rounding control bits in MXCSR register.
886 * Possible floating point exceptions: #I, #P
888 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2DQ_VdqWps(bxInstruction_c *i)
890 #if BX_SUPPORT_SSE >= 2
891 BX_CPU_THIS_PTR prepareSSE();
893 BxPackedXmmRegister op;
895 /* op is a register or memory reference */
896 if (i->modC0()) {
897 op = BX_READ_XMM_REG(i->rm());
899 else {
900 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
901 /* pointer, segment address pair */
902 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
905 float_status_t status_word;
906 mxcsr_to_softfloat_status_word(status_word, MXCSR);
908 if (MXCSR.get_DAZ()) {
909 op.xmm32u(0) = float32_denormal_to_zero(op.xmm32u(0));
910 op.xmm32u(1) = float32_denormal_to_zero(op.xmm32u(1));
911 op.xmm32u(2) = float32_denormal_to_zero(op.xmm32u(2));
912 op.xmm32u(3) = float32_denormal_to_zero(op.xmm32u(3));
915 op.xmm32u(0) = float32_to_int32(op.xmm32u(0), status_word);
916 op.xmm32u(1) = float32_to_int32(op.xmm32u(1), status_word);
917 op.xmm32u(2) = float32_to_int32(op.xmm32u(2), status_word);
918 op.xmm32u(3) = float32_to_int32(op.xmm32u(3), status_word);
920 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
921 BX_WRITE_XMM_REG(i->nnn(), op);
922 #else
923 BX_INFO(("CVTPS2DQ_VdqWps: required SSE2, use --enable-sse option"));
924 exception(BX_UD_EXCEPTION, 0, 0);
925 #endif
929 * Opcode: F3 0F 5B
930 * Convert four single precision FP to four doubleword integers using
931 * truncation if the conversion is inexact.
932 * Possible floating point exceptions: #I, #P
934 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2DQ_VdqWps(bxInstruction_c *i)
936 #if BX_SUPPORT_SSE >= 2
937 BX_CPU_THIS_PTR prepareSSE();
939 BxPackedXmmRegister op;
941 /* op is a register or memory reference */
942 if (i->modC0()) {
943 op = BX_READ_XMM_REG(i->rm());
945 else {
946 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
947 /* pointer, segment address pair */
948 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
951 float_status_t status_word;
952 mxcsr_to_softfloat_status_word(status_word, MXCSR);
954 if (MXCSR.get_DAZ()) {
955 op.xmm32u(0) = float32_denormal_to_zero(op.xmm32u(0));
956 op.xmm32u(1) = float32_denormal_to_zero(op.xmm32u(1));
957 op.xmm32u(2) = float32_denormal_to_zero(op.xmm32u(2));
958 op.xmm32u(3) = float32_denormal_to_zero(op.xmm32u(3));
961 op.xmm32u(0) = float32_to_int32_round_to_zero(op.xmm32u(0), status_word);
962 op.xmm32u(1) = float32_to_int32_round_to_zero(op.xmm32u(1), status_word);
963 op.xmm32u(2) = float32_to_int32_round_to_zero(op.xmm32u(2), status_word);
964 op.xmm32u(3) = float32_to_int32_round_to_zero(op.xmm32u(3), status_word);
966 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
967 BX_WRITE_XMM_REG(i->nnn(), op);
968 #else
969 BX_INFO(("CVTTPS2DQ_VdqWps: required SSE2, use --enable-sse option"));
970 exception(BX_UD_EXCEPTION, 0, 0);
971 #endif
975 * Opcode: 66 0F E6
976 * Convert two double precision FP to two signed doubleword integers using
977 * truncation if the conversion is inexact.
978 * Possible floating point exceptions: #I, #P
980 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2DQ_VqWpd(bxInstruction_c *i)
982 #if BX_SUPPORT_SSE >= 2
983 BX_CPU_THIS_PTR prepareSSE();
985 BxPackedXmmRegister op, result;
987 /* op is a register or memory reference */
988 if (i->modC0()) {
989 op = BX_READ_XMM_REG(i->rm());
991 else {
992 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
993 /* pointer, segment address pair */
994 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
997 float_status_t status_word;
998 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1000 if (MXCSR.get_DAZ()) {
1001 op.xmm64u(0) = float64_denormal_to_zero(op.xmm64u(0));
1002 op.xmm64u(1) = float64_denormal_to_zero(op.xmm64u(1));
1005 result.xmm32u(0) = float64_to_int32_round_to_zero(op.xmm64u(0), status_word);
1006 result.xmm32u(1) = float64_to_int32_round_to_zero(op.xmm64u(1), status_word);
1007 result.xmm64u(1) = 0;
1009 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1010 BX_WRITE_XMM_REG(i->nnn(), result);
1011 #else
1012 BX_INFO(("CVTTPD2DQ_VqWpd: required SSE2, use --enable-sse option"));
1013 exception(BX_UD_EXCEPTION, 0, 0);
1014 #endif
1018 * Opcode: F2 0F E6
1019 * Convert two double precision FP to two signed doubleword integers.
1020 * When a conversion is inexact, the value returned is rounded according
1021 * to rounding control bits in MXCSR register.
1022 * Possible floating point exceptions: #I, #P
1024 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2DQ_VqWpd(bxInstruction_c *i)
1026 #if BX_SUPPORT_SSE >= 2
1027 BX_CPU_THIS_PTR prepareSSE();
1029 BxPackedXmmRegister op, result;
1031 /* op is a register or memory reference */
1032 if (i->modC0()) {
1033 op = BX_READ_XMM_REG(i->rm());
1035 else {
1036 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1037 /* pointer, segment address pair */
1038 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
1041 float_status_t status_word;
1042 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1044 if (MXCSR.get_DAZ()) {
1045 op.xmm64u(0) = float64_denormal_to_zero(op.xmm64u(0));
1046 op.xmm64u(1) = float64_denormal_to_zero(op.xmm64u(1));
1049 result.xmm32u(0) = float64_to_int32(op.xmm64u(0), status_word);
1050 result.xmm32u(1) = float64_to_int32(op.xmm64u(1), status_word);
1051 result.xmm64u(1) = 0;
1053 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1054 BX_WRITE_XMM_REG(i->nnn(), result);
1055 #else
1056 BX_INFO(("CVTPD2DQ_VqWpd: required SSE2, use --enable-sse option"));
1057 exception(BX_UD_EXCEPTION, 0, 0);
1058 #endif
1062 * Opcode: F3 0F E6
1063 * Convert two 32bit signed integers from XMM/MEM to two double precision FP
1064 * Possible floating point exceptions: -
1066 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTDQ2PD_VpdWq(bxInstruction_c *i)
1068 #if BX_SUPPORT_SSE >= 2
1069 BX_CPU_THIS_PTR prepareSSE();
1071 Bit64u op;
1072 BxPackedXmmRegister result;
1074 /* op is a register or memory reference */
1075 if (i->modC0()) {
1076 op = BX_READ_XMM_REG_LO_QWORD(i->rm());
1078 else {
1079 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1080 /* pointer, segment address pair */
1081 op = read_virtual_qword(i->seg(), eaddr);
1084 Bit32u r0 = (Bit32u)(op & 0xFFFFFFFF);
1085 Bit32u r1 = (Bit32u)(op >> 32);
1087 result.xmm64u(0) = int32_to_float64(r0);
1088 result.xmm64u(1) = int32_to_float64(r1);
1090 BX_WRITE_XMM_REG(i->nnn(), result);
1091 #else
1092 BX_INFO(("CVTDQ2PD_VpdWq: required SSE2, use --enable-sse option"));
1093 exception(BX_UD_EXCEPTION, 0, 0);
1094 #endif
1098 * Opcode: 0F 2E
1099 * Compare two single precision FP numbers and set EFLAGS accordintly.
1100 * Possible floating point exceptions: #I, #D
1102 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UCOMISS_VssWss(bxInstruction_c *i)
1104 #if BX_SUPPORT_SSE >= 1
1105 BX_CPU_THIS_PTR prepareSSE();
1107 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2;
1109 /* op2 is a register or memory reference */
1110 if (i->modC0()) {
1111 op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
1113 else {
1114 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1115 /* pointer, segment address pair */
1116 op2 = read_virtual_dword(i->seg(), eaddr);
1119 float_status_t status_word;
1120 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1122 if (MXCSR.get_DAZ())
1124 op1 = float32_denormal_to_zero(op1);
1125 op2 = float32_denormal_to_zero(op2);
1128 int rc = float32_compare_quiet(op1, op2, status_word);
1129 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1130 BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
1131 #else
1132 BX_INFO(("UCOMISS_VssWss: required SSE, use --enable-sse option"));
1133 exception(BX_UD_EXCEPTION, 0, 0);
1134 #endif
1138 * Opcode: 66 0F 2E
1139 * Compare two double precision FP numbers and set EFLAGS accordintly.
1140 * Possible floating point exceptions: #I, #D
1142 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UCOMISD_VsdWsd(bxInstruction_c *i)
1144 #if BX_SUPPORT_SSE >= 2
1145 BX_CPU_THIS_PTR prepareSSE();
1147 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2;
1149 /* op2 is a register or memory reference */
1150 if (i->modC0()) {
1151 op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
1153 else {
1154 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1155 /* pointer, segment address pair */
1156 op2 = read_virtual_qword(i->seg(), eaddr);
1159 float_status_t status_word;
1160 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1162 if (MXCSR.get_DAZ())
1164 op1 = float64_denormal_to_zero(op1);
1165 op2 = float64_denormal_to_zero(op2);
1168 int rc = float64_compare_quiet(op1, op2, status_word);
1169 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1170 BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
1171 #else
1172 BX_INFO(("UCOMISD_VsdWsd: required SSE2, use --enable-sse option"));
1173 exception(BX_UD_EXCEPTION, 0, 0);
1174 #endif
1178 * Opcode: 0F 2F
1179 * Compare two single precision FP numbers and set EFLAGS accordintly.
1180 * Possible floating point exceptions: #I, #D
1182 void BX_CPP_AttrRegparmN(1) BX_CPU_C::COMISS_VpsWps(bxInstruction_c *i)
1184 #if BX_SUPPORT_SSE >= 1
1185 BX_CPU_THIS_PTR prepareSSE();
1187 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2;
1189 /* op2 is a register or memory reference */
1190 if (i->modC0()) {
1191 op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
1193 else {
1194 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1195 /* pointer, segment address pair */
1196 op2 = read_virtual_dword(i->seg(), eaddr);
1199 float_status_t status_word;
1200 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1202 if (MXCSR.get_DAZ())
1204 op1 = float32_denormal_to_zero(op1);
1205 op2 = float32_denormal_to_zero(op2);
1208 int rc = float32_compare(op1, op2, status_word);
1209 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1210 BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
1211 #else
1212 BX_INFO(("COMISS_VpsWps: required SSE, use --enable-sse option"));
1213 exception(BX_UD_EXCEPTION, 0, 0);
1214 #endif
1218 * Opcode: 66 0F 2F
1219 * Compare two double precision FP numbers and set EFLAGS accordintly.
1220 * Possible floating point exceptions: #I, #D
1222 void BX_CPP_AttrRegparmN(1) BX_CPU_C::COMISD_VpdWpd(bxInstruction_c *i)
1224 #if BX_SUPPORT_SSE >= 2
1225 BX_CPU_THIS_PTR prepareSSE();
1227 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2;
1229 /* op2 is a register or memory reference */
1230 if (i->modC0()) {
1231 op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
1233 else {
1234 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1235 /* pointer, segment address pair */
1236 op2 = read_virtual_qword(i->seg(), eaddr);
1239 float_status_t status_word;
1240 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1242 if (MXCSR.get_DAZ())
1244 op1 = float64_denormal_to_zero(op1);
1245 op2 = float64_denormal_to_zero(op2);
1248 int rc = float64_compare(op1, op2, status_word);
1249 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1250 BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
1251 #else
1252 BX_INFO(("COMISD_VpdWpd: required SSE2, use --enable-sse option"));
1253 exception(BX_UD_EXCEPTION, 0, 0);
1254 #endif
1258 * Opcode: 0F 51
1259 * Square Root packed single precision.
1260 * Possible floating point exceptions: #I, #D, #P
1262 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTPS_VpsWps(bxInstruction_c *i)
1264 #if BX_SUPPORT_SSE >= 1
1265 BX_CPU_THIS_PTR prepareSSE();
1267 BxPackedXmmRegister op;
1269 /* op is a register or memory reference */
1270 if (i->modC0()) {
1271 op = BX_READ_XMM_REG(i->rm());
1273 else {
1274 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1275 /* pointer, segment address pair */
1276 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
1279 float_status_t status_word;
1280 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1282 if (MXCSR.get_DAZ())
1284 op.xmm32u(0) = float32_denormal_to_zero(op.xmm32u(0));
1285 op.xmm32u(1) = float32_denormal_to_zero(op.xmm32u(1));
1286 op.xmm32u(2) = float32_denormal_to_zero(op.xmm32u(2));
1287 op.xmm32u(3) = float32_denormal_to_zero(op.xmm32u(3));
1290 op.xmm32u(0) = float32_sqrt(op.xmm32u(0), status_word);
1291 op.xmm32u(1) = float32_sqrt(op.xmm32u(1), status_word);
1292 op.xmm32u(2) = float32_sqrt(op.xmm32u(2), status_word);
1293 op.xmm32u(3) = float32_sqrt(op.xmm32u(3), status_word);
1295 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1296 BX_WRITE_XMM_REG(i->nnn(), op);
1298 #else
1299 BX_INFO(("SQRTPS_VpsWps: required SSE, use --enable-sse option"));
1300 exception(BX_UD_EXCEPTION, 0, 0);
1301 #endif
1305 * Opcode: 66 0F 51
1306 * Square Root packed double precision.
1307 * Possible floating point exceptions: #I, #D, #P
1309 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTPD_VpdWpd(bxInstruction_c *i)
1311 #if BX_SUPPORT_SSE >= 2
1312 BX_CPU_THIS_PTR prepareSSE();
1314 BxPackedXmmRegister op;
1316 /* op is a register or memory reference */
1317 if (i->modC0()) {
1318 op = BX_READ_XMM_REG(i->rm());
1320 else {
1321 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1322 /* pointer, segment address pair */
1323 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
1326 float_status_t status_word;
1327 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1329 if (MXCSR.get_DAZ())
1331 op.xmm64u(0) = float64_denormal_to_zero(op.xmm64u(0));
1332 op.xmm64u(1) = float64_denormal_to_zero(op.xmm64u(1));
1335 op.xmm64u(0) = float64_sqrt(op.xmm64u(0), status_word);
1336 op.xmm64u(1) = float64_sqrt(op.xmm64u(1), status_word);
1338 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1339 BX_WRITE_XMM_REG(i->nnn(), op);
1341 #else
1342 BX_INFO(("SQRTPD_VpdWpd: required SSE2, use --enable-sse option"));
1343 exception(BX_UD_EXCEPTION, 0, 0);
1344 #endif
1348 * Opcode: F2 0F 51
1349 * Square Root scalar double precision.
1350 * Possible floating point exceptions: #I, #D, #P
1352 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTSD_VsdWsd(bxInstruction_c *i)
1354 #if BX_SUPPORT_SSE >= 2
1355 BX_CPU_THIS_PTR prepareSSE();
1357 float64 op;
1359 /* op is a register or memory reference */
1360 if (i->modC0()) {
1361 op = BX_READ_XMM_REG_LO_QWORD(i->rm());
1363 else {
1364 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1365 /* pointer, segment address pair */
1366 op = read_virtual_qword(i->seg(), eaddr);
1369 float_status_t status_word;
1370 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1371 if (MXCSR.get_DAZ()) op = float64_denormal_to_zero(op);
1372 op = float64_sqrt(op, status_word);
1373 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1374 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), op);
1376 #else
1377 BX_INFO(("SQRTSD_VsdWsd: required SSE2, use --enable-sse option"));
1378 exception(BX_UD_EXCEPTION, 0, 0);
1379 #endif
1383 * Opcode: F3 0F 51
1384 * Square Root scalar single precision.
1385 * Possible floating point exceptions: #I, #D, #P
1387 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTSS_VssWss(bxInstruction_c *i)
1389 #if BX_SUPPORT_SSE >= 1
1390 BX_CPU_THIS_PTR prepareSSE();
1392 float32 op;
1394 /* op is a register or memory reference */
1395 if (i->modC0()) {
1396 op = BX_READ_XMM_REG_LO_DWORD(i->rm());
1398 else {
1399 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1400 /* pointer, segment address pair */
1401 op = read_virtual_dword(i->seg(), eaddr);
1404 float_status_t status_word;
1405 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1406 if (MXCSR.get_DAZ()) op = float32_denormal_to_zero(op);
1407 op = float32_sqrt(op, status_word);
1408 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1409 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), op);
1411 #else
1412 BX_INFO(("SQRTSS_VssWss: required SSE, use --enable-sse option"));
1413 exception(BX_UD_EXCEPTION, 0, 0);
1414 #endif
1418 * Opcode: 0F 58
1419 * Add packed single precision FP numbers from XMM2/MEM to XMM1.
1420 * Possible floating point exceptions: #I, #D, #O, #U, #P
1422 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDPS_VpsWps(bxInstruction_c *i)
1424 #if BX_SUPPORT_SSE >= 1
1425 BX_CPU_THIS_PTR prepareSSE();
1427 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1429 /* op2 is a register or memory reference */
1430 if (i->modC0()) {
1431 op2 = BX_READ_XMM_REG(i->rm());
1433 else {
1434 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1435 /* pointer, segment address pair */
1436 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1439 float_status_t status_word;
1440 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1442 if (MXCSR.get_DAZ()) {
1443 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
1444 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
1445 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
1446 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
1448 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
1449 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
1450 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
1451 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
1454 op1.xmm32u(0) = float32_add(op1.xmm32u(0), op2.xmm32u(0), status_word);
1455 op1.xmm32u(1) = float32_add(op1.xmm32u(1), op2.xmm32u(1), status_word);
1456 op1.xmm32u(2) = float32_add(op1.xmm32u(2), op2.xmm32u(2), status_word);
1457 op1.xmm32u(3) = float32_add(op1.xmm32u(3), op2.xmm32u(3), status_word);
1459 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1460 BX_WRITE_XMM_REG(i->nnn(), op1);
1462 #else
1463 BX_INFO(("ADDPS_VpsWps: required SSE, use --enable-sse option"));
1464 exception(BX_UD_EXCEPTION, 0, 0);
1465 #endif
1469 * Opcode: 66 0F 58
1470 * Add packed double precision FP numbers from XMM2/MEM to XMM1.
1471 * Possible floating point exceptions: #I, #D, #O, #U, #P
1473 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDPD_VpdWpd(bxInstruction_c *i)
1475 #if BX_SUPPORT_SSE >= 2
1476 BX_CPU_THIS_PTR prepareSSE();
1478 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1480 /* op2 is a register or memory reference */
1481 if (i->modC0()) {
1482 op2 = BX_READ_XMM_REG(i->rm());
1484 else {
1485 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1486 /* pointer, segment address pair */
1487 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1490 float_status_t status_word;
1491 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1493 if (MXCSR.get_DAZ())
1495 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
1496 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
1497 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
1498 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
1501 op1.xmm64u(0) = float64_add(op1.xmm64u(0), op2.xmm64u(0), status_word);
1502 op1.xmm64u(1) = float64_add(op1.xmm64u(1), op2.xmm64u(1), status_word);
1504 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1505 BX_WRITE_XMM_REG(i->nnn(), op1);
1507 #else
1508 BX_INFO(("ADDPD_VpdWpd: required SSE2, use --enable-sse option"));
1509 exception(BX_UD_EXCEPTION, 0, 0);
1510 #endif
1514 * Opcode: F2 0F 58
1515 * Add the lower double precision FP number from XMM2/MEM to XMM1.
1516 * Possible floating point exceptions: #I, #D, #O, #U, #P
1518 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSD_VsdWsd(bxInstruction_c *i)
1520 #if BX_SUPPORT_SSE >= 2
1521 BX_CPU_THIS_PTR prepareSSE();
1523 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2;
1525 /* op2 is a register or memory reference */
1526 if (i->modC0()) {
1527 op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
1529 else {
1530 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1531 /* pointer, segment address pair */
1532 op2 = read_virtual_qword(i->seg(), eaddr);
1535 float_status_t status_word;
1536 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1538 if (MXCSR.get_DAZ())
1540 op1 = float64_denormal_to_zero(op1);
1541 op2 = float64_denormal_to_zero(op2);
1544 op1 = float64_add(op1, op2, status_word);
1545 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1546 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), op1);
1548 #else
1549 BX_INFO(("ADDSD_VsdWsd: required SSE2, use --enable-sse option"));
1550 exception(BX_UD_EXCEPTION, 0, 0);
1551 #endif
1555 * Opcode: F3 0F 58
1556 * Add the lower single precision FP number from XMM2/MEM to XMM1.
1557 * Possible floating point exceptions: #I, #D, #O, #U, #P
1559 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSS_VssWss(bxInstruction_c *i)
1561 #if BX_SUPPORT_SSE >= 1
1562 BX_CPU_THIS_PTR prepareSSE();
1564 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2;
1566 /* op2 is a register or memory reference */
1567 if (i->modC0()) {
1568 op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
1570 else {
1571 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1572 /* pointer, segment address pair */
1573 op2 = read_virtual_dword(i->seg(), eaddr);
1576 float_status_t status_word;
1577 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1579 if (MXCSR.get_DAZ())
1581 op1 = float32_denormal_to_zero(op1);
1582 op2 = float32_denormal_to_zero(op2);
1585 op1 = float32_add(op1, op2, status_word);
1586 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1587 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), op1);
1589 #else
1590 BX_INFO(("ADDSS_VssWss: required SSE, use --enable-sse option"));
1591 exception(BX_UD_EXCEPTION, 0, 0);
1592 #endif
1596 * Opcode: 0F 59
1597 * Multiply packed single precision FP numbers from XMM2/MEM to XMM1.
1598 * Possible floating point exceptions: #I, #D, #O, #U, #P
1600 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULPS_VpsWps(bxInstruction_c *i)
1602 #if BX_SUPPORT_SSE >= 1
1603 BX_CPU_THIS_PTR prepareSSE();
1605 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1607 /* op2 is a register or memory reference */
1608 if (i->modC0()) {
1609 op2 = BX_READ_XMM_REG(i->rm());
1611 else {
1612 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1613 /* pointer, segment address pair */
1614 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1617 float_status_t status_word;
1618 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1620 if (MXCSR.get_DAZ()) {
1621 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
1622 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
1623 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
1624 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
1626 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
1627 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
1628 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
1629 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
1632 op1.xmm32u(0) = float32_mul(op1.xmm32u(0), op2.xmm32u(0), status_word);
1633 op1.xmm32u(1) = float32_mul(op1.xmm32u(1), op2.xmm32u(1), status_word);
1634 op1.xmm32u(2) = float32_mul(op1.xmm32u(2), op2.xmm32u(2), status_word);
1635 op1.xmm32u(3) = float32_mul(op1.xmm32u(3), op2.xmm32u(3), status_word);
1637 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1638 BX_WRITE_XMM_REG(i->nnn(), op1);
1640 #else
1641 BX_INFO(("MULPS_VpsWps: required SSE, use --enable-sse option"));
1642 exception(BX_UD_EXCEPTION, 0, 0);
1643 #endif
1647 * Opcode: 66 0F 59
1648 * Multiply packed double precision FP numbers from XMM2/MEM to XMM1.
1649 * Possible floating point exceptions: #I, #D, #O, #U, #P
1651 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULPD_VpdWpd(bxInstruction_c *i)
1653 #if BX_SUPPORT_SSE >= 2
1654 BX_CPU_THIS_PTR prepareSSE();
1656 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1658 /* op2 is a register or memory reference */
1659 if (i->modC0()) {
1660 op2 = BX_READ_XMM_REG(i->rm());
1662 else {
1663 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1664 /* pointer, segment address pair */
1665 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1668 float_status_t status_word;
1669 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1671 if (MXCSR.get_DAZ())
1673 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
1674 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
1675 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
1676 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
1679 op1.xmm64u(0) = float64_mul(op1.xmm64u(0), op2.xmm64u(0), status_word);
1680 op1.xmm64u(1) = float64_mul(op1.xmm64u(1), op2.xmm64u(1), status_word);
1682 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1683 BX_WRITE_XMM_REG(i->nnn(), op1);
1685 #else
1686 BX_INFO(("MULPD_VpdWpd: required SSE2, use --enable-sse option"));
1687 exception(BX_UD_EXCEPTION, 0, 0);
1688 #endif
1692 * Opcode: F2 0F 59
1693 * Multiply the lower double precision FP number from XMM2/MEM to XMM1.
1694 * Possible floating point exceptions: #I, #D, #O, #U, #P
1696 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULSD_VsdWsd(bxInstruction_c *i)
1698 #if BX_SUPPORT_SSE >= 2
1699 BX_CPU_THIS_PTR prepareSSE();
1701 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2;
1703 /* op2 is a register or memory reference */
1704 if (i->modC0()) {
1705 op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
1707 else {
1708 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1709 /* pointer, segment address pair */
1710 op2 = read_virtual_qword(i->seg(), eaddr);
1713 float_status_t status_word;
1714 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1716 if (MXCSR.get_DAZ())
1718 op1 = float64_denormal_to_zero(op1);
1719 op2 = float64_denormal_to_zero(op2);
1722 op1 = float64_mul(op1, op2, status_word);
1723 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1724 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), op1);
1726 #else
1727 BX_INFO(("MULSD_VsdWsd: required SSE2, use --enable-sse option"));
1728 exception(BX_UD_EXCEPTION, 0, 0);
1729 #endif
1733 * Opcode: F3 0F 59
1734 * Multiply the lower single precision FP number from XMM2/MEM to XMM1.
1735 * Possible floating point exceptions: #I, #D, #O, #U, #P
1737 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULSS_VssWss(bxInstruction_c *i)
1739 #if BX_SUPPORT_SSE >= 1
1740 BX_CPU_THIS_PTR prepareSSE();
1742 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2;
1744 /* op2 is a register or memory reference */
1745 if (i->modC0()) {
1746 op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
1748 else {
1749 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1750 /* pointer, segment address pair */
1751 op2 = read_virtual_dword(i->seg(), eaddr);
1754 float_status_t status_word;
1755 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1757 if (MXCSR.get_DAZ())
1759 op1 = float32_denormal_to_zero(op1);
1760 op2 = float32_denormal_to_zero(op2);
1763 op1 = float32_mul(op1, op2, status_word);
1764 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1765 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), op1);
1767 #else
1768 BX_INFO(("MULSS_VssWss: required SSE, use --enable-sse option"));
1769 exception(BX_UD_EXCEPTION, 0, 0);
1770 #endif
1774 * Opcode: 0F 5C
1775 * Subtract packed single precision FP numbers from XMM2/MEM to XMM1.
1776 * Possible floating point exceptions: #I, #D, #O, #U, #P
1778 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBPS_VpsWps(bxInstruction_c *i)
1780 #if BX_SUPPORT_SSE >= 1
1781 BX_CPU_THIS_PTR prepareSSE();
1783 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1785 /* op2 is a register or memory reference */
1786 if (i->modC0()) {
1787 op2 = BX_READ_XMM_REG(i->rm());
1789 else {
1790 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1791 /* pointer, segment address pair */
1792 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1795 float_status_t status_word;
1796 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1798 if (MXCSR.get_DAZ()) {
1799 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
1800 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
1801 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
1802 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
1804 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
1805 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
1806 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
1807 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
1810 op1.xmm32u(0) = float32_sub(op1.xmm32u(0), op2.xmm32u(0), status_word);
1811 op1.xmm32u(1) = float32_sub(op1.xmm32u(1), op2.xmm32u(1), status_word);
1812 op1.xmm32u(2) = float32_sub(op1.xmm32u(2), op2.xmm32u(2), status_word);
1813 op1.xmm32u(3) = float32_sub(op1.xmm32u(3), op2.xmm32u(3), status_word);
1815 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1816 BX_WRITE_XMM_REG(i->nnn(), op1);
1818 #else
1819 BX_INFO(("SUBPS_VpsWps: required SSE, use --enable-sse option"));
1820 exception(BX_UD_EXCEPTION, 0, 0);
1821 #endif
1825 * Opcode: 66 0F 5C
1826 * Subtract packed double precision FP numbers from XMM2/MEM to XMM1.
1827 * Possible floating point exceptions: #I, #D, #O, #U, #P
1829 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBPD_VpdWpd(bxInstruction_c *i)
1831 #if BX_SUPPORT_SSE >= 2
1832 BX_CPU_THIS_PTR prepareSSE();
1834 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1836 /* op2 is a register or memory reference */
1837 if (i->modC0()) {
1838 op2 = BX_READ_XMM_REG(i->rm());
1840 else {
1841 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1842 /* pointer, segment address pair */
1843 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1846 float_status_t status_word;
1847 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1849 if (MXCSR.get_DAZ())
1851 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
1852 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
1853 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
1854 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
1857 op1.xmm64u(0) = float64_sub(op1.xmm64u(0), op2.xmm64u(0), status_word);
1858 op1.xmm64u(1) = float64_sub(op1.xmm64u(1), op2.xmm64u(1), status_word);
1860 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1861 BX_WRITE_XMM_REG(i->nnn(), op1);
1863 #else
1864 BX_INFO(("SUBPD_VpdWpd: required SSE2, use --enable-sse option"));
1865 exception(BX_UD_EXCEPTION, 0, 0);
1866 #endif
1870 * Opcode: F2 0F 5C
1871 * Subtract the lower double precision FP number from XMM2/MEM to XMM1.
1872 * Possible floating point exceptions: #I, #D, #O, #U, #P
1874 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBSD_VsdWsd(bxInstruction_c *i)
1876 #if BX_SUPPORT_SSE >= 2
1877 BX_CPU_THIS_PTR prepareSSE();
1879 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2;
1881 /* op2 is a register or memory reference */
1882 if (i->modC0()) {
1883 op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
1885 else {
1886 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1887 /* pointer, segment address pair */
1888 op2 = read_virtual_qword(i->seg(), eaddr);
1891 float_status_t status_word;
1892 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1894 if (MXCSR.get_DAZ())
1896 op1 = float64_denormal_to_zero(op1);
1897 op2 = float64_denormal_to_zero(op2);
1900 op1 = float64_sub(op1, op2, status_word);
1901 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1902 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), op1);
1904 #else
1905 BX_INFO(("SUBSD_VsdWsd: required SSE2, use --enable-sse option"));
1906 exception(BX_UD_EXCEPTION, 0, 0);
1907 #endif
1911 * Opcode: F3 0F 5C
1912 * Subtract the lower single precision FP number from XMM2/MEM to XMM1.
1913 * Possible floating point exceptions: #I, #D, #O, #U, #P
1915 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBSS_VssWss(bxInstruction_c *i)
1917 #if BX_SUPPORT_SSE >= 1
1918 BX_CPU_THIS_PTR prepareSSE();
1920 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2;
1922 /* op2 is a register or memory reference */
1923 if (i->modC0()) {
1924 op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
1926 else {
1927 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1928 /* pointer, segment address pair */
1929 op2 = read_virtual_dword(i->seg(), eaddr);
1932 float_status_t status_word;
1933 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1935 if (MXCSR.get_DAZ())
1937 op1 = float32_denormal_to_zero(op1);
1938 op2 = float32_denormal_to_zero(op2);
1941 op1 = float32_sub(op1, op2, status_word);
1942 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1943 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), op1);
1945 #else
1946 BX_INFO(("SUBSS_VssWss: required SSE, use --enable-sse option"));
1947 exception(BX_UD_EXCEPTION, 0, 0);
1948 #endif
1952 * Opcode: 0F 5D
1953 * Calculate the minimum single precision FP between XMM2/MEM to XMM1.
1954 * Possible floating point exceptions: #I, #D
1956 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINPS_VpsWps(bxInstruction_c *i)
1958 #if BX_SUPPORT_SSE >= 1
1959 BX_CPU_THIS_PTR prepareSSE();
1961 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
1963 /* op2 is a register or memory reference */
1964 if (i->modC0()) {
1965 op2 = BX_READ_XMM_REG(i->rm());
1967 else {
1968 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
1969 /* pointer, segment address pair */
1970 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
1973 float_status_t status_word;
1974 mxcsr_to_softfloat_status_word(status_word, MXCSR);
1975 int rc;
1977 if (MXCSR.get_DAZ()) {
1978 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
1979 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
1980 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
1981 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
1983 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
1984 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
1985 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
1986 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
1989 rc = float32_compare(op1.xmm32u(0), op2.xmm32u(0), status_word);
1990 op1.xmm32u(0) = (rc == float_relation_less) ? op1.xmm32u(0) : op2.xmm32u(0);
1991 rc = float32_compare(op1.xmm32u(1), op2.xmm32u(1), status_word);
1992 op1.xmm32u(1) = (rc == float_relation_less) ? op1.xmm32u(1) : op2.xmm32u(1);
1993 rc = float32_compare(op1.xmm32u(2), op2.xmm32u(2), status_word);
1994 op1.xmm32u(2) = (rc == float_relation_less) ? op1.xmm32u(2) : op2.xmm32u(2);
1995 rc = float32_compare(op1.xmm32u(3), op2.xmm32u(3), status_word);
1996 op1.xmm32u(3) = (rc == float_relation_less) ? op1.xmm32u(3) : op2.xmm32u(3);
1998 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
1999 BX_WRITE_XMM_REG(i->nnn(), op1);
2001 #else
2002 BX_INFO(("MINPS_VpsWps: required SSE, use --enable-sse option"));
2003 exception(BX_UD_EXCEPTION, 0, 0);
2004 #endif
2008 * Opcode: 66 0F 5D
2009 * Calculate the minimum double precision FP between XMM2/MEM to XMM1.
2010 * Possible floating point exceptions: #I, #D
2012 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINPD_VpdWpd(bxInstruction_c *i)
2014 #if BX_SUPPORT_SSE >= 2
2015 BX_CPU_THIS_PTR prepareSSE();
2017 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2019 /* op2 is a register or memory reference */
2020 if (i->modC0()) {
2021 op2 = BX_READ_XMM_REG(i->rm());
2023 else {
2024 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2025 /* pointer, segment address pair */
2026 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2029 float_status_t status_word;
2030 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2031 int rc;
2033 if (MXCSR.get_DAZ())
2035 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
2036 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
2037 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
2038 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
2041 rc = float64_compare(op1.xmm64u(0), op2.xmm64u(0), status_word);
2042 op1.xmm64u(0) = (rc == float_relation_less) ? op1.xmm64u(0) : op2.xmm64u(0);
2043 rc = float64_compare(op1.xmm64u(1), op2.xmm64u(1), status_word);
2044 op1.xmm64u(1) = (rc == float_relation_less) ? op1.xmm64u(1) : op2.xmm64u(1);
2046 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2047 BX_WRITE_XMM_REG(i->nnn(), op1);
2049 #else
2050 BX_INFO(("MINPD_VpdWpd: required SSE2, use --enable-sse option"));
2051 exception(BX_UD_EXCEPTION, 0, 0);
2052 #endif
2056 * Opcode: F2 0F 5D
2057 * Calculate the minimum scalar double precision FP between XMM2/MEM to XMM1.
2058 * Possible floating point exceptions: #I, #D
2060 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINSD_VsdWsd(bxInstruction_c *i)
2062 #if BX_SUPPORT_SSE >= 2
2063 BX_CPU_THIS_PTR prepareSSE();
2065 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2;
2067 /* op2 is a register or memory reference */
2068 if (i->modC0()) {
2069 op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
2071 else {
2072 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2073 /* pointer, segment address pair */
2074 op2 = read_virtual_qword(i->seg(), eaddr);
2077 float_status_t status_word;
2078 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2080 if (MXCSR.get_DAZ())
2082 op1 = float64_denormal_to_zero(op1);
2083 op2 = float64_denormal_to_zero(op2);
2086 int rc = float64_compare(op1, op2, status_word);
2087 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2088 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(),
2089 (rc == float_relation_less) ? op1 : op2);
2091 #else
2092 BX_INFO(("MINSD_VsdWsd: required SSE2, use --enable-sse option"));
2093 exception(BX_UD_EXCEPTION, 0, 0);
2094 #endif
2098 * Opcode: F3 0F 5D
2099 * Calculate the minimum scalar single precision FP between XMM2/MEM to XMM1.
2100 * Possible floating point exceptions: #I, #D
2102 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINSS_VssWss(bxInstruction_c *i)
2104 #if BX_SUPPORT_SSE >= 1
2105 BX_CPU_THIS_PTR prepareSSE();
2107 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2;
2109 /* op2 is a register or memory reference */
2110 if (i->modC0()) {
2111 op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
2113 else {
2114 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2115 /* pointer, segment address pair */
2116 op2 = read_virtual_dword(i->seg(), eaddr);
2119 float_status_t status_word;
2120 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2122 if (MXCSR.get_DAZ())
2124 op1 = float32_denormal_to_zero(op1);
2125 op2 = float32_denormal_to_zero(op2);
2128 int rc = float32_compare(op1, op2, status_word);
2129 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2130 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(),
2131 (rc == float_relation_less) ? op1 : op2);
2133 #else
2134 BX_INFO(("MINSS_VssWss: required SSE, use --enable-sse option"));
2135 exception(BX_UD_EXCEPTION, 0, 0);
2136 #endif
2140 * Opcode: 0F 5E
2141 * Divide packed single precision FP numbers from XMM2/MEM to XMM1.
2142 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
2144 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVPS_VpsWps(bxInstruction_c *i)
2146 #if BX_SUPPORT_SSE >= 1
2147 BX_CPU_THIS_PTR prepareSSE();
2149 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2151 /* op2 is a register or memory reference */
2152 if (i->modC0()) {
2153 op2 = BX_READ_XMM_REG(i->rm());
2155 else {
2156 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2157 /* pointer, segment address pair */
2158 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2161 float_status_t status_word;
2162 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2164 if (MXCSR.get_DAZ()) {
2165 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
2166 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
2167 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
2168 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
2170 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
2171 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
2172 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
2173 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
2176 op1.xmm32u(0) = float32_div(op1.xmm32u(0), op2.xmm32u(0), status_word);
2177 op1.xmm32u(1) = float32_div(op1.xmm32u(1), op2.xmm32u(1), status_word);
2178 op1.xmm32u(2) = float32_div(op1.xmm32u(2), op2.xmm32u(2), status_word);
2179 op1.xmm32u(3) = float32_div(op1.xmm32u(3), op2.xmm32u(3), status_word);
2181 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2182 BX_WRITE_XMM_REG(i->nnn(), op1);
2184 #else
2185 BX_INFO(("DIVPS_VpsWps: required SSE, use --enable-sse option"));
2186 exception(BX_UD_EXCEPTION, 0, 0);
2187 #endif
2191 * Opcode: 66 0F 5E
2192 * Divide packed double precision FP numbers from XMM2/MEM to XMM1.
2193 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
2195 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVPD_VpdWpd(bxInstruction_c *i)
2197 #if BX_SUPPORT_SSE >= 2
2198 BX_CPU_THIS_PTR prepareSSE();
2200 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2202 /* op2 is a register or memory reference */
2203 if (i->modC0()) {
2204 op2 = BX_READ_XMM_REG(i->rm());
2206 else {
2207 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2208 /* pointer, segment address pair */
2209 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2212 float_status_t status_word;
2213 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2215 if (MXCSR.get_DAZ())
2217 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
2218 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
2219 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
2220 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
2223 op1.xmm64u(0) = float64_div(op1.xmm64u(0), op2.xmm64u(0), status_word);
2224 op1.xmm64u(1) = float64_div(op1.xmm64u(1), op2.xmm64u(1), status_word);
2226 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2227 BX_WRITE_XMM_REG(i->nnn(), op1);
2229 #else
2230 BX_INFO(("DIVPD_VpdWpd: required SSE2, use --enable-sse option"));
2231 exception(BX_UD_EXCEPTION, 0, 0);
2232 #endif
2236 * Opcode: F2 0F 5E
2237 * Divide the lower double precision FP number from XMM2/MEM to XMM1.
2238 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
2240 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVSD_VsdWsd(bxInstruction_c *i)
2242 #if BX_SUPPORT_SSE >= 2
2243 BX_CPU_THIS_PTR prepareSSE();
2245 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2;
2247 /* op2 is a register or memory reference */
2248 if (i->modC0()) {
2249 op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
2251 else {
2252 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2253 /* pointer, segment address pair */
2254 op2 = read_virtual_qword(i->seg(), eaddr);
2257 float_status_t status_word;
2258 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2260 if (MXCSR.get_DAZ())
2262 op1 = float64_denormal_to_zero(op1);
2263 op2 = float64_denormal_to_zero(op2);
2266 op1 = float64_div(op1, op2, status_word);
2267 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2268 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), op1);
2270 #else
2271 BX_INFO(("DIVSD_VsdWsd: required SSE2, use --enable-sse option"));
2272 exception(BX_UD_EXCEPTION, 0, 0);
2273 #endif
2277 * Opcode: F3 0F 5E
2278 * Divide the lower single precision FP number from XMM2/MEM to XMM1.
2279 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
2281 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVSS_VssWss(bxInstruction_c *i)
2283 #if BX_SUPPORT_SSE >= 1
2284 BX_CPU_THIS_PTR prepareSSE();
2286 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2;
2288 /* op2 is a register or memory reference */
2289 if (i->modC0()) {
2290 op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
2292 else {
2293 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2294 /* pointer, segment address pair */
2295 op2 = read_virtual_dword(i->seg(), eaddr);
2298 float_status_t status_word;
2299 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2301 if (MXCSR.get_DAZ())
2303 op1 = float32_denormal_to_zero(op1);
2304 op2 = float32_denormal_to_zero(op2);
2307 op1 = float32_div(op1, op2, status_word);
2308 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2309 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), op1);
2311 #else
2312 BX_INFO(("DIVSS_VssWss: required SSE, use --enable-sse option"));
2313 exception(BX_UD_EXCEPTION, 0, 0);
2314 #endif
2318 * Opcode: 0F 5F
2319 * Calculate the maximum single precision FP between XMM2/MEM to XMM1.
2320 * Possible floating point exceptions: #I, #D
2322 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXPS_VpsWps(bxInstruction_c *i)
2324 #if BX_SUPPORT_SSE >= 1
2325 BX_CPU_THIS_PTR prepareSSE();
2327 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2329 /* op2 is a register or memory reference */
2330 if (i->modC0()) {
2331 op2 = BX_READ_XMM_REG(i->rm());
2333 else {
2334 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2335 /* pointer, segment address pair */
2336 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2339 float_status_t status_word;
2340 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2341 int rc;
2343 if (MXCSR.get_DAZ()) {
2344 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
2345 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
2346 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
2347 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
2349 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
2350 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
2351 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
2352 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
2355 rc = float32_compare(op1.xmm32u(0), op2.xmm32u(0), status_word);
2356 op1.xmm32u(0) = (rc == float_relation_greater) ? op1.xmm32u(0) : op2.xmm32u(0);
2357 rc = float32_compare(op1.xmm32u(1), op2.xmm32u(1), status_word);
2358 op1.xmm32u(1) = (rc == float_relation_greater) ? op1.xmm32u(1) : op2.xmm32u(1);
2359 rc = float32_compare(op1.xmm32u(2), op2.xmm32u(2), status_word);
2360 op1.xmm32u(2) = (rc == float_relation_greater) ? op1.xmm32u(2) : op2.xmm32u(2);
2361 rc = float32_compare(op1.xmm32u(3), op2.xmm32u(3), status_word);
2362 op1.xmm32u(3) = (rc == float_relation_greater) ? op1.xmm32u(3) : op2.xmm32u(3);
2364 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2365 BX_WRITE_XMM_REG(i->nnn(), op1);
2367 #else
2368 BX_INFO(("MAXPS_VpsWps: required SSE, use --enable-sse option"));
2369 exception(BX_UD_EXCEPTION, 0, 0);
2370 #endif
2374 * Opcode: 66 0F 5F
2375 * Calculate the maximum double precision FP between XMM2/MEM to XMM1.
2376 * Possible floating point exceptions: #I, #D
2378 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXPD_VpdWpd(bxInstruction_c *i)
2380 #if BX_SUPPORT_SSE >= 2
2381 BX_CPU_THIS_PTR prepareSSE();
2383 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2385 /* op2 is a register or memory reference */
2386 if (i->modC0()) {
2387 op2 = BX_READ_XMM_REG(i->rm());
2389 else {
2390 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2391 /* pointer, segment address pair */
2392 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2395 float_status_t status_word;
2396 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2397 int rc;
2399 if (MXCSR.get_DAZ())
2401 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
2402 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
2403 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
2404 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
2407 rc = float64_compare(op1.xmm64u(0), op2.xmm64u(0), status_word);
2408 op1.xmm64u(0) = (rc == float_relation_greater) ? op1.xmm64u(0) : op2.xmm64u(0);
2409 rc = float64_compare(op1.xmm64u(1), op2.xmm64u(1), status_word);
2410 op1.xmm64u(1) = (rc == float_relation_greater) ? op1.xmm64u(1) : op2.xmm64u(1);
2412 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2413 BX_WRITE_XMM_REG(i->nnn(), op1);
2415 #else
2416 BX_INFO(("MAXPD_VpdWpd: required SSE2, use --enable-sse option"));
2417 exception(BX_UD_EXCEPTION, 0, 0);
2418 #endif
2422 * Opcode: F2 0F 5F
2423 * Calculate the maximum scalar double precision FP between XMM2/MEM to XMM1.
2424 * Possible floating point exceptions: #I, #D
2426 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXSD_VsdWsd(bxInstruction_c *i)
2428 #if BX_SUPPORT_SSE >= 2
2429 BX_CPU_THIS_PTR prepareSSE();
2431 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2;
2433 /* op2 is a register or memory reference */
2434 if (i->modC0()) {
2435 op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
2437 else {
2438 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2439 /* pointer, segment address pair */
2440 op2 = read_virtual_qword(i->seg(), eaddr);
2443 float_status_t status_word;
2444 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2446 if (MXCSR.get_DAZ())
2448 op1 = float64_denormal_to_zero(op1);
2449 op2 = float64_denormal_to_zero(op2);
2452 int rc = float64_compare(op1, op2, status_word);
2453 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2454 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(),
2455 (rc == float_relation_greater) ? op1 : op2);
2457 #else
2458 BX_INFO(("MAXSD_VsdWsd: required SSE2, use --enable-sse option"));
2459 exception(BX_UD_EXCEPTION, 0, 0);
2460 #endif
2464 * Opcode: F3 0F 5F
2465 * Calculate the maxumim scalar single precision FP between XMM2/MEM to XMM1.
2466 * Possible floating point exceptions: #I, #D
2468 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXSS_VssWss(bxInstruction_c *i)
2470 #if BX_SUPPORT_SSE >= 1
2471 BX_CPU_THIS_PTR prepareSSE();
2473 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2;
2475 /* op2 is a register or memory reference */
2476 if (i->modC0()) {
2477 op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
2479 else {
2480 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2481 /* pointer, segment address pair */
2482 op2 = read_virtual_dword(i->seg(), eaddr);
2485 float_status_t status_word;
2486 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2488 if (MXCSR.get_DAZ())
2490 op1 = float32_denormal_to_zero(op1);
2491 op2 = float32_denormal_to_zero(op2);
2494 int rc = float32_compare(op1, op2, status_word);
2495 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2496 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(),
2497 (rc == float_relation_greater) ? op1 : op2);
2499 #else
2500 BX_INFO(("MAXSS_VssWss: required SSE, use --enable-sse option"));
2501 exception(BX_UD_EXCEPTION, 0, 0);
2502 #endif
2506 * Opcode: 66 0F 7C
2507 * Add horizontally packed double precision FP in XMM2/MEM from XMM1.
2508 * Possible floating point exceptions: #I, #D, #O, #U, #P
2510 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HADDPD_VpdWpd(bxInstruction_c *i)
2512 #if BX_SUPPORT_SSE >= 3
2513 BX_CPU_THIS_PTR prepareSSE();
2515 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2517 /* op2 is a register or memory reference */
2518 if (i->modC0()) {
2519 op2 = BX_READ_XMM_REG(i->rm());
2521 else {
2522 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2523 /* pointer, segment address pair */
2524 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2527 float_status_t status_word;
2528 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2530 if (MXCSR.get_DAZ())
2532 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
2533 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
2534 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
2535 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
2538 op1.xmm64u(0) = float64_add(op1.xmm64u(0), op1.xmm64u(1), status_word);
2539 op1.xmm64u(1) = float64_add(op2.xmm64u(0), op2.xmm64u(1), status_word);
2541 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2542 BX_WRITE_XMM_REG(i->nnn(), op1);
2544 #else
2545 BX_INFO(("HADDPD_VpdWpd: required SSE3, use --enable-sse option"));
2546 exception(BX_UD_EXCEPTION, 0, 0);
2547 #endif
2551 * Opcode: F2 0F 7C
2552 * Add horizontally packed single precision FP in XMM2/MEM from XMM1.
2553 * Possible floating point exceptions: #I, #D, #O, #U, #P
2555 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HADDPS_VpsWps(bxInstruction_c *i)
2557 #if BX_SUPPORT_SSE >= 3
2558 BX_CPU_THIS_PTR prepareSSE();
2560 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2562 /* op2 is a register or memory reference */
2563 if (i->modC0()) {
2564 op2 = BX_READ_XMM_REG(i->rm());
2566 else {
2567 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2568 /* pointer, segment address pair */
2569 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2572 float_status_t status_word;
2573 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2575 if (MXCSR.get_DAZ()) {
2576 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
2577 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
2578 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
2579 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
2581 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
2582 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
2583 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
2584 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
2587 op1.xmm32u(0) = float32_add(op1.xmm32u(0), op1.xmm32u(1), status_word);
2588 op1.xmm32u(1) = float32_add(op1.xmm32u(2), op1.xmm32u(3), status_word);
2589 op1.xmm32u(2) = float32_add(op2.xmm32u(0), op2.xmm32u(1), status_word);
2590 op1.xmm32u(3) = float32_add(op2.xmm32u(2), op2.xmm32u(3), status_word);
2592 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2593 BX_WRITE_XMM_REG(i->nnn(), op1);
2595 #else
2596 BX_INFO(("HADDPS_VpsWps: required SSE3, use --enable-sse option"));
2597 exception(BX_UD_EXCEPTION, 0, 0);
2598 #endif
2602 * Opcode: 66 0F 7D
2603 * Subtract horizontally packed double precision FP in XMM2/MEM from XMM1.
2604 * Possible floating point exceptions: #I, #D, #O, #U, #P
2606 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HSUBPD_VpdWpd(bxInstruction_c *i)
2608 #if BX_SUPPORT_SSE >= 3
2609 BX_CPU_THIS_PTR prepareSSE();
2611 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2613 /* op2 is a register or memory reference */
2614 if (i->modC0()) {
2615 op2 = BX_READ_XMM_REG(i->rm());
2617 else {
2618 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2619 /* pointer, segment address pair */
2620 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2623 float_status_t status_word;
2624 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2626 if (MXCSR.get_DAZ())
2628 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
2629 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
2630 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
2631 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
2634 op1.xmm64u(0) = float64_sub(op1.xmm64u(0), op1.xmm64u(1), status_word);
2635 op1.xmm64u(1) = float64_sub(op2.xmm64u(0), op2.xmm64u(1), status_word);
2637 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2638 BX_WRITE_XMM_REG(i->nnn(), op1);
2640 #else
2641 BX_INFO(("HSUBPD_VpdWpd: required SSE3, use --enable-sse option"));
2642 exception(BX_UD_EXCEPTION, 0, 0);
2643 #endif
2647 * Opcode: F2 0F 7D
2648 * Subtract horizontally packed single precision FP in XMM2/MEM from XMM1.
2649 * Possible floating point exceptions: #I, #D, #O, #U, #P
2651 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HSUBPS_VpsWps(bxInstruction_c *i)
2653 #if BX_SUPPORT_SSE >= 3
2654 BX_CPU_THIS_PTR prepareSSE();
2656 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2658 /* op2 is a register or memory reference */
2659 if (i->modC0()) {
2660 op2 = BX_READ_XMM_REG(i->rm());
2662 else {
2663 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2664 /* pointer, segment address pair */
2665 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2668 float_status_t status_word;
2669 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2671 if (MXCSR.get_DAZ()) {
2672 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
2673 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
2674 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
2675 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
2677 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
2678 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
2679 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
2680 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
2683 op1.xmm32u(0) = float32_sub(op1.xmm32u(0), op1.xmm32u(1), status_word);
2684 op1.xmm32u(1) = float32_sub(op1.xmm32u(2), op1.xmm32u(3), status_word);
2685 op1.xmm32u(2) = float32_sub(op2.xmm32u(0), op2.xmm32u(1), status_word);
2686 op1.xmm32u(3) = float32_sub(op2.xmm32u(2), op2.xmm32u(3), status_word);
2688 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2689 BX_WRITE_XMM_REG(i->nnn(), op1);
2691 #else
2692 BX_INFO(("HSUBPS_VpsWps: required SSE3, use --enable-sse option"));
2693 exception(BX_UD_EXCEPTION, 0, 0);
2694 #endif
2698 * Opcode: 0F C2
2699 * Compare packed single precision FP values using Ib as comparison predicate.
2700 * Possible floating point exceptions: #I, #D
2702 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPS_VpsWpsIb(bxInstruction_c *i)
2704 #if BX_SUPPORT_SSE >= 1
2705 BX_CPU_THIS_PTR prepareSSE();
2707 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2709 /* op2 is a register or memory reference */
2710 if (i->modC0()) {
2711 op2 = BX_READ_XMM_REG(i->rm());
2713 else {
2714 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2715 /* pointer, segment address pair */
2716 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2719 float_status_t status;
2720 mxcsr_to_softfloat_status_word(status, MXCSR);
2721 int ib = i->Ib();
2723 /* mask used bits, ignore reserved */
2724 if (ib > 7) {
2725 BX_ERROR(("CMPPS_VpsWpsIb: unrecognized predicate %u", i->Ib()));
2727 ib &= 7;
2729 if (MXCSR.get_DAZ()) {
2730 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
2731 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
2732 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
2733 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
2735 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
2736 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
2737 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
2738 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
2741 if(ib < 4)
2743 op1.xmm32u(0) =
2744 compare32[ib](op1.xmm32u(0), op2.xmm32u(0), status) ? 0xFFFFFFFF : 0;
2745 op1.xmm32u(1) =
2746 compare32[ib](op1.xmm32u(1), op2.xmm32u(1), status) ? 0xFFFFFFFF : 0;
2747 op1.xmm32u(2) =
2748 compare32[ib](op1.xmm32u(2), op2.xmm32u(2), status) ? 0xFFFFFFFF : 0;
2749 op1.xmm32u(3) =
2750 compare32[ib](op1.xmm32u(3), op2.xmm32u(3), status) ? 0xFFFFFFFF : 0;
2752 else
2754 ib -= 4;
2756 op1.xmm32u(0) =
2757 compare32[ib](op1.xmm32u(0), op2.xmm32u(0), status) ? 0 : 0xFFFFFFFF;
2758 op1.xmm32u(1) =
2759 compare32[ib](op1.xmm32u(1), op2.xmm32u(1), status) ? 0 : 0xFFFFFFFF;
2760 op1.xmm32u(2) =
2761 compare32[ib](op1.xmm32u(2), op2.xmm32u(2), status) ? 0 : 0xFFFFFFFF;
2762 op1.xmm32u(3) =
2763 compare32[ib](op1.xmm32u(3), op2.xmm32u(3), status) ? 0 : 0xFFFFFFFF;
2766 BX_CPU_THIS_PTR check_exceptionsSSE(status.float_exception_flags);
2767 BX_WRITE_XMM_REG(i->nnn(), op1);
2769 #else
2770 BX_INFO(("CMPPS_VpsWpsIb: required SSE, use --enable-sse option"));
2771 exception(BX_UD_EXCEPTION, 0, 0);
2772 #endif
2776 * Opcode: 66 0F C2
2777 * Compare packed double precision FP values using Ib as comparison predicate.
2778 * Possible floating point exceptions: #I, #D
2780 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPD_VpdWpdIb(bxInstruction_c *i)
2782 #if BX_SUPPORT_SSE >= 2
2783 BX_CPU_THIS_PTR prepareSSE();
2785 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2787 /* op2 is a register or memory reference */
2788 if (i->modC0()) {
2789 op2 = BX_READ_XMM_REG(i->rm());
2791 else {
2792 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2793 /* pointer, segment address pair */
2794 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2797 float_status_t status;
2798 mxcsr_to_softfloat_status_word(status, MXCSR);
2799 int ib = i->Ib();
2801 /* mask used bits, ignore reserved */
2802 if (ib > 7) {
2803 BX_ERROR(("CMPPD_VpdWpdIb: unrecognized predicate %u", i->Ib()));
2805 ib &= 7;
2807 if (MXCSR.get_DAZ())
2809 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
2810 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
2811 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
2812 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
2815 if(ib < 4)
2817 op1.xmm64u(0) = compare64[ib](op1.xmm64u(0), op2.xmm64u(0), status) ?
2818 BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
2819 op1.xmm64u(1) = compare64[ib](op1.xmm64u(1), op2.xmm64u(1), status) ?
2820 BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
2822 else
2824 ib -= 4;
2826 op1.xmm64u(0) = compare64[ib](op1.xmm64u(0), op2.xmm64u(0), status) ?
2827 0 : BX_CONST64(0xFFFFFFFFFFFFFFFF);
2828 op1.xmm64u(1) = compare64[ib](op1.xmm64u(1), op2.xmm64u(1), status) ?
2829 0 : BX_CONST64(0xFFFFFFFFFFFFFFFF);
2832 BX_CPU_THIS_PTR check_exceptionsSSE(status.float_exception_flags);
2833 BX_WRITE_XMM_REG(i->nnn(), op1);
2835 #else
2836 BX_INFO(("CMPPD_VpdWpdIb: required SSE2, use --enable-sse option"));
2837 exception(BX_UD_EXCEPTION, 0, 0);
2838 #endif
2842 * Opcode: F2 0F C2
2843 * Compare double precision FP values using Ib as comparison predicate.
2844 * Possible floating point exceptions: #I, #D
2846 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSD_VsdWsdIb(bxInstruction_c *i)
2848 #if BX_SUPPORT_SSE >= 2
2849 BX_CPU_THIS_PTR prepareSSE();
2851 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2, result = 0;
2853 /* op2 is a register or memory reference */
2854 if (i->modC0()) {
2855 op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
2857 else {
2858 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2859 /* pointer, segment address pair */
2860 op2 = read_virtual_qword(i->seg(), eaddr);
2863 float_status_t status_word;
2864 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2865 int ib = i->Ib();
2867 /* mask used bits, ignore reserved */
2868 if (ib > 7) {
2869 BX_ERROR(("CMPSD_VsdWsdIb: unrecognized predicate %u", i->Ib()));
2871 ib &= 7;
2873 if (MXCSR.get_DAZ())
2875 op1 = float64_denormal_to_zero(op1);
2876 op2 = float64_denormal_to_zero(op2);
2879 if(ib < 4) {
2880 if(compare64[ib](op1, op2, status_word)) {
2881 result = BX_CONST64(0xFFFFFFFFFFFFFFFF);
2882 } else {
2883 result = 0;
2885 } else {
2886 if(compare64[ib-4](op1, op2, status_word)) {
2887 result = 0;
2888 } else {
2889 result = BX_CONST64(0xFFFFFFFFFFFFFFFF);
2893 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2894 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), result);
2895 #else
2896 BX_INFO(("CMPSD_VsdWsdIb: required SSE2, use --enable-sse option"));
2897 exception(BX_UD_EXCEPTION, 0, 0);
2898 #endif
2902 * Opcode: F3 0F C2
2903 * Compare single precision FP values using Ib as comparison predicate.
2904 * Possible floating point exceptions: #I, #D
2906 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSS_VssWssIb(bxInstruction_c *i)
2908 #if BX_SUPPORT_SSE >= 1
2909 BX_CPU_THIS_PTR prepareSSE();
2911 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2, result = 0;
2913 /* op2 is a register or memory reference */
2914 if (i->modC0()) {
2915 op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
2917 else {
2918 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2919 /* pointer, segment address pair */
2920 op2 = read_virtual_dword(i->seg(), eaddr);
2923 float_status_t status_word;
2924 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2925 int ib = i->Ib();
2927 /* mask used bits, ignore reserved */
2928 if (ib > 7) {
2929 BX_ERROR(("CMPSS_VssWssIb: unrecognized predicate %u", i->Ib()));
2931 ib &= 7;
2933 if (MXCSR.get_DAZ())
2935 op1 = float32_denormal_to_zero(op1);
2936 op2 = float32_denormal_to_zero(op2);
2939 if(ib < 4) {
2940 if(compare32[ib](op1, op2, status_word)) {
2941 result = 0xFFFFFFFF;
2942 } else {
2943 result = 0;
2945 } else {
2946 if(compare32[ib-4](op1, op2, status_word)) {
2947 result = 0;
2948 } else {
2949 result = 0xFFFFFFFF;
2953 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2954 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), result);
2955 #else
2956 BX_INFO(("CMPSS_VssWssIb: required SSE, use --enable-sse option"));
2957 exception(BX_UD_EXCEPTION, 0, 0);
2958 #endif
2962 * Opcode: 66 0F D0
2963 * Add/Subtract packed double precision FP numbers from XMM2/MEM to XMM1.
2964 * Possible floating point exceptions: #I, #D, #O, #U, #P
2966 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSUBPD_VpdWpd(bxInstruction_c *i)
2968 #if BX_SUPPORT_SSE >= 3
2969 BX_CPU_THIS_PTR prepareSSE();
2971 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
2973 /* op2 is a register or memory reference */
2974 if (i->modC0()) {
2975 op2 = BX_READ_XMM_REG(i->rm());
2977 else {
2978 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
2979 /* pointer, segment address pair */
2980 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
2983 float_status_t status_word;
2984 mxcsr_to_softfloat_status_word(status_word, MXCSR);
2986 if (MXCSR.get_DAZ())
2988 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
2989 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
2990 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
2991 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
2994 op1.xmm64u(0) = float64_sub(op1.xmm64u(0), op2.xmm64u(0), status_word);
2995 op1.xmm64u(1) = float64_add(op1.xmm64u(1), op2.xmm64u(1), status_word);
2997 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
2998 BX_WRITE_XMM_REG(i->nnn(), op1);
3000 #else
3001 BX_INFO(("ADDSUBPD_VpdWpd: required SSE3, use --enable-sse option"));
3002 exception(BX_UD_EXCEPTION, 0, 0);
3003 #endif
3007 * Opcode: F2 0F D0
3008 * Add/Substract packed single precision FP numbers from XMM2/MEM to XMM1.
3009 * Possible floating point exceptions: #I, #D, #O, #U, #P
3011 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSUBPS_VpsWps(bxInstruction_c *i)
3013 #if BX_SUPPORT_SSE >= 3
3014 BX_CPU_THIS_PTR prepareSSE();
3016 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
3018 /* op2 is a register or memory reference */
3019 if (i->modC0()) {
3020 op2 = BX_READ_XMM_REG(i->rm());
3022 else {
3023 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3024 /* pointer, segment address pair */
3025 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3028 float_status_t status_word;
3029 mxcsr_to_softfloat_status_word(status_word, MXCSR);
3031 if (MXCSR.get_DAZ()) {
3032 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
3033 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
3034 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
3035 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
3037 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
3038 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
3039 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
3040 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
3043 op1.xmm32u(0) = float32_sub(op1.xmm32u(0), op2.xmm32u(0), status_word);
3044 op1.xmm32u(1) = float32_add(op1.xmm32u(1), op2.xmm32u(1), status_word);
3045 op1.xmm32u(2) = float32_sub(op1.xmm32u(2), op2.xmm32u(2), status_word);
3046 op1.xmm32u(3) = float32_add(op1.xmm32u(3), op2.xmm32u(3), status_word);
3048 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
3049 BX_WRITE_XMM_REG(i->nnn(), op1);
3051 #else
3052 BX_INFO(("ADDSUBPS_VpsWps: required SSE3, use --enable-sse option"));
3053 exception(BX_UD_EXCEPTION, 0, 0);
3054 #endif
3057 // for 3-byte opcodes
3058 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
3060 /* 66 0F 3A 08 */
3061 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDPS_VpsWpsIb(bxInstruction_c *i)
3063 #if BX_SUPPORT_SSE >= 4
3064 BX_CPU_THIS_PTR prepareSSE();
3066 BxPackedXmmRegister op;
3068 /* op is a register or memory reference */
3069 if (i->modC0()) {
3070 op = BX_READ_XMM_REG(i->rm());
3072 else {
3073 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3074 /* pointer, segment address pair */
3075 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
3078 float_status_t status_word;
3079 mxcsr_to_softfloat_status_word(status_word, MXCSR);
3080 Bit8u control = i->Ib();
3082 // override MXCSR rounding mode with control coming from imm8
3083 if ((control & 0x4) == 0)
3084 status_word.float_rounding_mode = control & 0x3;
3086 if (MXCSR.get_DAZ()) {
3087 op.xmm32u(0) = float32_denormal_to_zero(op.xmm32u(0));
3088 op.xmm32u(1) = float32_denormal_to_zero(op.xmm32u(1));
3089 op.xmm32u(2) = float32_denormal_to_zero(op.xmm32u(2));
3090 op.xmm32u(3) = float32_denormal_to_zero(op.xmm32u(3));
3093 for (unsigned j=0; j < 4; j++) {
3094 if (float32_is_nan(op.xmm32u(j))) {
3095 op.xmm32u(j) = propagateFloat32NaN(op.xmm32u(j), status_word);
3097 else {
3098 op.xmm32u(j) = float32_to_int32(op.xmm32u(j), status_word);
3099 op.xmm32u(j) = int32_to_float32(op.xmm32u(j), status_word);
3103 // ignore precision exception result
3104 if (control & 0x8)
3105 status_word.float_exception_flags &= ~float_flag_inexact;
3107 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
3108 BX_WRITE_XMM_REG(i->nnn(), op);
3109 #else
3110 BX_INFO(("ROUNDPS_VpsWpsIb: required SSE4, use --enable-sse option"));
3111 exception(BX_UD_EXCEPTION, 0, 0);
3112 #endif
3115 /* 66 0F 3A 09 */
3116 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDPD_VpdWpdIb(bxInstruction_c *i)
3118 #if BX_SUPPORT_SSE >= 4
3119 BX_CPU_THIS_PTR prepareSSE();
3121 BxPackedXmmRegister op;
3123 /* op is a register or memory reference */
3124 if (i->modC0()) {
3125 op = BX_READ_XMM_REG(i->rm());
3127 else {
3128 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3129 /* pointer, segment address pair */
3130 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
3133 float_status_t status_word;
3134 mxcsr_to_softfloat_status_word(status_word, MXCSR);
3135 Bit8u control = i->Ib();
3137 // override MXCSR rounding mode with control coming from imm8
3138 if ((control & 0x4) == 0)
3139 status_word.float_rounding_mode = control & 0x3;
3141 if (MXCSR.get_DAZ()) {
3142 op.xmm64u(0) = float64_denormal_to_zero(op.xmm64u(0));
3143 op.xmm64u(1) = float64_denormal_to_zero(op.xmm64u(1));
3146 for (unsigned j=0; j < 2; j++) {
3147 if (float64_is_nan(op.xmm64u(j))) {
3148 op.xmm64u(j) = propagateFloat64NaN(op.xmm64u(j), status_word);
3150 else {
3151 op.xmm64u(j) = float64_to_int64(op.xmm64u(j), status_word);
3152 op.xmm64u(j) = int64_to_float64(op.xmm64u(j), status_word);
3156 // ignore precision exception result
3157 if (control & 0x8)
3158 status_word.float_exception_flags &= ~float_flag_inexact;
3160 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
3161 BX_WRITE_XMM_REG(i->nnn(), op);
3162 #else
3163 BX_INFO(("ROUNDPD_VpdWpdIb: required SSE4, use --enable-sse option"));
3164 exception(BX_UD_EXCEPTION, 0, 0);
3165 #endif
3168 /* 66 0F 3A 0A */
3169 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDSS_VssWssIb(bxInstruction_c *i)
3171 #if BX_SUPPORT_SSE >= 4
3172 BX_CPU_THIS_PTR prepareSSE();
3174 float32 op;
3176 /* op is a register or memory reference */
3177 if (i->modC0()) {
3178 op = BX_READ_XMM_REG_LO_DWORD(i->rm());
3180 else {
3181 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3182 /* pointer, segment address pair */
3183 op = read_virtual_dword(i->seg(), eaddr);
3186 float_status_t status_word;
3187 mxcsr_to_softfloat_status_word(status_word, MXCSR);
3188 Bit8u control = i->Ib();
3190 // override MXCSR rounding mode with control coming from imm8
3191 if ((control & 0x4) == 0)
3192 status_word.float_rounding_mode = control & 0x3;
3194 if (MXCSR.get_DAZ()) op = float32_denormal_to_zero(op);
3196 if (float32_is_nan(op)) {
3197 op = propagateFloat32NaN(op, status_word);
3199 else {
3200 op = int32_to_float32(float32_to_int32(op, status_word), status_word);
3203 // ignore precision exception result
3204 if (control & 0x8)
3205 status_word.float_exception_flags &= ~float_flag_inexact;
3207 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
3208 BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), op);
3209 #else
3210 BX_INFO(("ROUNDSS_VssWssIb: required SSE4, use --enable-sse option"));
3211 exception(BX_UD_EXCEPTION, 0, 0);
3212 #endif
3215 /* 66 0F 3A 0B */
3216 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDSD_VsdWsdIb(bxInstruction_c *i)
3218 #if BX_SUPPORT_SSE >= 4
3219 BX_CPU_THIS_PTR prepareSSE();
3221 float64 op;
3223 /* op is a register or memory reference */
3224 if (i->modC0()) {
3225 op = BX_READ_XMM_REG_LO_QWORD(i->rm());
3227 else {
3228 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3229 /* pointer, segment address pair */
3230 op = read_virtual_qword(i->seg(), eaddr);
3233 float_status_t status_word;
3234 mxcsr_to_softfloat_status_word(status_word, MXCSR);
3235 Bit8u control = i->Ib();
3237 // override MXCSR rounding mode with control coming from imm8
3238 if ((control & 0x4) == 0)
3239 status_word.float_rounding_mode = control & 0x3;
3241 if (MXCSR.get_DAZ()) op = float64_denormal_to_zero(op);
3243 if (float64_is_nan(op))
3244 op = propagateFloat64NaN(op, status_word);
3245 else
3246 op = int64_to_float64(float64_to_int64(op, status_word), status_word);
3248 // ignore precision exception result
3249 if (control & 0x8)
3250 status_word.float_exception_flags &= ~float_flag_inexact;
3252 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
3253 BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), op);
3254 #else
3255 BX_INFO(("ROUNDSD_VsdWsdIb: required SSE4, use --enable-sse option"));
3256 exception(BX_UD_EXCEPTION, 0, 0);
3257 #endif
3260 /* Opcode: 66 0F 3A 40
3261 * Selectively multiply packed SP floating-point values from xmm1 with
3262 * packed SP floating-point values from xmm2, add and selectively
3263 * store the packed SP floating-point values or zero values to xmm1
3265 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPS_VpsWpsIb(bxInstruction_c *i)
3267 #if BX_SUPPORT_SSE >= 4
3268 BX_CPU_THIS_PTR prepareSSE();
3270 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, tmp;
3271 Bit8u mask = i->Ib();
3273 /* op2 is a register or memory reference */
3274 if (i->modC0()) {
3275 op2 = BX_READ_XMM_REG(i->rm());
3277 else {
3278 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3279 /* pointer, segment address pair */
3280 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3283 float_status_t status_word;
3284 mxcsr_to_softfloat_status_word(status_word, MXCSR);
3286 if (MXCSR.get_DAZ()) {
3287 op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
3288 op1.xmm32u(1) = float32_denormal_to_zero(op1.xmm32u(1));
3289 op1.xmm32u(2) = float32_denormal_to_zero(op1.xmm32u(2));
3290 op1.xmm32u(3) = float32_denormal_to_zero(op1.xmm32u(3));
3292 op2.xmm32u(0) = float32_denormal_to_zero(op2.xmm32u(0));
3293 op2.xmm32u(1) = float32_denormal_to_zero(op2.xmm32u(1));
3294 op2.xmm32u(2) = float32_denormal_to_zero(op2.xmm32u(2));
3295 op2.xmm32u(3) = float32_denormal_to_zero(op2.xmm32u(3));
3298 tmp.xmm64u(0) = tmp.xmm64u(1) = 0;
3300 if (mask & 0x10)
3301 tmp.xmm32u(0) = float32_mul(op1.xmm32u(0), op2.xmm32u(0), status_word);
3302 if (mask & 0x20)
3303 tmp.xmm32u(1) = float32_mul(op1.xmm32u(1), op2.xmm32u(1), status_word);
3304 if (mask & 0x40)
3305 tmp.xmm32u(2) = float32_mul(op1.xmm32u(2), op2.xmm32u(2), status_word);
3306 if (mask & 0x80)
3307 tmp.xmm32u(3) = float32_mul(op1.xmm32u(3), op2.xmm32u(3), status_word);
3309 float32 r1 = float32_add(tmp.xmm32u(0), tmp.xmm32u(1), status_word);
3310 float32 r2 = float32_add(tmp.xmm32u(2), tmp.xmm32u(3), status_word);
3311 float32 r = float32_add(r1, r2, status_word);
3313 op1.xmm64u(0) = op1.xmm64u(1) = 0;
3315 if (mask & 0x01) op1.xmm32u(0) = r;
3316 if (mask & 0x02) op1.xmm32u(1) = r;
3317 if (mask & 0x04) op1.xmm32u(2) = r;
3318 if (mask & 0x08) op1.xmm32u(3) = r;
3320 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
3321 BX_WRITE_XMM_REG(i->nnn(), op1);
3322 #else
3323 BX_INFO(("DPPS_VpsWpsIb: required SSE4, use --enable-sse option"));
3324 exception(BX_UD_EXCEPTION, 0, 0);
3325 #endif
3328 /* Opcode: 66 0F 3A 41
3329 * Selectively multiply packed DP floating-point values from xmm1 with
3330 * packed DP floating-point values from xmm2, add and selectively
3331 * store the packed DP floating-point values or zero values to xmm1
3333 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPD_VpdWpdIb(bxInstruction_c *i)
3335 #if BX_SUPPORT_SSE >= 4
3336 BX_CPU_THIS_PTR prepareSSE();
3338 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, tmp;
3339 Bit8u mask = i->Ib();
3341 /* op2 is a register or memory reference */
3342 if (i->modC0()) {
3343 op2 = BX_READ_XMM_REG(i->rm());
3345 else {
3346 bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
3347 /* pointer, segment address pair */
3348 readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
3351 float_status_t status_word;
3352 mxcsr_to_softfloat_status_word(status_word, MXCSR);
3354 if (MXCSR.get_DAZ()) {
3355 op1.xmm64u(0) = float64_denormal_to_zero(op1.xmm64u(0));
3356 op1.xmm64u(1) = float64_denormal_to_zero(op1.xmm64u(1));
3358 op2.xmm64u(0) = float64_denormal_to_zero(op2.xmm64u(0));
3359 op2.xmm64u(1) = float64_denormal_to_zero(op2.xmm64u(1));
3362 tmp.xmm64u(0) = tmp.xmm64u(1) = 0;
3364 if (mask & 0x10)
3365 tmp.xmm64u(0) = float64_mul(op1.xmm64u(0), op2.xmm64u(0), status_word);
3366 if (mask & 0x20)
3367 tmp.xmm64u(1) = float64_mul(op1.xmm64u(1), op2.xmm64u(1), status_word);
3369 float64 result = float64_add(tmp.xmm64u(0), tmp.xmm64u(1), status_word);
3371 op1.xmm64u(0) = op1.xmm64u(1) = 0;
3373 if (mask & 0x01) op1.xmm64u(0) = result;
3374 if (mask & 0x02) op1.xmm64u(1) = result;
3376 BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
3377 BX_WRITE_XMM_REG(i->nnn(), op1);
3378 #else
3379 BX_INFO(("DPPD_VpdWpdIb: required SSE4, use --enable-sse option"));
3380 exception(BX_UD_EXCEPTION, 0, 0);
3381 #endif
3384 #endif // BX_SUPPORT_SSE >= 4 || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)