2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
24 #include "exec/exec-all.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/helper-proto.h"
27 #include "fpu/softfloat.h"
28 #include "fpu/softfloat-macros.h"
29 #include "helper-tcg.h"
33 #define FT0 (env->ft0)
34 #define ST0 (env->fpregs[env->fpstt].d)
35 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
38 #define FPU_RC_SHIFT 10
39 #define FPU_RC_MASK (3 << FPU_RC_SHIFT)
40 #define FPU_RC_NEAR 0x000
41 #define FPU_RC_DOWN 0x400
42 #define FPU_RC_UP 0x800
43 #define FPU_RC_CHOP 0xc00
45 #define MAXTAN 9223372036854775808.0
47 /* the following deal with x86 long double-precision numbers */
48 #define MAXEXPD 0x7fff
50 #define EXPD(fp) (fp.l.upper & 0x7fff)
51 #define SIGND(fp) ((fp.l.upper) & 0x8000)
52 #define MANTD(fp) (fp.l.lower)
53 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
55 #define FPUS_IE (1 << 0)
56 #define FPUS_DE (1 << 1)
57 #define FPUS_ZE (1 << 2)
58 #define FPUS_OE (1 << 3)
59 #define FPUS_UE (1 << 4)
60 #define FPUS_PE (1 << 5)
61 #define FPUS_SF (1 << 6)
62 #define FPUS_SE (1 << 7)
63 #define FPUS_B (1 << 15)
67 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
68 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
69 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
70 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
71 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
72 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
73 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
74 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
76 static inline void fpush(CPUX86State
*env
)
78 env
->fpstt
= (env
->fpstt
- 1) & 7;
79 env
->fptags
[env
->fpstt
] = 0; /* validate stack entry */
82 static inline void fpop(CPUX86State
*env
)
84 env
->fptags
[env
->fpstt
] = 1; /* invalidate stack entry */
85 env
->fpstt
= (env
->fpstt
+ 1) & 7;
88 static floatx80
do_fldt(X86Access
*ac
, target_ulong ptr
)
92 temp
.l
.lower
= access_ldq(ac
, ptr
);
93 temp
.l
.upper
= access_ldw(ac
, ptr
+ 8);
97 static void do_fstt(X86Access
*ac
, target_ulong ptr
, floatx80 f
)
102 access_stq(ac
, ptr
, temp
.l
.lower
);
103 access_stw(ac
, ptr
+ 8, temp
.l
.upper
);
106 /* x87 FPU helpers */
108 static inline double floatx80_to_double(CPUX86State
*env
, floatx80 a
)
115 u
.f64
= floatx80_to_float64(a
, &env
->fp_status
);
119 static inline floatx80
double_to_floatx80(CPUX86State
*env
, double a
)
127 return float64_to_floatx80(u
.f64
, &env
->fp_status
);
130 static void fpu_set_exception(CPUX86State
*env
, int mask
)
133 if (env
->fpus
& (~env
->fpuc
& FPUC_EM
)) {
134 env
->fpus
|= FPUS_SE
| FPUS_B
;
138 static inline uint8_t save_exception_flags(CPUX86State
*env
)
140 uint8_t old_flags
= get_float_exception_flags(&env
->fp_status
);
141 set_float_exception_flags(0, &env
->fp_status
);
145 static void merge_exception_flags(CPUX86State
*env
, uint8_t old_flags
)
147 uint8_t new_flags
= get_float_exception_flags(&env
->fp_status
);
148 float_raise(old_flags
, &env
->fp_status
);
149 fpu_set_exception(env
,
150 ((new_flags
& float_flag_invalid
? FPUS_IE
: 0) |
151 (new_flags
& float_flag_divbyzero
? FPUS_ZE
: 0) |
152 (new_flags
& float_flag_overflow
? FPUS_OE
: 0) |
153 (new_flags
& float_flag_underflow
? FPUS_UE
: 0) |
154 (new_flags
& float_flag_inexact
? FPUS_PE
: 0) |
155 (new_flags
& float_flag_input_denormal
? FPUS_DE
: 0)));
158 static inline floatx80
helper_fdiv(CPUX86State
*env
, floatx80 a
, floatx80 b
)
160 uint8_t old_flags
= save_exception_flags(env
);
161 floatx80 ret
= floatx80_div(a
, b
, &env
->fp_status
);
162 merge_exception_flags(env
, old_flags
);
166 static void fpu_raise_exception(CPUX86State
*env
, uintptr_t retaddr
)
168 if (env
->cr
[0] & CR0_NE_MASK
) {
169 raise_exception_ra(env
, EXCP10_COPR
, retaddr
);
171 #if !defined(CONFIG_USER_ONLY)
173 fpu_check_raise_ferr_irq(env
);
178 void helper_flds_FT0(CPUX86State
*env
, uint32_t val
)
180 uint8_t old_flags
= save_exception_flags(env
);
187 FT0
= float32_to_floatx80(u
.f
, &env
->fp_status
);
188 merge_exception_flags(env
, old_flags
);
191 void helper_fldl_FT0(CPUX86State
*env
, uint64_t val
)
193 uint8_t old_flags
= save_exception_flags(env
);
200 FT0
= float64_to_floatx80(u
.f
, &env
->fp_status
);
201 merge_exception_flags(env
, old_flags
);
/* FILD m32int into the temporary FT0 register; the int32 -> floatx80
 * conversion is exact, so no exception-flag bracketing is needed. */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}
209 void helper_flds_ST0(CPUX86State
*env
, uint32_t val
)
211 uint8_t old_flags
= save_exception_flags(env
);
218 new_fpstt
= (env
->fpstt
- 1) & 7;
220 env
->fpregs
[new_fpstt
].d
= float32_to_floatx80(u
.f
, &env
->fp_status
);
221 env
->fpstt
= new_fpstt
;
222 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
223 merge_exception_flags(env
, old_flags
);
226 void helper_fldl_ST0(CPUX86State
*env
, uint64_t val
)
228 uint8_t old_flags
= save_exception_flags(env
);
235 new_fpstt
= (env
->fpstt
- 1) & 7;
237 env
->fpregs
[new_fpstt
].d
= float64_to_floatx80(u
.f
, &env
->fp_status
);
238 env
->fpstt
= new_fpstt
;
239 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
240 merge_exception_flags(env
, old_flags
);
243 static FloatX80RoundPrec
tmp_maximise_precision(float_status
*st
)
245 FloatX80RoundPrec old
= get_floatx80_rounding_precision(st
);
246 set_floatx80_rounding_precision(floatx80_precision_x
, st
);
250 void helper_fildl_ST0(CPUX86State
*env
, int32_t val
)
253 FloatX80RoundPrec old
= tmp_maximise_precision(&env
->fp_status
);
255 new_fpstt
= (env
->fpstt
- 1) & 7;
256 env
->fpregs
[new_fpstt
].d
= int32_to_floatx80(val
, &env
->fp_status
);
257 env
->fpstt
= new_fpstt
;
258 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
260 set_floatx80_rounding_precision(old
, &env
->fp_status
);
263 void helper_fildll_ST0(CPUX86State
*env
, int64_t val
)
266 FloatX80RoundPrec old
= tmp_maximise_precision(&env
->fp_status
);
268 new_fpstt
= (env
->fpstt
- 1) & 7;
269 env
->fpregs
[new_fpstt
].d
= int64_to_floatx80(val
, &env
->fp_status
);
270 env
->fpstt
= new_fpstt
;
271 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
273 set_floatx80_rounding_precision(old
, &env
->fp_status
);
276 uint32_t helper_fsts_ST0(CPUX86State
*env
)
278 uint8_t old_flags
= save_exception_flags(env
);
284 u
.f
= floatx80_to_float32(ST0
, &env
->fp_status
);
285 merge_exception_flags(env
, old_flags
);
289 uint64_t helper_fstl_ST0(CPUX86State
*env
)
291 uint8_t old_flags
= save_exception_flags(env
);
297 u
.f
= floatx80_to_float64(ST0
, &env
->fp_status
);
298 merge_exception_flags(env
, old_flags
);
302 int32_t helper_fist_ST0(CPUX86State
*env
)
304 uint8_t old_flags
= save_exception_flags(env
);
307 val
= floatx80_to_int32(ST0
, &env
->fp_status
);
308 if (val
!= (int16_t)val
) {
309 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
312 merge_exception_flags(env
, old_flags
);
316 int32_t helper_fistl_ST0(CPUX86State
*env
)
318 uint8_t old_flags
= save_exception_flags(env
);
321 val
= floatx80_to_int32(ST0
, &env
->fp_status
);
322 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
325 merge_exception_flags(env
, old_flags
);
329 int64_t helper_fistll_ST0(CPUX86State
*env
)
331 uint8_t old_flags
= save_exception_flags(env
);
334 val
= floatx80_to_int64(ST0
, &env
->fp_status
);
335 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
336 val
= 0x8000000000000000ULL
;
338 merge_exception_flags(env
, old_flags
);
342 int32_t helper_fistt_ST0(CPUX86State
*env
)
344 uint8_t old_flags
= save_exception_flags(env
);
347 val
= floatx80_to_int32_round_to_zero(ST0
, &env
->fp_status
);
348 if (val
!= (int16_t)val
) {
349 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
352 merge_exception_flags(env
, old_flags
);
356 int32_t helper_fisttl_ST0(CPUX86State
*env
)
358 uint8_t old_flags
= save_exception_flags(env
);
361 val
= floatx80_to_int32_round_to_zero(ST0
, &env
->fp_status
);
362 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
365 merge_exception_flags(env
, old_flags
);
369 int64_t helper_fisttll_ST0(CPUX86State
*env
)
371 uint8_t old_flags
= save_exception_flags(env
);
374 val
= floatx80_to_int64_round_to_zero(ST0
, &env
->fp_status
);
375 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
376 val
= 0x8000000000000000ULL
;
378 merge_exception_flags(env
, old_flags
);
382 void helper_fldt_ST0(CPUX86State
*env
, target_ulong ptr
)
387 access_prepare(&ac
, env
, ptr
, 10, MMU_DATA_LOAD
, GETPC());
389 new_fpstt
= (env
->fpstt
- 1) & 7;
390 env
->fpregs
[new_fpstt
].d
= do_fldt(&ac
, ptr
);
391 env
->fpstt
= new_fpstt
;
392 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
395 void helper_fstt_ST0(CPUX86State
*env
, target_ulong ptr
)
399 access_prepare(&ac
, env
, ptr
, 10, MMU_DATA_STORE
, GETPC());
400 do_fstt(&ac
, ptr
, ST0
);
403 void helper_fpush(CPUX86State
*env
)
408 void helper_fpop(CPUX86State
*env
)
413 void helper_fdecstp(CPUX86State
*env
)
415 env
->fpstt
= (env
->fpstt
- 1) & 7;
416 env
->fpus
&= ~0x4700;
419 void helper_fincstp(CPUX86State
*env
)
421 env
->fpstt
= (env
->fpstt
+ 1) & 7;
422 env
->fpus
&= ~0x4700;
/* FFREE ST(i): mark register ST(st_index) as empty via its tag bit. */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1; /* 1 = empty */
}
432 void helper_fmov_ST0_FT0(CPUX86State
*env
)
437 void helper_fmov_FT0_STN(CPUX86State
*env
, int st_index
)
442 void helper_fmov_ST0_STN(CPUX86State
*env
, int st_index
)
447 void helper_fmov_STN_ST0(CPUX86State
*env
, int st_index
)
452 void helper_fxchg_ST0_STN(CPUX86State
*env
, int st_index
)
/* FPU status-word condition bits for FCOM-style compares, indexed by
 * (compare result + 1): less -> C0, equal -> C3, greater -> none,
 * unordered -> C3|C2|C0. */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
465 void helper_fcom_ST0_FT0(CPUX86State
*env
)
467 uint8_t old_flags
= save_exception_flags(env
);
470 ret
= floatx80_compare(ST0
, FT0
, &env
->fp_status
);
471 env
->fpus
= (env
->fpus
& ~0x4500) | fcom_ccval
[ret
+ 1];
472 merge_exception_flags(env
, old_flags
);
475 void helper_fucom_ST0_FT0(CPUX86State
*env
)
477 uint8_t old_flags
= save_exception_flags(env
);
480 ret
= floatx80_compare_quiet(ST0
, FT0
, &env
->fp_status
);
481 env
->fpus
= (env
->fpus
& ~0x4500) | fcom_ccval
[ret
+ 1];
482 merge_exception_flags(env
, old_flags
);
/* EFLAGS bits for FCOMI-style compares, indexed by (compare result + 1):
 * less -> CF, equal -> ZF, greater -> none, unordered -> ZF|PF|CF. */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
487 void helper_fcomi_ST0_FT0(CPUX86State
*env
)
489 uint8_t old_flags
= save_exception_flags(env
);
493 ret
= floatx80_compare(ST0
, FT0
, &env
->fp_status
);
494 eflags
= cpu_cc_compute_all(env
) & ~(CC_Z
| CC_P
| CC_C
);
495 CC_SRC
= eflags
| fcomi_ccval
[ret
+ 1];
496 CC_OP
= CC_OP_EFLAGS
;
497 merge_exception_flags(env
, old_flags
);
500 void helper_fucomi_ST0_FT0(CPUX86State
*env
)
502 uint8_t old_flags
= save_exception_flags(env
);
506 ret
= floatx80_compare_quiet(ST0
, FT0
, &env
->fp_status
);
507 eflags
= cpu_cc_compute_all(env
) & ~(CC_Z
| CC_P
| CC_C
);
508 CC_SRC
= eflags
| fcomi_ccval
[ret
+ 1];
509 CC_OP
= CC_OP_EFLAGS
;
510 merge_exception_flags(env
, old_flags
);
513 void helper_fadd_ST0_FT0(CPUX86State
*env
)
515 uint8_t old_flags
= save_exception_flags(env
);
516 ST0
= floatx80_add(ST0
, FT0
, &env
->fp_status
);
517 merge_exception_flags(env
, old_flags
);
520 void helper_fmul_ST0_FT0(CPUX86State
*env
)
522 uint8_t old_flags
= save_exception_flags(env
);
523 ST0
= floatx80_mul(ST0
, FT0
, &env
->fp_status
);
524 merge_exception_flags(env
, old_flags
);
527 void helper_fsub_ST0_FT0(CPUX86State
*env
)
529 uint8_t old_flags
= save_exception_flags(env
);
530 ST0
= floatx80_sub(ST0
, FT0
, &env
->fp_status
);
531 merge_exception_flags(env
, old_flags
);
534 void helper_fsubr_ST0_FT0(CPUX86State
*env
)
536 uint8_t old_flags
= save_exception_flags(env
);
537 ST0
= floatx80_sub(FT0
, ST0
, &env
->fp_status
);
538 merge_exception_flags(env
, old_flags
);
/* FDIV: ST0 /= FT0; flag handling is done inside helper_fdiv(). */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}
/* FDIVR: ST0 = FT0 / ST0 (reversed operand order vs FDIV). */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}
551 /* fp operations between STN and ST0 */
553 void helper_fadd_STN_ST0(CPUX86State
*env
, int st_index
)
555 uint8_t old_flags
= save_exception_flags(env
);
556 ST(st_index
) = floatx80_add(ST(st_index
), ST0
, &env
->fp_status
);
557 merge_exception_flags(env
, old_flags
);
560 void helper_fmul_STN_ST0(CPUX86State
*env
, int st_index
)
562 uint8_t old_flags
= save_exception_flags(env
);
563 ST(st_index
) = floatx80_mul(ST(st_index
), ST0
, &env
->fp_status
);
564 merge_exception_flags(env
, old_flags
);
567 void helper_fsub_STN_ST0(CPUX86State
*env
, int st_index
)
569 uint8_t old_flags
= save_exception_flags(env
);
570 ST(st_index
) = floatx80_sub(ST(st_index
), ST0
, &env
->fp_status
);
571 merge_exception_flags(env
, old_flags
);
574 void helper_fsubr_STN_ST0(CPUX86State
*env
, int st_index
)
576 uint8_t old_flags
= save_exception_flags(env
);
577 ST(st_index
) = floatx80_sub(ST0
, ST(st_index
), &env
->fp_status
);
578 merge_exception_flags(env
, old_flags
);
581 void helper_fdiv_STN_ST0(CPUX86State
*env
, int st_index
)
586 *p
= helper_fdiv(env
, *p
, ST0
);
589 void helper_fdivr_STN_ST0(CPUX86State
*env
, int st_index
)
594 *p
= helper_fdiv(env
, ST0
, *p
);
597 /* misc FPU operations */
/* FCHS: negate ST0 (sign-bit flip; raises no FPU exceptions). */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}
/* FABS: clear the sign bit of ST0 (raises no FPU exceptions). */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}
608 void helper_fld1_ST0(CPUX86State
*env
)
613 void helper_fldl2t_ST0(CPUX86State
*env
)
615 switch (env
->fpuc
& FPU_RC_MASK
) {
617 ST0
= floatx80_l2t_u
;
625 void helper_fldl2e_ST0(CPUX86State
*env
)
627 switch (env
->fpuc
& FPU_RC_MASK
) {
630 ST0
= floatx80_l2e_d
;
638 void helper_fldpi_ST0(CPUX86State
*env
)
640 switch (env
->fpuc
& FPU_RC_MASK
) {
651 void helper_fldlg2_ST0(CPUX86State
*env
)
653 switch (env
->fpuc
& FPU_RC_MASK
) {
656 ST0
= floatx80_lg2_d
;
664 void helper_fldln2_ST0(CPUX86State
*env
)
666 switch (env
->fpuc
& FPU_RC_MASK
) {
669 ST0
= floatx80_ln2_d
;
677 void helper_fldz_ST0(CPUX86State
*env
)
682 void helper_fldz_FT0(CPUX86State
*env
)
687 uint32_t helper_fnstsw(CPUX86State
*env
)
689 return (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
692 uint32_t helper_fnstcw(CPUX86State
*env
)
697 static void set_x86_rounding_mode(unsigned mode
, float_status
*status
)
699 static FloatRoundMode x86_round_mode
[4] = {
700 float_round_nearest_even
,
705 assert(mode
< ARRAY_SIZE(x86_round_mode
));
706 set_float_rounding_mode(x86_round_mode
[mode
], status
);
709 void update_fp_status(CPUX86State
*env
)
712 FloatX80RoundPrec rnd_prec
;
714 /* set rounding mode */
715 rnd_mode
= (env
->fpuc
& FPU_RC_MASK
) >> FPU_RC_SHIFT
;
716 set_x86_rounding_mode(rnd_mode
, &env
->fp_status
);
718 switch ((env
->fpuc
>> 8) & 3) {
720 rnd_prec
= floatx80_precision_s
;
723 rnd_prec
= floatx80_precision_d
;
727 rnd_prec
= floatx80_precision_x
;
730 set_floatx80_rounding_precision(rnd_prec
, &env
->fp_status
);
/* FLDCW: install a new FPU control word; cpu_set_fpuc() also refreshes
 * the derived softfloat rounding mode/precision state. */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
738 void helper_fclex(CPUX86State
*env
)
/* FWAIT: deliver a pending x87 exception if the status word's
 * error-summary flag is set. */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}
750 static void do_fninit(CPUX86State
*env
)
758 cpu_set_fpuc(env
, 0x37f);
769 void helper_fninit(CPUX86State
*env
)
776 void helper_fbld_ST0(CPUX86State
*env
, target_ulong ptr
)
784 access_prepare(&ac
, env
, ptr
, 10, MMU_DATA_LOAD
, GETPC());
787 for (i
= 8; i
>= 0; i
--) {
788 v
= access_ldb(&ac
, ptr
+ i
);
789 val
= (val
* 100) + ((v
>> 4) * 10) + (v
& 0xf);
791 tmp
= int64_to_floatx80(val
, &env
->fp_status
);
792 if (access_ldb(&ac
, ptr
+ 9) & 0x80) {
793 tmp
= floatx80_chs(tmp
);
799 void helper_fbst_ST0(CPUX86State
*env
, target_ulong ptr
)
801 uint8_t old_flags
= save_exception_flags(env
);
803 target_ulong mem_ref
, mem_end
;
808 access_prepare(&ac
, env
, ptr
, 10, MMU_DATA_STORE
, GETPC());
811 val
= floatx80_to_int64(ST0
, &env
->fp_status
);
813 if (val
>= 1000000000000000000LL || val
<= -1000000000000000000LL) {
814 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
815 while (mem_ref
< ptr
+ 7) {
816 access_stb(&ac
, mem_ref
++, 0);
818 access_stb(&ac
, mem_ref
++, 0xc0);
819 access_stb(&ac
, mem_ref
++, 0xff);
820 access_stb(&ac
, mem_ref
++, 0xff);
821 merge_exception_flags(env
, old_flags
);
824 mem_end
= mem_ref
+ 9;
826 access_stb(&ac
, mem_end
, 0x80);
829 access_stb(&ac
, mem_end
, 0x00);
831 while (mem_ref
< mem_end
) {
837 v
= ((v
/ 10) << 4) | (v
% 10);
838 access_stb(&ac
, mem_ref
++, v
);
840 while (mem_ref
< mem_end
) {
841 access_stb(&ac
, mem_ref
++, 0);
843 merge_exception_flags(env
, old_flags
);
846 /* 128-bit significand of log(2). */
847 #define ln2_sig_high 0xb17217f7d1cf79abULL
848 #define ln2_sig_low 0xc9e3b39803f2f6afULL
851 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
852 * the interval [-1/64, 1/64].
854 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
855 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
856 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
857 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
858 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
859 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
860 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
861 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
862 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
866 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
867 * are very close to exact floatx80 values.
870 /* The value of 2^t. */
872 /* The value of 2^t - 1. */
876 static const struct f2xm1_data f2xm1_table
[65] = {
877 { make_floatx80_init(0xbfff, 0x8000000000000000ULL
),
878 make_floatx80_init(0x3ffe, 0x8000000000000000ULL
),
879 make_floatx80_init(0xbffe, 0x8000000000000000ULL
) },
880 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL
),
881 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL
),
882 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL
) },
883 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL
),
884 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL
),
885 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL
) },
886 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL
),
887 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL
),
888 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL
) },
889 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL
),
890 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL
),
891 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL
) },
892 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL
),
893 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL
),
894 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL
) },
895 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL
),
896 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL
),
897 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL
) },
898 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL
),
899 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL
),
900 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL
) },
901 { make_floatx80_init(0xbffe, 0xc000000000006530ULL
),
902 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL
),
903 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL
) },
904 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL
),
905 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL
),
906 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL
) },
907 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL
),
908 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL
),
909 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL
) },
910 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL
),
911 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL
),
912 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL
) },
913 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL
),
914 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL
),
915 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL
) },
916 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL
),
917 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL
),
918 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL
) },
919 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL
),
920 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL
),
921 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL
) },
922 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL
),
923 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL
),
924 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL
) },
925 { make_floatx80_init(0xbffe, 0x800000000000227dULL
),
926 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL
),
927 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL
) },
928 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL
),
929 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL
),
930 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL
) },
931 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL
),
932 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL
),
933 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL
) },
934 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL
),
935 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL
),
936 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL
) },
937 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL
),
938 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL
),
939 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL
) },
940 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL
),
941 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL
),
942 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL
) },
943 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL
),
944 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL
),
945 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL
) },
946 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL
),
947 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL
),
948 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL
) },
949 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL
),
950 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL
),
951 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL
) },
952 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL
),
953 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL
),
954 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL
) },
955 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL
),
956 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL
),
957 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL
) },
958 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL
),
959 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL
),
960 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL
) },
961 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL
),
962 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL
),
963 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL
) },
964 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL
),
965 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL
),
966 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL
) },
967 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL
),
968 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL
),
969 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL
) },
970 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL
),
971 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL
),
972 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL
) },
973 { floatx80_zero_init
,
974 make_floatx80_init(0x3fff, 0x8000000000000000ULL
),
975 floatx80_zero_init
},
976 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL
),
977 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL
),
978 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL
) },
979 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL
),
980 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL
),
981 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL
) },
982 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL
),
983 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL
),
984 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL
) },
985 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL
),
986 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL
),
987 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL
) },
988 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL
),
989 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL
),
990 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL
) },
991 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL
),
992 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL
),
993 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL
) },
994 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL
),
995 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL
),
996 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL
) },
997 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL
),
998 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL
),
999 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL
) },
1000 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL
),
1001 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL
),
1002 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL
) },
1003 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL
),
1004 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL
),
1005 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL
) },
1006 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL
),
1007 make_floatx80_init(0x3fff, 0xa27043030c49370aULL
),
1008 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL
) },
1009 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL
),
1010 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL
),
1011 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL
) },
1012 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL
),
1013 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL
),
1014 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL
) },
1015 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL
),
1016 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL
),
1017 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL
) },
1018 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL
),
1019 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL
),
1020 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL
) },
1021 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL
),
1022 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL
),
1023 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL
) },
1024 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL
),
1025 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL
),
1026 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL
) },
1027 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL
),
1028 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL
),
1029 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL
) },
1030 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL
),
1031 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL
),
1032 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL
) },
1033 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL
),
1034 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL
),
1035 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL
) },
1036 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL
),
1037 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL
),
1038 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL
) },
1039 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL
),
1040 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL
),
1041 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL
) },
1042 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL
),
1043 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL
),
1044 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL
) },
1045 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL
),
1046 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL
),
1047 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL
) },
1048 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL
),
1049 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL
),
1050 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL
) },
1051 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL
),
1052 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL
),
1053 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL
) },
1054 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL
),
1055 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL
),
1056 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL
) },
1057 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL
),
1058 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL
),
1059 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL
) },
1060 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL
),
1061 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL
),
1062 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL
) },
1063 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL
),
1064 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL
),
1065 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL
) },
1066 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL
),
1067 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL
),
1068 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL
) },
1069 { make_floatx80_init(0x3fff, 0x8000000000000000ULL
),
1070 make_floatx80_init(0x4000, 0x8000000000000000ULL
),
1071 make_floatx80_init(0x3fff, 0x8000000000000000ULL
) },
1074 void helper_f2xm1(CPUX86State
*env
)
1076 uint8_t old_flags
= save_exception_flags(env
);
1077 uint64_t sig
= extractFloatx80Frac(ST0
);
1078 int32_t exp
= extractFloatx80Exp(ST0
);
1079 bool sign
= extractFloatx80Sign(ST0
);
1081 if (floatx80_invalid_encoding(ST0
)) {
1082 float_raise(float_flag_invalid
, &env
->fp_status
);
1083 ST0
= floatx80_default_nan(&env
->fp_status
);
1084 } else if (floatx80_is_any_nan(ST0
)) {
1085 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1086 float_raise(float_flag_invalid
, &env
->fp_status
);
1087 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1089 } else if (exp
> 0x3fff ||
1090 (exp
== 0x3fff && sig
!= (0x8000000000000000ULL
))) {
1091 /* Out of range for the instruction, treat as invalid. */
1092 float_raise(float_flag_invalid
, &env
->fp_status
);
1093 ST0
= floatx80_default_nan(&env
->fp_status
);
1094 } else if (exp
== 0x3fff) {
1095 /* Argument 1 or -1, exact result 1 or -0.5. */
1097 ST0
= make_floatx80(0xbffe, 0x8000000000000000ULL
);
1099 } else if (exp
< 0x3fb0) {
1100 if (!floatx80_is_zero(ST0
)) {
1102 * Multiplying the argument by an extra-precision version
1103 * of log(2) is sufficiently precise. Zero arguments are
1104 * returned unchanged.
1106 uint64_t sig0
, sig1
, sig2
;
1108 normalizeFloatx80Subnormal(sig
, &exp
, &sig
);
1110 mul128By64To192(ln2_sig_high
, ln2_sig_low
, sig
, &sig0
, &sig1
,
1112 /* This result is inexact. */
1114 ST0
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
1115 sign
, exp
, sig0
, sig1
,
1119 floatx80 tmp
, y
, accum
;
1121 int32_t n
, aexp
, bexp
;
1122 uint64_t asig0
, asig1
, asig2
, bsig0
, bsig1
;
1123 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
1124 FloatX80RoundPrec save_prec
=
1125 env
->fp_status
.floatx80_rounding_precision
;
1126 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
1127 env
->fp_status
.floatx80_rounding_precision
= floatx80_precision_x
;
1129 /* Find the nearest multiple of 1/32 to the argument. */
1130 tmp
= floatx80_scalbn(ST0
, 5, &env
->fp_status
);
1131 n
= 32 + floatx80_to_int32(tmp
, &env
->fp_status
);
1132 y
= floatx80_sub(ST0
, f2xm1_table
[n
].t
, &env
->fp_status
);
1134 if (floatx80_is_zero(y
)) {
1136 * Use the value of 2^t - 1 from the table, to avoid
1137 * needing to special-case zero as a result of
1138 * multiplication below.
1140 ST0
= f2xm1_table
[n
].t
;
1141 set_float_exception_flags(float_flag_inexact
, &env
->fp_status
);
1142 env
->fp_status
.float_rounding_mode
= save_mode
;
1145 * Compute the lower parts of a polynomial expansion for
1148 accum
= floatx80_mul(f2xm1_coeff_7
, y
, &env
->fp_status
);
1149 accum
= floatx80_add(f2xm1_coeff_6
, accum
, &env
->fp_status
);
1150 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1151 accum
= floatx80_add(f2xm1_coeff_5
, accum
, &env
->fp_status
);
1152 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1153 accum
= floatx80_add(f2xm1_coeff_4
, accum
, &env
->fp_status
);
1154 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1155 accum
= floatx80_add(f2xm1_coeff_3
, accum
, &env
->fp_status
);
1156 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1157 accum
= floatx80_add(f2xm1_coeff_2
, accum
, &env
->fp_status
);
1158 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1159 accum
= floatx80_add(f2xm1_coeff_1
, accum
, &env
->fp_status
);
1160 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1161 accum
= floatx80_add(f2xm1_coeff_0_low
, accum
, &env
->fp_status
);
1164 * The full polynomial expansion is f2xm1_coeff_0 + accum
1165 * (where accum has much lower magnitude, and so, in
1166 * particular, carry out of the addition is not possible).
1167 * (This expansion is only accurate to about 70 bits, not
1170 aexp
= extractFloatx80Exp(f2xm1_coeff_0
);
1171 asign
= extractFloatx80Sign(f2xm1_coeff_0
);
1172 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1173 aexp
- extractFloatx80Exp(accum
),
1175 bsig0
= extractFloatx80Frac(f2xm1_coeff_0
);
1177 if (asign
== extractFloatx80Sign(accum
)) {
1178 add128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1180 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1182 /* And thus compute an approximation to 2^y - 1. */
1183 mul128By64To192(asig0
, asig1
, extractFloatx80Frac(y
),
1184 &asig0
, &asig1
, &asig2
);
1185 aexp
+= extractFloatx80Exp(y
) - 0x3ffe;
1186 asign
^= extractFloatx80Sign(y
);
1189 * Multiply this by the precomputed value of 2^t and
1190 * add that of 2^t - 1.
1192 mul128By64To192(asig0
, asig1
,
1193 extractFloatx80Frac(f2xm1_table
[n
].exp2
),
1194 &asig0
, &asig1
, &asig2
);
1195 aexp
+= extractFloatx80Exp(f2xm1_table
[n
].exp2
) - 0x3ffe;
1196 bexp
= extractFloatx80Exp(f2xm1_table
[n
].exp2m1
);
1197 bsig0
= extractFloatx80Frac(f2xm1_table
[n
].exp2m1
);
1200 shift128RightJamming(bsig0
, bsig1
, aexp
- bexp
,
1202 } else if (aexp
< bexp
) {
1203 shift128RightJamming(asig0
, asig1
, bexp
- aexp
,
1207 /* The sign of 2^t - 1 is always that of the result. */
1208 bsign
= extractFloatx80Sign(f2xm1_table
[n
].exp2m1
);
1209 if (asign
== bsign
) {
1210 /* Avoid possible carry out of the addition. */
1211 shift128RightJamming(asig0
, asig1
, 1,
1213 shift128RightJamming(bsig0
, bsig1
, 1,
1216 add128(asig0
, asig1
, bsig0
, bsig1
, &asig0
, &asig1
);
1218 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1222 env
->fp_status
.float_rounding_mode
= save_mode
;
1223 /* This result is inexact. */
1225 ST0
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
1226 asign
, aexp
, asig0
, asig1
,
1230 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1232 merge_exception_flags(env
, old_flags
);
1235 void helper_fptan(CPUX86State
*env
)
1237 double fptemp
= floatx80_to_double(env
, ST0
);
1239 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
1242 fptemp
= tan(fptemp
);
1243 ST0
= double_to_floatx80(env
, fptemp
);
1246 env
->fpus
&= ~0x400; /* C2 <-- 0 */
1247 /* the above code is for |arg| < 2**52 only */
1251 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. */
1252 #define pi_4_exp 0x3ffe
1253 #define pi_4_sig_high 0xc90fdaa22168c234ULL
1254 #define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
1255 #define pi_2_exp 0x3fff
1256 #define pi_2_sig_high 0xc90fdaa22168c234ULL
1257 #define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
1258 #define pi_34_exp 0x4000
1259 #define pi_34_sig_high 0x96cbe3f9990e91a7ULL
1260 #define pi_34_sig_low 0x9394c9e8a0a5159dULL
1261 #define pi_exp 0x4000
1262 #define pi_sig_high 0xc90fdaa22168c234ULL
1263 #define pi_sig_low 0xc4c6628b80dc1cd1ULL
1266 * Polynomial coefficients for an approximation to atan(x), with only
1267 * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike
1268 * for some other approximations, no low part is needed for the first
1269 * coefficient here to achieve a sufficiently accurate result, because
1270 * the coefficient in this minimax approximation is very close to
1273 #define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
1274 #define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
1275 #define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
1276 #define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
1277 #define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
1278 #define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
1279 #define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
/*
 * Table-entry type for the fpatan argument reduction: atan(x) split
 * into a high part and a low correction term, so that intermediate
 * arithmetic can retain more than 64 bits of significand.
 */
struct fpatan_data {
    /* High and low parts of atan(x).  */
    floatx80 atan_high, atan_low;
};
/*
 * Precomputed values of atan(n/8) for n in [0, 8], each as a high part
 * plus a low correction term (the low part may be negative).
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};
/*
 * FPATAN: compute the four-quadrant arctangent ST1 = atan2(ST1, ST0),
 * then pop the x87 stack.  Special cases (NaNs, invalid encodings,
 * zeros, infinities, widely separated exponents) are dispatched first;
 * the general finite path reduces the ratio to [0, 1], splits off the
 * nearest multiple of 1/8 using fpatan_table, and evaluates a
 * polynomial with 128-bit intermediate significands.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        /* Quiet NaN in ST0 propagates to the result. */
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact. */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative. */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0). */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi. */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is. */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1). */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2. */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                /* t is zero: y is all of x. */
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1. */
            }

            if (zexp == 0) {
                /* z is zero, so arctan(z) is zero. */
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2 for the odd-powers-only polynomial. */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion. */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);

                /* Multiply by z to compute arctan(z). */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
            if (texp == 0) {
                /* z is positive. */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact. */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}
/*
 * FXTRACT: decompose ST0 into exponent and significand, leaving the
 * unbiased exponent in ST1 and the normalized significand in ST0
 * (the stack grows by one).  Zero, invalid-encoding, NaN and infinity
 * operands take the architected special-case paths.
 */
void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        /* Both results are the default NaN. */
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        /* The (possibly silenced) NaN is duplicated into both slots. */
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) == 0) {
            /* Denormal operand: normalize by hand to find the exponent. */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}
/*
 * Shared implementation of FPREM (mod = true, truncating quotient) and
 * FPREM1 (mod = false, round-to-nearest quotient): ST0 = remainder of
 * ST0 / ST1, setting C0/C1/C3 from the low quotient bits on a complete
 * reduction, or C2 when only a partial reduction was performed.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
        /* Special cases are fully handled by the softfloat routine. */
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        /* For denormals, compute the effective (normalized) exponent. */
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            /* Complete reduction in a single step. */
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}
/* FPREM1: IEEE remainder, with the quotient rounded to nearest. */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}
/* FPREM: x87 partial remainder, with the quotient truncated toward zero. */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}
1853 /* 128-bit significand of log2(e). */
1854 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL
1855 #define log2_e_sig_low 0xbe87fed0691d3e89ULL
1858 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
1859 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
1860 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
1861 * interval [sqrt(2)/2, sqrt(2)].
1863 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
1864 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
1865 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
1866 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
1867 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
1868 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
1869 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
1870 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
1871 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
1872 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
1873 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1876 * Compute an approximation of log2(1+arg), where 1+arg is in the
1877 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
1878 * function is called, rounding precision is set to 80 and the
1879 * round-to-nearest mode is in effect. arg must not be exactly zero,
1880 * and must not be so close to zero that underflow might occur.
/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.  arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 * The result is returned unrounded as an exponent (*exp) and a
 * 128-bit significand (*sig0, *sig1).
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
    if (arg0_sign) {
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    while ((int64_t) rsig0 < 0) {
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                       texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion. */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result. */
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}
/*
 * FYL2XP1: ST1 = ST1 * log2(1 + ST0), then pop the x87 stack.  ST0 is
 * restricted to a small range around zero (out-of-range operands are
 * treated as invalid); tiny operands use a direct multiplication by an
 * extra-precision log2(e), larger ones go through the common
 * polynomial core.
 */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        /* Quiet NaN in ST0 propagates to the result. */
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        asig1 |= 1;
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}
2086 void helper_fyl2x(CPUX86State
*env
)
2088 uint8_t old_flags
= save_exception_flags(env
);
2089 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
2090 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
2091 bool arg0_sign
= extractFloatx80Sign(ST0
);
2092 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
2093 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
2094 bool arg1_sign
= extractFloatx80Sign(ST1
);
2096 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2097 float_raise(float_flag_invalid
, &env
->fp_status
);
2098 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
2099 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
2100 float_raise(float_flag_invalid
, &env
->fp_status
);
2101 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
2102 } else if (floatx80_invalid_encoding(ST0
) ||
2103 floatx80_invalid_encoding(ST1
)) {
2104 float_raise(float_flag_invalid
, &env
->fp_status
);
2105 ST1
= floatx80_default_nan(&env
->fp_status
);
2106 } else if (floatx80_is_any_nan(ST0
)) {
2108 } else if (floatx80_is_any_nan(ST1
)) {
2109 /* Pass this NaN through. */
2110 } else if (arg0_sign
&& !floatx80_is_zero(ST0
)) {
2111 float_raise(float_flag_invalid
, &env
->fp_status
);
2112 ST1
= floatx80_default_nan(&env
->fp_status
);
2113 } else if (floatx80_is_infinity(ST1
)) {
2114 FloatRelation cmp
= floatx80_compare(ST0
, floatx80_one
,
2117 case float_relation_less
:
2118 ST1
= floatx80_chs(ST1
);
2120 case float_relation_greater
:
2121 /* Result is infinity of the same sign as ST1. */
2124 float_raise(float_flag_invalid
, &env
->fp_status
);
2125 ST1
= floatx80_default_nan(&env
->fp_status
);
2128 } else if (floatx80_is_infinity(ST0
)) {
2129 if (floatx80_is_zero(ST1
)) {
2130 float_raise(float_flag_invalid
, &env
->fp_status
);
2131 ST1
= floatx80_default_nan(&env
->fp_status
);
2132 } else if (arg1_sign
) {
2133 ST1
= floatx80_chs(ST0
);
2137 } else if (floatx80_is_zero(ST0
)) {
2138 if (floatx80_is_zero(ST1
)) {
2139 float_raise(float_flag_invalid
, &env
->fp_status
);
2140 ST1
= floatx80_default_nan(&env
->fp_status
);
2142 /* Result is infinity with opposite sign to ST1. */
2143 float_raise(float_flag_divbyzero
, &env
->fp_status
);
2144 ST1
= make_floatx80(arg1_sign
? 0x7fff : 0xffff,
2145 0x8000000000000000ULL
);
2147 } else if (floatx80_is_zero(ST1
)) {
2148 if (floatx80_lt(ST0
, floatx80_one
, &env
->fp_status
)) {
2149 ST1
= floatx80_chs(ST1
);
2151 /* Otherwise, ST1 is already the correct result. */
2152 } else if (floatx80_eq(ST0
, floatx80_one
, &env
->fp_status
)) {
2154 ST1
= floatx80_chs(floatx80_zero
);
2156 ST1
= floatx80_zero
;
2161 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
2162 FloatX80RoundPrec save_prec
=
2163 env
->fp_status
.floatx80_rounding_precision
;
2164 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
2165 env
->fp_status
.floatx80_rounding_precision
= floatx80_precision_x
;
2167 if (arg0_exp
== 0) {
2168 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
2170 if (arg1_exp
== 0) {
2171 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2173 int_exp
= arg0_exp
- 0x3fff;
2174 if (arg0_sig
> 0xb504f333f9de6484ULL
) {
2177 arg0_m1
= floatx80_sub(floatx80_scalbn(ST0
, -int_exp
,
2179 floatx80_one
, &env
->fp_status
);
2180 if (floatx80_is_zero(arg0_m1
)) {
2181 /* Exact power of 2; multiply by ST1. */
2182 env
->fp_status
.float_rounding_mode
= save_mode
;
2183 ST1
= floatx80_mul(int32_to_floatx80(int_exp
, &env
->fp_status
),
2184 ST1
, &env
->fp_status
);
2186 bool asign
= extractFloatx80Sign(arg0_m1
);
2188 uint64_t asig0
, asig1
, asig2
;
2189 helper_fyl2x_common(env
, arg0_m1
, &aexp
, &asig0
, &asig1
);
2191 bool isign
= (int_exp
< 0);
2195 int_exp
= isign
? -int_exp
: int_exp
;
2196 shift
= clz32(int_exp
) + 32;
2199 iexp
= 0x403e - shift
;
2200 shift128RightJamming(asig0
, asig1
, iexp
- aexp
,
2202 if (asign
== isign
) {
2203 add128(isig
, 0, asig0
, asig1
, &asig0
, &asig1
);
2205 sub128(isig
, 0, asig0
, asig1
, &asig0
, &asig1
);
2211 * Multiply by the second argument to compute the required
2214 if (arg1_exp
== 0) {
2215 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2217 mul128By64To192(asig0
, asig1
, arg1_sig
, &asig0
, &asig1
, &asig2
);
2218 aexp
+= arg1_exp
- 0x3ffe;
2219 /* This result is inexact. */
2221 env
->fp_status
.float_rounding_mode
= save_mode
;
2222 ST1
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
2223 asign
^ arg1_sign
, aexp
,
2224 asig0
, asig1
, &env
->fp_status
);
2227 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
2230 merge_exception_flags(env
, old_flags
);
2233 void helper_fsqrt(CPUX86State
*env
)
2235 uint8_t old_flags
= save_exception_flags(env
);
2236 if (floatx80_is_neg(ST0
)) {
2237 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2240 ST0
= floatx80_sqrt(ST0
, &env
->fp_status
);
2241 merge_exception_flags(env
, old_flags
);
2244 void helper_fsincos(CPUX86State
*env
)
2246 double fptemp
= floatx80_to_double(env
, ST0
);
2248 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2251 ST0
= double_to_floatx80(env
, sin(fptemp
));
2253 ST0
= double_to_floatx80(env
, cos(fptemp
));
2254 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2255 /* the above code is for |arg| < 2**63 only */
2259 void helper_frndint(CPUX86State
*env
)
2261 uint8_t old_flags
= save_exception_flags(env
);
2262 ST0
= floatx80_round_to_int(ST0
, &env
->fp_status
);
2263 merge_exception_flags(env
, old_flags
);
2266 void helper_fscale(CPUX86State
*env
)
2268 uint8_t old_flags
= save_exception_flags(env
);
2269 if (floatx80_invalid_encoding(ST1
) || floatx80_invalid_encoding(ST0
)) {
2270 float_raise(float_flag_invalid
, &env
->fp_status
);
2271 ST0
= floatx80_default_nan(&env
->fp_status
);
2272 } else if (floatx80_is_any_nan(ST1
)) {
2273 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2274 float_raise(float_flag_invalid
, &env
->fp_status
);
2277 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2278 float_raise(float_flag_invalid
, &env
->fp_status
);
2279 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
2281 } else if (floatx80_is_infinity(ST1
) &&
2282 !floatx80_invalid_encoding(ST0
) &&
2283 !floatx80_is_any_nan(ST0
)) {
2284 if (floatx80_is_neg(ST1
)) {
2285 if (floatx80_is_infinity(ST0
)) {
2286 float_raise(float_flag_invalid
, &env
->fp_status
);
2287 ST0
= floatx80_default_nan(&env
->fp_status
);
2289 ST0
= (floatx80_is_neg(ST0
) ?
2290 floatx80_chs(floatx80_zero
) :
2294 if (floatx80_is_zero(ST0
)) {
2295 float_raise(float_flag_invalid
, &env
->fp_status
);
2296 ST0
= floatx80_default_nan(&env
->fp_status
);
2298 ST0
= (floatx80_is_neg(ST0
) ?
2299 floatx80_chs(floatx80_infinity
) :
2305 FloatX80RoundPrec save
= env
->fp_status
.floatx80_rounding_precision
;
2306 uint8_t save_flags
= get_float_exception_flags(&env
->fp_status
);
2307 set_float_exception_flags(0, &env
->fp_status
);
2308 n
= floatx80_to_int32_round_to_zero(ST1
, &env
->fp_status
);
2309 set_float_exception_flags(save_flags
, &env
->fp_status
);
2310 env
->fp_status
.floatx80_rounding_precision
= floatx80_precision_x
;
2311 ST0
= floatx80_scalbn(ST0
, n
, &env
->fp_status
);
2312 env
->fp_status
.floatx80_rounding_precision
= save
;
2314 merge_exception_flags(env
, old_flags
);
2317 void helper_fsin(CPUX86State
*env
)
2319 double fptemp
= floatx80_to_double(env
, ST0
);
2321 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2324 ST0
= double_to_floatx80(env
, sin(fptemp
));
2325 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2326 /* the above code is for |arg| < 2**53 only */
2330 void helper_fcos(CPUX86State
*env
)
2332 double fptemp
= floatx80_to_double(env
, ST0
);
2334 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2337 ST0
= double_to_floatx80(env
, cos(fptemp
));
2338 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2339 /* the above code is for |arg| < 2**63 only */
2343 void helper_fxam_ST0(CPUX86State
*env
)
2350 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2352 env
->fpus
|= 0x200; /* C1 <-- 1 */
2355 if (env
->fptags
[env
->fpstt
]) {
2356 env
->fpus
|= 0x4100; /* Empty */
2360 expdif
= EXPD(temp
);
2361 if (expdif
== MAXEXPD
) {
2362 if (MANTD(temp
) == 0x8000000000000000ULL
) {
2363 env
->fpus
|= 0x500; /* Infinity */
2364 } else if (MANTD(temp
) & 0x8000000000000000ULL
) {
2365 env
->fpus
|= 0x100; /* NaN */
2367 } else if (expdif
== 0) {
2368 if (MANTD(temp
) == 0) {
2369 env
->fpus
|= 0x4000; /* Zero */
2371 env
->fpus
|= 0x4400; /* Denormal */
2373 } else if (MANTD(temp
) & 0x8000000000000000ULL
) {
2378 static void do_fstenv(X86Access
*ac
, target_ulong ptr
, int data32
)
2380 CPUX86State
*env
= ac
->env
;
2381 int fpus
, fptag
, exp
, i
;
2385 fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
2387 for (i
= 7; i
>= 0; i
--) {
2389 if (env
->fptags
[i
]) {
2392 tmp
.d
= env
->fpregs
[i
].d
;
2395 if (exp
== 0 && mant
== 0) {
2398 } else if (exp
== 0 || exp
== MAXEXPD
2399 || (mant
& (1LL << 63)) == 0) {
2400 /* NaNs, infinity, denormal */
2407 access_stl(ac
, ptr
, env
->fpuc
);
2408 access_stl(ac
, ptr
+ 4, fpus
);
2409 access_stl(ac
, ptr
+ 8, fptag
);
2410 access_stl(ac
, ptr
+ 12, env
->fpip
); /* fpip */
2411 access_stl(ac
, ptr
+ 16, env
->fpcs
); /* fpcs */
2412 access_stl(ac
, ptr
+ 20, env
->fpdp
); /* fpoo */
2413 access_stl(ac
, ptr
+ 24, env
->fpds
); /* fpos */
2416 access_stw(ac
, ptr
, env
->fpuc
);
2417 access_stw(ac
, ptr
+ 2, fpus
);
2418 access_stw(ac
, ptr
+ 4, fptag
);
2419 access_stw(ac
, ptr
+ 6, env
->fpip
);
2420 access_stw(ac
, ptr
+ 8, env
->fpcs
);
2421 access_stw(ac
, ptr
+ 10, env
->fpdp
);
2422 access_stw(ac
, ptr
+ 12, env
->fpds
);
2426 void helper_fstenv(CPUX86State
*env
, target_ulong ptr
, int data32
)
2430 access_prepare(&ac
, env
, ptr
, 14 << data32
, MMU_DATA_STORE
, GETPC());
2431 do_fstenv(&ac
, ptr
, data32
);
2434 static void cpu_set_fpus(CPUX86State
*env
, uint16_t fpus
)
2436 env
->fpstt
= (fpus
>> 11) & 7;
2437 env
->fpus
= fpus
& ~0x3800 & ~FPUS_B
;
2438 env
->fpus
|= env
->fpus
& FPUS_SE
? FPUS_B
: 0;
2439 #if !defined(CONFIG_USER_ONLY)
2440 if (!(env
->fpus
& FPUS_SE
)) {
2442 * Here the processor deasserts FERR#; in response, the chipset deasserts
2450 static void do_fldenv(X86Access
*ac
, target_ulong ptr
, int data32
)
2453 CPUX86State
*env
= ac
->env
;
2455 cpu_set_fpuc(env
, access_ldw(ac
, ptr
));
2456 fpus
= access_ldw(ac
, ptr
+ (2 << data32
));
2457 fptag
= access_ldw(ac
, ptr
+ (4 << data32
));
2459 cpu_set_fpus(env
, fpus
);
2460 for (i
= 0; i
< 8; i
++) {
2461 env
->fptags
[i
] = ((fptag
& 3) == 3);
2466 void helper_fldenv(CPUX86State
*env
, target_ulong ptr
, int data32
)
2470 access_prepare(&ac
, env
, ptr
, 14 << data32
, MMU_DATA_STORE
, GETPC());
2471 do_fldenv(&ac
, ptr
, data32
);
2474 static void do_fsave(X86Access
*ac
, target_ulong ptr
, int data32
)
2476 CPUX86State
*env
= ac
->env
;
2478 do_fstenv(ac
, ptr
, data32
);
2479 ptr
+= 14 << data32
;
2481 for (int i
= 0; i
< 8; i
++) {
2482 floatx80 tmp
= ST(i
);
2483 do_fstt(ac
, ptr
, tmp
);
2490 void helper_fsave(CPUX86State
*env
, target_ulong ptr
, int data32
)
2492 int size
= (14 << data32
) + 80;
2495 access_prepare(&ac
, env
, ptr
, size
, MMU_DATA_STORE
, GETPC());
2496 do_fsave(&ac
, ptr
, data32
);
2499 static void do_frstor(X86Access
*ac
, target_ulong ptr
, int data32
)
2501 CPUX86State
*env
= ac
->env
;
2503 do_fldenv(ac
, ptr
, data32
);
2504 ptr
+= 14 << data32
;
2506 for (int i
= 0; i
< 8; i
++) {
2507 floatx80 tmp
= do_fldt(ac
, ptr
);
2513 void helper_frstor(CPUX86State
*env
, target_ulong ptr
, int data32
)
2515 int size
= (14 << data32
) + 80;
2518 access_prepare(&ac
, env
, ptr
, size
, MMU_DATA_LOAD
, GETPC());
2519 do_frstor(&ac
, ptr
, data32
);
2522 #define XO(X) offsetof(X86XSaveArea, X)
2524 static void do_xsave_fpu(X86Access
*ac
, target_ulong ptr
)
2526 CPUX86State
*env
= ac
->env
;
2530 fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
2532 for (i
= 0; i
< 8; i
++) {
2533 fptag
|= (env
->fptags
[i
] << i
);
2536 access_stw(ac
, ptr
+ XO(legacy
.fcw
), env
->fpuc
);
2537 access_stw(ac
, ptr
+ XO(legacy
.fsw
), fpus
);
2538 access_stw(ac
, ptr
+ XO(legacy
.ftw
), fptag
^ 0xff);
2540 /* In 32-bit mode this is eip, sel, dp, sel.
2541 In 64-bit mode this is rip, rdp.
2542 But in either case we don't write actual data, just zeros. */
2543 access_stq(ac
, ptr
+ XO(legacy
.fpip
), 0); /* eip+sel; rip */
2544 access_stq(ac
, ptr
+ XO(legacy
.fpdp
), 0); /* edp+sel; rdp */
2546 addr
= ptr
+ XO(legacy
.fpregs
);
2548 for (i
= 0; i
< 8; i
++) {
2549 floatx80 tmp
= ST(i
);
2550 do_fstt(ac
, addr
, tmp
);
2555 static void do_xsave_mxcsr(X86Access
*ac
, target_ulong ptr
)
2557 CPUX86State
*env
= ac
->env
;
2559 update_mxcsr_from_sse_status(env
);
2560 access_stl(ac
, ptr
+ XO(legacy
.mxcsr
), env
->mxcsr
);
2561 access_stl(ac
, ptr
+ XO(legacy
.mxcsr_mask
), 0x0000ffff);
2564 static void do_xsave_sse(X86Access
*ac
, target_ulong ptr
)
2566 CPUX86State
*env
= ac
->env
;
2570 if (env
->hflags
& HF_CS64_MASK
) {
2576 addr
= ptr
+ XO(legacy
.xmm_regs
);
2577 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2578 access_stq(ac
, addr
, env
->xmm_regs
[i
].ZMM_Q(0));
2579 access_stq(ac
, addr
+ 8, env
->xmm_regs
[i
].ZMM_Q(1));
2584 static void do_xsave_ymmh(X86Access
*ac
, target_ulong ptr
)
2586 CPUX86State
*env
= ac
->env
;
2589 if (env
->hflags
& HF_CS64_MASK
) {
2595 for (i
= 0; i
< nb_xmm_regs
; i
++, ptr
+= 16) {
2596 access_stq(ac
, ptr
, env
->xmm_regs
[i
].ZMM_Q(2));
2597 access_stq(ac
, ptr
+ 8, env
->xmm_regs
[i
].ZMM_Q(3));
2601 static void do_xsave_bndregs(X86Access
*ac
, target_ulong ptr
)
2603 CPUX86State
*env
= ac
->env
;
2604 target_ulong addr
= ptr
+ offsetof(XSaveBNDREG
, bnd_regs
);
2607 for (i
= 0; i
< 4; i
++, addr
+= 16) {
2608 access_stq(ac
, addr
, env
->bnd_regs
[i
].lb
);
2609 access_stq(ac
, addr
+ 8, env
->bnd_regs
[i
].ub
);
2613 static void do_xsave_bndcsr(X86Access
*ac
, target_ulong ptr
)
2615 CPUX86State
*env
= ac
->env
;
2617 access_stq(ac
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.cfgu
),
2618 env
->bndcs_regs
.cfgu
);
2619 access_stq(ac
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.sts
),
2620 env
->bndcs_regs
.sts
);
2623 static void do_xsave_pkru(X86Access
*ac
, target_ulong ptr
)
2625 access_stq(ac
, ptr
, ac
->env
->pkru
);
2628 static void do_fxsave(X86Access
*ac
, target_ulong ptr
)
2630 CPUX86State
*env
= ac
->env
;
2632 do_xsave_fpu(ac
, ptr
);
2633 if (env
->cr
[4] & CR4_OSFXSR_MASK
) {
2634 do_xsave_mxcsr(ac
, ptr
);
2635 /* Fast FXSAVE leaves out the XMM registers */
2636 if (!(env
->efer
& MSR_EFER_FFXSR
)
2637 || (env
->hflags
& HF_CPL_MASK
)
2638 || !(env
->hflags
& HF_LMA_MASK
)) {
2639 do_xsave_sse(ac
, ptr
);
2644 void helper_fxsave(CPUX86State
*env
, target_ulong ptr
)
2646 uintptr_t ra
= GETPC();
2649 /* The operand must be 16 byte aligned */
2651 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2654 access_prepare(&ac
, env
, ptr
, sizeof(X86LegacyXSaveArea
),
2655 MMU_DATA_STORE
, ra
);
2656 do_fxsave(&ac
, ptr
);
2659 static uint64_t get_xinuse(CPUX86State
*env
)
2661 uint64_t inuse
= -1;
2663 /* For the most part, we don't track XINUSE. We could calculate it
2664 here for all components, but it's probably less work to simply
2665 indicate in use. That said, the state of BNDREGS is important
2666 enough to track in HFLAGS, so we might as well use that here. */
2667 if ((env
->hflags
& HF_MPX_IU_MASK
) == 0) {
2668 inuse
&= ~XSTATE_BNDREGS_MASK
;
2673 static void do_xsave_access(X86Access
*ac
, target_ulong ptr
, uint64_t rfbm
,
2674 uint64_t inuse
, uint64_t opt
)
2676 uint64_t old_bv
, new_bv
;
2678 if (opt
& XSTATE_FP_MASK
) {
2679 do_xsave_fpu(ac
, ptr
);
2681 if (rfbm
& XSTATE_SSE_MASK
) {
2682 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
2683 do_xsave_mxcsr(ac
, ptr
);
2685 if (opt
& XSTATE_SSE_MASK
) {
2686 do_xsave_sse(ac
, ptr
);
2688 if (opt
& XSTATE_YMM_MASK
) {
2689 do_xsave_ymmh(ac
, ptr
+ XO(avx_state
));
2691 if (opt
& XSTATE_BNDREGS_MASK
) {
2692 do_xsave_bndregs(ac
, ptr
+ XO(bndreg_state
));
2694 if (opt
& XSTATE_BNDCSR_MASK
) {
2695 do_xsave_bndcsr(ac
, ptr
+ XO(bndcsr_state
));
2697 if (opt
& XSTATE_PKRU_MASK
) {
2698 do_xsave_pkru(ac
, ptr
+ XO(pkru_state
));
2701 /* Update the XSTATE_BV field. */
2702 old_bv
= access_ldq(ac
, ptr
+ XO(header
.xstate_bv
));
2703 new_bv
= (old_bv
& ~rfbm
) | (inuse
& rfbm
);
2704 access_stq(ac
, ptr
+ XO(header
.xstate_bv
), new_bv
);
2707 static void do_xsave_chk(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2709 /* The OS must have enabled XSAVE. */
2710 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2711 raise_exception_ra(env
, EXCP06_ILLOP
, ra
);
2714 /* The operand must be 64 byte aligned. */
2716 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2720 static void do_xsave(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
,
2721 uint64_t inuse
, uint64_t opt
, uintptr_t ra
)
2726 do_xsave_chk(env
, ptr
, ra
);
2728 /* Never save anything not enabled by XCR0. */
2731 size
= xsave_area_size(opt
, false);
2733 access_prepare(&ac
, env
, ptr
, size
, MMU_DATA_STORE
, ra
);
2734 do_xsave_access(&ac
, ptr
, rfbm
, inuse
, opt
);
2737 void helper_xsave(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2739 do_xsave(env
, ptr
, rfbm
, get_xinuse(env
), rfbm
, GETPC());
2742 void helper_xsaveopt(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2744 uint64_t inuse
= get_xinuse(env
);
2745 do_xsave(env
, ptr
, rfbm
, inuse
, inuse
, GETPC());
2748 static void do_xrstor_fpu(X86Access
*ac
, target_ulong ptr
)
2750 CPUX86State
*env
= ac
->env
;
2751 int i
, fpuc
, fpus
, fptag
;
2754 fpuc
= access_ldw(ac
, ptr
+ XO(legacy
.fcw
));
2755 fpus
= access_ldw(ac
, ptr
+ XO(legacy
.fsw
));
2756 fptag
= access_ldw(ac
, ptr
+ XO(legacy
.ftw
));
2757 cpu_set_fpuc(env
, fpuc
);
2758 cpu_set_fpus(env
, fpus
);
2761 for (i
= 0; i
< 8; i
++) {
2762 env
->fptags
[i
] = ((fptag
>> i
) & 1);
2765 addr
= ptr
+ XO(legacy
.fpregs
);
2767 for (i
= 0; i
< 8; i
++) {
2768 floatx80 tmp
= do_fldt(ac
, addr
);
2774 static void do_xrstor_mxcsr(X86Access
*ac
, target_ulong ptr
)
2776 CPUX86State
*env
= ac
->env
;
2777 cpu_set_mxcsr(env
, access_ldl(ac
, ptr
+ XO(legacy
.mxcsr
)));
2780 static void do_xrstor_sse(X86Access
*ac
, target_ulong ptr
)
2782 CPUX86State
*env
= ac
->env
;
2786 if (env
->hflags
& HF_CS64_MASK
) {
2792 addr
= ptr
+ XO(legacy
.xmm_regs
);
2793 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2794 env
->xmm_regs
[i
].ZMM_Q(0) = access_ldq(ac
, addr
);
2795 env
->xmm_regs
[i
].ZMM_Q(1) = access_ldq(ac
, addr
+ 8);
2800 static void do_clear_sse(CPUX86State
*env
)
2804 if (env
->hflags
& HF_CS64_MASK
) {
2810 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2811 env
->xmm_regs
[i
].ZMM_Q(0) = 0;
2812 env
->xmm_regs
[i
].ZMM_Q(1) = 0;
2816 static void do_xrstor_ymmh(X86Access
*ac
, target_ulong ptr
)
2818 CPUX86State
*env
= ac
->env
;
2821 if (env
->hflags
& HF_CS64_MASK
) {
2827 for (i
= 0; i
< nb_xmm_regs
; i
++, ptr
+= 16) {
2828 env
->xmm_regs
[i
].ZMM_Q(2) = access_ldq(ac
, ptr
);
2829 env
->xmm_regs
[i
].ZMM_Q(3) = access_ldq(ac
, ptr
+ 8);
2833 static void do_clear_ymmh(CPUX86State
*env
)
2837 if (env
->hflags
& HF_CS64_MASK
) {
2843 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2844 env
->xmm_regs
[i
].ZMM_Q(2) = 0;
2845 env
->xmm_regs
[i
].ZMM_Q(3) = 0;
2849 static void do_xrstor_bndregs(X86Access
*ac
, target_ulong ptr
)
2851 CPUX86State
*env
= ac
->env
;
2852 target_ulong addr
= ptr
+ offsetof(XSaveBNDREG
, bnd_regs
);
2855 for (i
= 0; i
< 4; i
++, addr
+= 16) {
2856 env
->bnd_regs
[i
].lb
= access_ldq(ac
, addr
);
2857 env
->bnd_regs
[i
].ub
= access_ldq(ac
, addr
+ 8);
2861 static void do_xrstor_bndcsr(X86Access
*ac
, target_ulong ptr
)
2863 CPUX86State
*env
= ac
->env
;
2865 /* FIXME: Extend highest implemented bit of linear address. */
2866 env
->bndcs_regs
.cfgu
2867 = access_ldq(ac
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.cfgu
));
2869 = access_ldq(ac
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.sts
));
2872 static void do_xrstor_pkru(X86Access
*ac
, target_ulong ptr
)
2874 ac
->env
->pkru
= access_ldq(ac
, ptr
);
2877 static void do_fxrstor(X86Access
*ac
, target_ulong ptr
)
2879 CPUX86State
*env
= ac
->env
;
2881 do_xrstor_fpu(ac
, ptr
);
2882 if (env
->cr
[4] & CR4_OSFXSR_MASK
) {
2883 do_xrstor_mxcsr(ac
, ptr
);
2884 /* Fast FXRSTOR leaves out the XMM registers */
2885 if (!(env
->efer
& MSR_EFER_FFXSR
)
2886 || (env
->hflags
& HF_CPL_MASK
)
2887 || !(env
->hflags
& HF_LMA_MASK
)) {
2888 do_xrstor_sse(ac
, ptr
);
2893 void helper_fxrstor(CPUX86State
*env
, target_ulong ptr
)
2895 uintptr_t ra
= GETPC();
2898 /* The operand must be 16 byte aligned */
2900 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2903 access_prepare(&ac
, env
, ptr
, sizeof(X86LegacyXSaveArea
),
2905 do_fxrstor(&ac
, ptr
);
2908 static bool valid_xrstor_header(X86Access
*ac
, uint64_t *pxsbv
,
2911 uint64_t xstate_bv
, xcomp_bv
, reserve0
;
2913 xstate_bv
= access_ldq(ac
, ptr
+ XO(header
.xstate_bv
));
2914 xcomp_bv
= access_ldq(ac
, ptr
+ XO(header
.xcomp_bv
));
2915 reserve0
= access_ldq(ac
, ptr
+ XO(header
.reserve0
));
2919 * XCOMP_BV bit 63 indicates compact form, which we do not support,
2920 * and thus must raise #GP. That leaves us in standard form.
2921 * In standard form, bytes 23:8 must be zero -- which is both
2922 * XCOMP_BV and the following 64-bit field.
2924 if (xcomp_bv
|| reserve0
) {
2928 /* The XSTATE_BV field must not set bits not present in XCR0. */
2929 return (xstate_bv
& ~ac
->env
->xcr0
) == 0;
2932 static void do_xrstor(X86Access
*ac
, target_ulong ptr
,
2933 uint64_t rfbm
, uint64_t xstate_bv
)
2935 CPUX86State
*env
= ac
->env
;
2937 if (rfbm
& XSTATE_FP_MASK
) {
2938 if (xstate_bv
& XSTATE_FP_MASK
) {
2939 do_xrstor_fpu(ac
, ptr
);
2942 memset(env
->fpregs
, 0, sizeof(env
->fpregs
));
2945 if (rfbm
& XSTATE_SSE_MASK
) {
2946 /* Note that the standard form of XRSTOR loads MXCSR from memory
2947 whether or not the XSTATE_BV bit is set. */
2948 do_xrstor_mxcsr(ac
, ptr
);
2949 if (xstate_bv
& XSTATE_SSE_MASK
) {
2950 do_xrstor_sse(ac
, ptr
);
2955 if (rfbm
& XSTATE_YMM_MASK
) {
2956 if (xstate_bv
& XSTATE_YMM_MASK
) {
2957 do_xrstor_ymmh(ac
, ptr
+ XO(avx_state
));
2962 if (rfbm
& XSTATE_BNDREGS_MASK
) {
2963 if (xstate_bv
& XSTATE_BNDREGS_MASK
) {
2964 do_xrstor_bndregs(ac
, ptr
+ XO(bndreg_state
));
2965 env
->hflags
|= HF_MPX_IU_MASK
;
2967 memset(env
->bnd_regs
, 0, sizeof(env
->bnd_regs
));
2968 env
->hflags
&= ~HF_MPX_IU_MASK
;
2971 if (rfbm
& XSTATE_BNDCSR_MASK
) {
2972 if (xstate_bv
& XSTATE_BNDCSR_MASK
) {
2973 do_xrstor_bndcsr(ac
, ptr
+ XO(bndcsr_state
));
2975 memset(&env
->bndcs_regs
, 0, sizeof(env
->bndcs_regs
));
2977 cpu_sync_bndcs_hflags(env
);
2979 if (rfbm
& XSTATE_PKRU_MASK
) {
2980 uint64_t old_pkru
= env
->pkru
;
2981 if (xstate_bv
& XSTATE_PKRU_MASK
) {
2982 do_xrstor_pkru(ac
, ptr
+ XO(pkru_state
));
2986 if (env
->pkru
!= old_pkru
) {
2987 CPUState
*cs
= env_cpu(env
);
2995 void helper_xrstor(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2997 uintptr_t ra
= GETPC();
3000 unsigned size
, size_ext
;
3002 do_xsave_chk(env
, ptr
, ra
);
3004 /* Begin with just the minimum size to validate the header. */
3005 size
= sizeof(X86LegacyXSaveArea
) + sizeof(X86XSaveHeader
);
3006 access_prepare(&ac
, env
, ptr
, size
, MMU_DATA_LOAD
, ra
);
3007 if (!valid_xrstor_header(&ac
, &xstate_bv
, ptr
)) {
3008 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
3012 size_ext
= xsave_area_size(rfbm
& xstate_bv
, false);
3013 if (size
< size_ext
) {
3014 /* TODO: See if existing page probe has covered extra size. */
3015 access_prepare(&ac
, env
, ptr
, size_ext
, MMU_DATA_LOAD
, ra
);
3018 do_xrstor(&ac
, ptr
, rfbm
, xstate_bv
);
3021 #if defined(CONFIG_USER_ONLY)
3022 void cpu_x86_fsave(CPUX86State
*env
, void *host
, size_t len
)
3026 .size
= 4 * 7 + 8 * 10,
3030 assert(ac
.size
<= len
);
3031 do_fsave(&ac
, 0, true);
3034 void cpu_x86_frstor(CPUX86State
*env
, void *host
, size_t len
)
3038 .size
= 4 * 7 + 8 * 10,
3042 assert(ac
.size
<= len
);
3043 do_frstor(&ac
, 0, true);
3046 void cpu_x86_fxsave(CPUX86State
*env
, void *host
, size_t len
)
3050 .size
= sizeof(X86LegacyXSaveArea
),
3054 assert(ac
.size
<= len
);
3058 void cpu_x86_fxrstor(CPUX86State
*env
, void *host
, size_t len
)
3062 .size
= sizeof(X86LegacyXSaveArea
),
3066 assert(ac
.size
<= len
);
3070 void cpu_x86_xsave(CPUX86State
*env
, void *host
, size_t len
, uint64_t rfbm
)
3078 * Since this is only called from user-level signal handling,
3079 * we should have done the job correctly there.
3081 assert((rfbm
& ~env
->xcr0
) == 0);
3082 ac
.size
= xsave_area_size(rfbm
, false);
3083 assert(ac
.size
<= len
);
3084 do_xsave_access(&ac
, 0, rfbm
, get_xinuse(env
), rfbm
);
3087 bool cpu_x86_xrstor(CPUX86State
*env
, void *host
, size_t len
, uint64_t rfbm
)
3096 * Since this is only called from user-level signal handling,
3097 * we should have done the job correctly there.
3099 assert((rfbm
& ~env
->xcr0
) == 0);
3100 ac
.size
= xsave_area_size(rfbm
, false);
3101 assert(ac
.size
<= len
);
3103 if (!valid_xrstor_header(&ac
, &xstate_bv
, 0)) {
3106 do_xrstor(&ac
, 0, rfbm
, xstate_bv
);
3111 uint64_t helper_xgetbv(CPUX86State
*env
, uint32_t ecx
)
3113 /* The OS must have enabled XSAVE. */
3114 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
3115 raise_exception_ra(env
, EXCP06_ILLOP
, GETPC());
3122 if (env
->features
[FEAT_XSAVE
] & CPUID_XSAVE_XGETBV1
) {
3123 return env
->xcr0
& get_xinuse(env
);
3127 raise_exception_ra(env
, EXCP0D_GPF
, GETPC());
3130 void helper_xsetbv(CPUX86State
*env
, uint32_t ecx
, uint64_t mask
)
3132 uint32_t dummy
, ena_lo
, ena_hi
;
3135 /* The OS must have enabled XSAVE. */
3136 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
3137 raise_exception_ra(env
, EXCP06_ILLOP
, GETPC());
3140 /* Only XCR0 is defined at present; the FPU may not be disabled. */
3141 if (ecx
!= 0 || (mask
& XSTATE_FP_MASK
) == 0) {
3145 /* SSE can be disabled, but only if AVX is disabled too. */
3146 if ((mask
& (XSTATE_SSE_MASK
| XSTATE_YMM_MASK
)) == XSTATE_YMM_MASK
) {
3150 /* Disallow enabling unimplemented features. */
3151 cpu_x86_cpuid(env
, 0x0d, 0, &ena_lo
, &dummy
, &dummy
, &ena_hi
);
3152 ena
= ((uint64_t)ena_hi
<< 32) | ena_lo
;
3157 /* Disallow enabling only half of MPX. */
3158 if ((mask
^ (mask
* (XSTATE_BNDCSR_MASK
/ XSTATE_BNDREGS_MASK
)))
3159 & XSTATE_BNDCSR_MASK
) {
3164 cpu_sync_bndcs_hflags(env
);
3165 cpu_sync_avx_hflag(env
);
3169 raise_exception_ra(env
, EXCP0D_GPF
, GETPC());
3173 /* XXX: optimize by storing fptt and fptags in the static cpu state */
3175 #define SSE_DAZ 0x0040
3176 #define SSE_RC_SHIFT 13
3177 #define SSE_RC_MASK (3 << SSE_RC_SHIFT)
3178 #define SSE_FZ 0x8000
3180 void update_mxcsr_status(CPUX86State
*env
)
3182 uint32_t mxcsr
= env
->mxcsr
;
3185 /* set rounding mode */
3186 rnd_type
= (mxcsr
& SSE_RC_MASK
) >> SSE_RC_SHIFT
;
3187 set_x86_rounding_mode(rnd_type
, &env
->sse_status
);
3189 /* Set exception flags. */
3190 set_float_exception_flags((mxcsr
& FPUS_IE
? float_flag_invalid
: 0) |
3191 (mxcsr
& FPUS_ZE
? float_flag_divbyzero
: 0) |
3192 (mxcsr
& FPUS_OE
? float_flag_overflow
: 0) |
3193 (mxcsr
& FPUS_UE
? float_flag_underflow
: 0) |
3194 (mxcsr
& FPUS_PE
? float_flag_inexact
: 0),
3197 /* set denormals are zero */
3198 set_flush_inputs_to_zero((mxcsr
& SSE_DAZ
) ? 1 : 0, &env
->sse_status
);
3200 /* set flush to zero */
3201 set_flush_to_zero((mxcsr
& SSE_FZ
) ? 1 : 0, &env
->sse_status
);
3204 void update_mxcsr_from_sse_status(CPUX86State
*env
)
3206 uint8_t flags
= get_float_exception_flags(&env
->sse_status
);
3208 * The MXCSR denormal flag has opposite semantics to
3209 * float_flag_input_denormal (the softfloat code sets that flag
3210 * only when flushing input denormals to zero, but SSE sets it
3211 * only when not flushing them to zero), so is not converted
3214 env
->mxcsr
|= ((flags
& float_flag_invalid
? FPUS_IE
: 0) |
3215 (flags
& float_flag_divbyzero
? FPUS_ZE
: 0) |
3216 (flags
& float_flag_overflow
? FPUS_OE
: 0) |
3217 (flags
& float_flag_underflow
? FPUS_UE
: 0) |
3218 (flags
& float_flag_inexact
? FPUS_PE
: 0) |
3219 (flags
& float_flag_output_denormal
? FPUS_UE
| FPUS_PE
:
3223 void helper_update_mxcsr(CPUX86State
*env
)
3225 update_mxcsr_from_sse_status(env
);
3228 void helper_ldmxcsr(CPUX86State
*env
, uint32_t val
)
3230 cpu_set_mxcsr(env
, val
);
3233 void helper_enter_mmx(CPUX86State
*env
)
3236 *(uint32_t *)(env
->fptags
) = 0;
3237 *(uint32_t *)(env
->fptags
+ 4) = 0;
3240 void helper_emms(CPUX86State
*env
)
3242 /* set to empty state */
3243 *(uint32_t *)(env
->fptags
) = 0x01010101;
3244 *(uint32_t *)(env
->fptags
+ 4) = 0x01010101;
3248 #include "ops_sse.h"
3251 #include "ops_sse.h"
3254 #include "ops_sse.h"