libqtest: Inline g_assert_no_errno()
[qemu/armbru.git] / target / i386 / fpu_helper.c
blobea5a0c4861b3bd8c0764678109646b9d96127e0d
/*
 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
/* Rounding-control field of the x87 control word (bits 10-11). */
#define FPU_RC_MASK         (3 << 10)
#define FPU_RC_NEAR         (0 << 10)
#define FPU_RC_DOWN         (1 << 10)
#define FPU_RC_UP           (2 << 10)
#define FPU_RC_CHOP         (3 << 10)

/* 2**63: magnitude limit checked by the trigonometric helpers. */
#define MAXTAN 9223372036854775808.0
/*
 * Accessors for x86 extended-precision numbers.  `fp` is any lvalue that has
 * an `.l.upper` (sign + exponent) and an `.l.lower` (mantissa) member.
 * Every use of the argument is parenthesized so that expressions such as
 * `*p` can be passed, and BIASEXPONENT expands to a single parenthesized
 * expression so it is safe in any statement context.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       ((fp).l.upper & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
/* Replace the exponent field with the bias, keeping the sign bit. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
/* x87 status-word bits. */
#define FPUS_IE 0x0001
#define FPUS_DE 0x0002
#define FPUS_ZE 0x0004
#define FPUS_OE 0x0008
#define FPUS_UE 0x0010
#define FPUS_PE 0x0020
#define FPUS_SF 0x0040
#define FPUS_SE 0x0080
#define FPUS_B  0x8000

/* Low six bits of the control word, tested against the status word flags. */
#define FPUC_EM 0x3f
/* Extended-precision constants loaded by FLDLG2, FLDL2E and FLDL2T. */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799ULL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcULL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeULL)
61 static inline void fpush(CPUX86State *env)
63 env->fpstt = (env->fpstt - 1) & 7;
64 env->fptags[env->fpstt] = 0; /* validate stack entry */
67 static inline void fpop(CPUX86State *env)
69 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
70 env->fpstt = (env->fpstt + 1) & 7;
73 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
74 uintptr_t retaddr)
76 CPU_LDoubleU temp;
78 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
79 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
80 return temp.d;
83 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
84 uintptr_t retaddr)
86 CPU_LDoubleU temp;
88 temp.d = f;
89 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
90 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
93 /* x87 FPU helpers */
95 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
97 union {
98 float64 f64;
99 double d;
100 } u;
102 u.f64 = floatx80_to_float64(a, &env->fp_status);
103 return u.d;
106 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
108 union {
109 float64 f64;
110 double d;
111 } u;
113 u.d = a;
114 return float64_to_floatx80(u.f64, &env->fp_status);
117 static void fpu_set_exception(CPUX86State *env, int mask)
119 env->fpus |= mask;
120 if (env->fpus & (~env->fpuc & FPUC_EM)) {
121 env->fpus |= FPUS_SE | FPUS_B;
125 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
127 if (floatx80_is_zero(b)) {
128 fpu_set_exception(env, FPUS_ZE);
130 return floatx80_div(a, b, &env->fp_status);
133 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
135 if (env->cr[0] & CR0_NE_MASK) {
136 raise_exception_ra(env, EXCP10_COPR, retaddr);
138 #if !defined(CONFIG_USER_ONLY)
139 else {
140 cpu_set_ferr(env);
142 #endif
145 void helper_flds_FT0(CPUX86State *env, uint32_t val)
147 union {
148 float32 f;
149 uint32_t i;
150 } u;
152 u.i = val;
153 FT0 = float32_to_floatx80(u.f, &env->fp_status);
156 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
158 union {
159 float64 f;
160 uint64_t i;
161 } u;
163 u.i = val;
164 FT0 = float64_to_floatx80(u.f, &env->fp_status);
167 void helper_fildl_FT0(CPUX86State *env, int32_t val)
169 FT0 = int32_to_floatx80(val, &env->fp_status);
172 void helper_flds_ST0(CPUX86State *env, uint32_t val)
174 int new_fpstt;
175 union {
176 float32 f;
177 uint32_t i;
178 } u;
180 new_fpstt = (env->fpstt - 1) & 7;
181 u.i = val;
182 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
183 env->fpstt = new_fpstt;
184 env->fptags[new_fpstt] = 0; /* validate stack entry */
187 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
189 int new_fpstt;
190 union {
191 float64 f;
192 uint64_t i;
193 } u;
195 new_fpstt = (env->fpstt - 1) & 7;
196 u.i = val;
197 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
198 env->fpstt = new_fpstt;
199 env->fptags[new_fpstt] = 0; /* validate stack entry */
202 void helper_fildl_ST0(CPUX86State *env, int32_t val)
204 int new_fpstt;
206 new_fpstt = (env->fpstt - 1) & 7;
207 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
208 env->fpstt = new_fpstt;
209 env->fptags[new_fpstt] = 0; /* validate stack entry */
212 void helper_fildll_ST0(CPUX86State *env, int64_t val)
214 int new_fpstt;
216 new_fpstt = (env->fpstt - 1) & 7;
217 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
218 env->fpstt = new_fpstt;
219 env->fptags[new_fpstt] = 0; /* validate stack entry */
222 uint32_t helper_fsts_ST0(CPUX86State *env)
224 union {
225 float32 f;
226 uint32_t i;
227 } u;
229 u.f = floatx80_to_float32(ST0, &env->fp_status);
230 return u.i;
233 uint64_t helper_fstl_ST0(CPUX86State *env)
235 union {
236 float64 f;
237 uint64_t i;
238 } u;
240 u.f = floatx80_to_float64(ST0, &env->fp_status);
241 return u.i;
244 int32_t helper_fist_ST0(CPUX86State *env)
246 int32_t val;
248 val = floatx80_to_int32(ST0, &env->fp_status);
249 if (val != (int16_t)val) {
250 val = -32768;
252 return val;
255 int32_t helper_fistl_ST0(CPUX86State *env)
257 int32_t val;
258 signed char old_exp_flags;
260 old_exp_flags = get_float_exception_flags(&env->fp_status);
261 set_float_exception_flags(0, &env->fp_status);
263 val = floatx80_to_int32(ST0, &env->fp_status);
264 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
265 val = 0x80000000;
267 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
268 | old_exp_flags, &env->fp_status);
269 return val;
272 int64_t helper_fistll_ST0(CPUX86State *env)
274 int64_t val;
275 signed char old_exp_flags;
277 old_exp_flags = get_float_exception_flags(&env->fp_status);
278 set_float_exception_flags(0, &env->fp_status);
280 val = floatx80_to_int64(ST0, &env->fp_status);
281 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
282 val = 0x8000000000000000ULL;
284 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
285 | old_exp_flags, &env->fp_status);
286 return val;
289 int32_t helper_fistt_ST0(CPUX86State *env)
291 int32_t val;
293 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
294 if (val != (int16_t)val) {
295 val = -32768;
297 return val;
300 int32_t helper_fisttl_ST0(CPUX86State *env)
302 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
305 int64_t helper_fisttll_ST0(CPUX86State *env)
307 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
310 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
312 int new_fpstt;
314 new_fpstt = (env->fpstt - 1) & 7;
315 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
316 env->fpstt = new_fpstt;
317 env->fptags[new_fpstt] = 0; /* validate stack entry */
320 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
322 helper_fstt(env, ST0, ptr, GETPC());
325 void helper_fpush(CPUX86State *env)
327 fpush(env);
330 void helper_fpop(CPUX86State *env)
332 fpop(env);
335 void helper_fdecstp(CPUX86State *env)
337 env->fpstt = (env->fpstt - 1) & 7;
338 env->fpus &= ~0x4700;
341 void helper_fincstp(CPUX86State *env)
343 env->fpstt = (env->fpstt + 1) & 7;
344 env->fpus &= ~0x4700;
347 /* FPU move */
349 void helper_ffree_STN(CPUX86State *env, int st_index)
351 env->fptags[(env->fpstt + st_index) & 7] = 1;
354 void helper_fmov_ST0_FT0(CPUX86State *env)
356 ST0 = FT0;
359 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
361 FT0 = ST(st_index);
364 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
366 ST0 = ST(st_index);
369 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
371 ST(st_index) = ST0;
374 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
376 floatx80 tmp;
378 tmp = ST(st_index);
379 ST(st_index) = ST0;
380 ST0 = tmp;
383 /* FPU operations */
385 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
387 void helper_fcom_ST0_FT0(CPUX86State *env)
389 int ret;
391 ret = floatx80_compare(ST0, FT0, &env->fp_status);
392 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
395 void helper_fucom_ST0_FT0(CPUX86State *env)
397 int ret;
399 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
400 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
403 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
405 void helper_fcomi_ST0_FT0(CPUX86State *env)
407 int eflags;
408 int ret;
410 ret = floatx80_compare(ST0, FT0, &env->fp_status);
411 eflags = cpu_cc_compute_all(env, CC_OP);
412 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
413 CC_SRC = eflags;
416 void helper_fucomi_ST0_FT0(CPUX86State *env)
418 int eflags;
419 int ret;
421 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
422 eflags = cpu_cc_compute_all(env, CC_OP);
423 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
424 CC_SRC = eflags;
427 void helper_fadd_ST0_FT0(CPUX86State *env)
429 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
432 void helper_fmul_ST0_FT0(CPUX86State *env)
434 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
437 void helper_fsub_ST0_FT0(CPUX86State *env)
439 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
442 void helper_fsubr_ST0_FT0(CPUX86State *env)
444 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
447 void helper_fdiv_ST0_FT0(CPUX86State *env)
449 ST0 = helper_fdiv(env, ST0, FT0);
452 void helper_fdivr_ST0_FT0(CPUX86State *env)
454 ST0 = helper_fdiv(env, FT0, ST0);
457 /* fp operations between STN and ST0 */
459 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
461 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
464 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
466 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
469 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
471 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
474 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
476 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
479 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
481 floatx80 *p;
483 p = &ST(st_index);
484 *p = helper_fdiv(env, *p, ST0);
487 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
489 floatx80 *p;
491 p = &ST(st_index);
492 *p = helper_fdiv(env, ST0, *p);
495 /* misc FPU operations */
496 void helper_fchs_ST0(CPUX86State *env)
498 ST0 = floatx80_chs(ST0);
501 void helper_fabs_ST0(CPUX86State *env)
503 ST0 = floatx80_abs(ST0);
506 void helper_fld1_ST0(CPUX86State *env)
508 ST0 = floatx80_one;
511 void helper_fldl2t_ST0(CPUX86State *env)
513 ST0 = floatx80_l2t;
516 void helper_fldl2e_ST0(CPUX86State *env)
518 ST0 = floatx80_l2e;
521 void helper_fldpi_ST0(CPUX86State *env)
523 ST0 = floatx80_pi;
526 void helper_fldlg2_ST0(CPUX86State *env)
528 ST0 = floatx80_lg2;
531 void helper_fldln2_ST0(CPUX86State *env)
533 ST0 = floatx80_ln2;
536 void helper_fldz_ST0(CPUX86State *env)
538 ST0 = floatx80_zero;
541 void helper_fldz_FT0(CPUX86State *env)
543 FT0 = floatx80_zero;
546 uint32_t helper_fnstsw(CPUX86State *env)
548 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
551 uint32_t helper_fnstcw(CPUX86State *env)
553 return env->fpuc;
556 void update_fp_status(CPUX86State *env)
558 int rnd_type;
560 /* set rounding mode */
561 switch (env->fpuc & FPU_RC_MASK) {
562 default:
563 case FPU_RC_NEAR:
564 rnd_type = float_round_nearest_even;
565 break;
566 case FPU_RC_DOWN:
567 rnd_type = float_round_down;
568 break;
569 case FPU_RC_UP:
570 rnd_type = float_round_up;
571 break;
572 case FPU_RC_CHOP:
573 rnd_type = float_round_to_zero;
574 break;
576 set_float_rounding_mode(rnd_type, &env->fp_status);
577 switch ((env->fpuc >> 8) & 3) {
578 case 0:
579 rnd_type = 32;
580 break;
581 case 2:
582 rnd_type = 64;
583 break;
584 case 3:
585 default:
586 rnd_type = 80;
587 break;
589 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
592 void helper_fldcw(CPUX86State *env, uint32_t val)
594 cpu_set_fpuc(env, val);
597 void helper_fclex(CPUX86State *env)
599 env->fpus &= 0x7f00;
602 void helper_fwait(CPUX86State *env)
604 if (env->fpus & FPUS_SE) {
605 fpu_raise_exception(env, GETPC());
609 void helper_fninit(CPUX86State *env)
611 env->fpus = 0;
612 env->fpstt = 0;
613 cpu_set_fpuc(env, 0x37f);
614 env->fptags[0] = 1;
615 env->fptags[1] = 1;
616 env->fptags[2] = 1;
617 env->fptags[3] = 1;
618 env->fptags[4] = 1;
619 env->fptags[5] = 1;
620 env->fptags[6] = 1;
621 env->fptags[7] = 1;
624 /* BCD ops */
626 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
628 floatx80 tmp;
629 uint64_t val;
630 unsigned int v;
631 int i;
633 val = 0;
634 for (i = 8; i >= 0; i--) {
635 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
636 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
638 tmp = int64_to_floatx80(val, &env->fp_status);
639 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
640 tmp = floatx80_chs(tmp);
642 fpush(env);
643 ST0 = tmp;
646 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
648 int v;
649 target_ulong mem_ref, mem_end;
650 int64_t val;
652 val = floatx80_to_int64(ST0, &env->fp_status);
653 mem_ref = ptr;
654 mem_end = mem_ref + 9;
655 if (val < 0) {
656 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
657 val = -val;
658 } else {
659 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
661 while (mem_ref < mem_end) {
662 if (val == 0) {
663 break;
665 v = val % 100;
666 val = val / 100;
667 v = ((v / 10) << 4) | (v % 10);
668 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
670 while (mem_ref < mem_end) {
671 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
675 void helper_f2xm1(CPUX86State *env)
677 double val = floatx80_to_double(env, ST0);
679 val = pow(2.0, val) - 1.0;
680 ST0 = double_to_floatx80(env, val);
683 void helper_fyl2x(CPUX86State *env)
685 double fptemp = floatx80_to_double(env, ST0);
687 if (fptemp > 0.0) {
688 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
689 fptemp *= floatx80_to_double(env, ST1);
690 ST1 = double_to_floatx80(env, fptemp);
691 fpop(env);
692 } else {
693 env->fpus &= ~0x4700;
694 env->fpus |= 0x400;
698 void helper_fptan(CPUX86State *env)
700 double fptemp = floatx80_to_double(env, ST0);
702 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
703 env->fpus |= 0x400;
704 } else {
705 fptemp = tan(fptemp);
706 ST0 = double_to_floatx80(env, fptemp);
707 fpush(env);
708 ST0 = floatx80_one;
709 env->fpus &= ~0x400; /* C2 <-- 0 */
710 /* the above code is for |arg| < 2**52 only */
714 void helper_fpatan(CPUX86State *env)
716 double fptemp, fpsrcop;
718 fpsrcop = floatx80_to_double(env, ST1);
719 fptemp = floatx80_to_double(env, ST0);
720 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
721 fpop(env);
724 void helper_fxtract(CPUX86State *env)
726 CPU_LDoubleU temp;
728 temp.d = ST0;
730 if (floatx80_is_zero(ST0)) {
731 /* Easy way to generate -inf and raising division by 0 exception */
732 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
733 &env->fp_status);
734 fpush(env);
735 ST0 = temp.d;
736 } else {
737 int expdif;
739 expdif = EXPD(temp) - EXPBIAS;
740 /* DP exponent bias */
741 ST0 = int32_to_floatx80(expdif, &env->fp_status);
742 fpush(env);
743 BIASEXPONENT(temp);
744 ST0 = temp.d;
748 void helper_fprem1(CPUX86State *env)
750 double st0, st1, dblq, fpsrcop, fptemp;
751 CPU_LDoubleU fpsrcop1, fptemp1;
752 int expdif;
753 signed long long int q;
755 st0 = floatx80_to_double(env, ST0);
756 st1 = floatx80_to_double(env, ST1);
758 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
759 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
760 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
761 return;
764 fpsrcop = st0;
765 fptemp = st1;
766 fpsrcop1.d = ST0;
767 fptemp1.d = ST1;
768 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
770 if (expdif < 0) {
771 /* optimisation? taken from the AMD docs */
772 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
773 /* ST0 is unchanged */
774 return;
777 if (expdif < 53) {
778 dblq = fpsrcop / fptemp;
779 /* round dblq towards nearest integer */
780 dblq = rint(dblq);
781 st0 = fpsrcop - fptemp * dblq;
783 /* convert dblq to q by truncating towards zero */
784 if (dblq < 0.0) {
785 q = (signed long long int)(-dblq);
786 } else {
787 q = (signed long long int)dblq;
790 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
791 /* (C0,C3,C1) <-- (q2,q1,q0) */
792 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
793 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
794 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
795 } else {
796 env->fpus |= 0x400; /* C2 <-- 1 */
797 fptemp = pow(2.0, expdif - 50);
798 fpsrcop = (st0 / st1) / fptemp;
799 /* fpsrcop = integer obtained by chopping */
800 fpsrcop = (fpsrcop < 0.0) ?
801 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
802 st0 -= (st1 * fpsrcop * fptemp);
804 ST0 = double_to_floatx80(env, st0);
807 void helper_fprem(CPUX86State *env)
809 double st0, st1, dblq, fpsrcop, fptemp;
810 CPU_LDoubleU fpsrcop1, fptemp1;
811 int expdif;
812 signed long long int q;
814 st0 = floatx80_to_double(env, ST0);
815 st1 = floatx80_to_double(env, ST1);
817 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
818 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
819 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
820 return;
823 fpsrcop = st0;
824 fptemp = st1;
825 fpsrcop1.d = ST0;
826 fptemp1.d = ST1;
827 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
829 if (expdif < 0) {
830 /* optimisation? taken from the AMD docs */
831 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
832 /* ST0 is unchanged */
833 return;
836 if (expdif < 53) {
837 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
838 /* round dblq towards zero */
839 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
840 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
842 /* convert dblq to q by truncating towards zero */
843 if (dblq < 0.0) {
844 q = (signed long long int)(-dblq);
845 } else {
846 q = (signed long long int)dblq;
849 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
850 /* (C0,C3,C1) <-- (q2,q1,q0) */
851 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
852 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
853 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
854 } else {
855 int N = 32 + (expdif % 32); /* as per AMD docs */
857 env->fpus |= 0x400; /* C2 <-- 1 */
858 fptemp = pow(2.0, (double)(expdif - N));
859 fpsrcop = (st0 / st1) / fptemp;
860 /* fpsrcop = integer obtained by chopping */
861 fpsrcop = (fpsrcop < 0.0) ?
862 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
863 st0 -= (st1 * fpsrcop * fptemp);
865 ST0 = double_to_floatx80(env, st0);
868 void helper_fyl2xp1(CPUX86State *env)
870 double fptemp = floatx80_to_double(env, ST0);
872 if ((fptemp + 1.0) > 0.0) {
873 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
874 fptemp *= floatx80_to_double(env, ST1);
875 ST1 = double_to_floatx80(env, fptemp);
876 fpop(env);
877 } else {
878 env->fpus &= ~0x4700;
879 env->fpus |= 0x400;
883 void helper_fsqrt(CPUX86State *env)
885 if (floatx80_is_neg(ST0)) {
886 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
887 env->fpus |= 0x400;
889 ST0 = floatx80_sqrt(ST0, &env->fp_status);
892 void helper_fsincos(CPUX86State *env)
894 double fptemp = floatx80_to_double(env, ST0);
896 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
897 env->fpus |= 0x400;
898 } else {
899 ST0 = double_to_floatx80(env, sin(fptemp));
900 fpush(env);
901 ST0 = double_to_floatx80(env, cos(fptemp));
902 env->fpus &= ~0x400; /* C2 <-- 0 */
903 /* the above code is for |arg| < 2**63 only */
907 void helper_frndint(CPUX86State *env)
909 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
912 void helper_fscale(CPUX86State *env)
914 if (floatx80_is_any_nan(ST1)) {
915 ST0 = ST1;
916 } else {
917 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
918 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
922 void helper_fsin(CPUX86State *env)
924 double fptemp = floatx80_to_double(env, ST0);
926 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
927 env->fpus |= 0x400;
928 } else {
929 ST0 = double_to_floatx80(env, sin(fptemp));
930 env->fpus &= ~0x400; /* C2 <-- 0 */
931 /* the above code is for |arg| < 2**53 only */
935 void helper_fcos(CPUX86State *env)
937 double fptemp = floatx80_to_double(env, ST0);
939 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
940 env->fpus |= 0x400;
941 } else {
942 ST0 = double_to_floatx80(env, cos(fptemp));
943 env->fpus &= ~0x400; /* C2 <-- 0 */
944 /* the above code is for |arg| < 2**63 only */
948 void helper_fxam_ST0(CPUX86State *env)
950 CPU_LDoubleU temp;
951 int expdif;
953 temp.d = ST0;
955 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
956 if (SIGND(temp)) {
957 env->fpus |= 0x200; /* C1 <-- 1 */
960 /* XXX: test fptags too */
961 expdif = EXPD(temp);
962 if (expdif == MAXEXPD) {
963 if (MANTD(temp) == 0x8000000000000000ULL) {
964 env->fpus |= 0x500; /* Infinity */
965 } else {
966 env->fpus |= 0x100; /* NaN */
968 } else if (expdif == 0) {
969 if (MANTD(temp) == 0) {
970 env->fpus |= 0x4000; /* Zero */
971 } else {
972 env->fpus |= 0x4400; /* Denormal */
974 } else {
975 env->fpus |= 0x400;
979 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
980 uintptr_t retaddr)
982 int fpus, fptag, exp, i;
983 uint64_t mant;
984 CPU_LDoubleU tmp;
986 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
987 fptag = 0;
988 for (i = 7; i >= 0; i--) {
989 fptag <<= 2;
990 if (env->fptags[i]) {
991 fptag |= 3;
992 } else {
993 tmp.d = env->fpregs[i].d;
994 exp = EXPD(tmp);
995 mant = MANTD(tmp);
996 if (exp == 0 && mant == 0) {
997 /* zero */
998 fptag |= 1;
999 } else if (exp == 0 || exp == MAXEXPD
1000 || (mant & (1LL << 63)) == 0) {
1001 /* NaNs, infinity, denormal */
1002 fptag |= 2;
1006 if (data32) {
1007 /* 32 bit */
1008 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1009 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1010 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1011 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1012 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1013 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1014 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1015 } else {
1016 /* 16 bit */
1017 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1018 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1019 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1020 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1021 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1022 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1023 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1027 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1029 do_fstenv(env, ptr, data32, GETPC());
1032 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1033 uintptr_t retaddr)
1035 int i, fpus, fptag;
1037 if (data32) {
1038 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1039 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1040 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1041 } else {
1042 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1044 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1046 env->fpstt = (fpus >> 11) & 7;
1047 env->fpus = fpus & ~0x3800;
1048 for (i = 0; i < 8; i++) {
1049 env->fptags[i] = ((fptag & 3) == 3);
1050 fptag >>= 2;
1054 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1056 do_fldenv(env, ptr, data32, GETPC());
1059 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1061 floatx80 tmp;
1062 int i;
1064 do_fstenv(env, ptr, data32, GETPC());
1066 ptr += (14 << data32);
1067 for (i = 0; i < 8; i++) {
1068 tmp = ST(i);
1069 helper_fstt(env, tmp, ptr, GETPC());
1070 ptr += 10;
1073 /* fninit */
1074 env->fpus = 0;
1075 env->fpstt = 0;
1076 cpu_set_fpuc(env, 0x37f);
1077 env->fptags[0] = 1;
1078 env->fptags[1] = 1;
1079 env->fptags[2] = 1;
1080 env->fptags[3] = 1;
1081 env->fptags[4] = 1;
1082 env->fptags[5] = 1;
1083 env->fptags[6] = 1;
1084 env->fptags[7] = 1;
1087 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1089 floatx80 tmp;
1090 int i;
1092 do_fldenv(env, ptr, data32, GETPC());
1093 ptr += (14 << data32);
1095 for (i = 0; i < 8; i++) {
1096 tmp = helper_fldt(env, ptr, GETPC());
1097 ST(i) = tmp;
1098 ptr += 10;
#ifdef CONFIG_USER_ONLY
/* User-mode entry point: forwards to helper_fsave(). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* User-mode entry point: forwards to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif /* CONFIG_USER_ONLY */
/* XO(field): byte offset of member `field` within X86XSaveArea. */
1114 #define XO(X) offsetof(X86XSaveArea, X)
1116 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1118 int fpus, fptag, i;
1119 target_ulong addr;
1121 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1122 fptag = 0;
1123 for (i = 0; i < 8; i++) {
1124 fptag |= (env->fptags[i] << i);
1127 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1128 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1129 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1131 /* In 32-bit mode this is eip, sel, dp, sel.
1132 In 64-bit mode this is rip, rdp.
1133 But in either case we don't write actual data, just zeros. */
1134 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1135 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1137 addr = ptr + XO(legacy.fpregs);
1138 for (i = 0; i < 8; i++) {
1139 floatx80 tmp = ST(i);
1140 helper_fstt(env, tmp, addr, ra);
1141 addr += 16;
1145 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1147 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1148 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1151 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1153 int i, nb_xmm_regs;
1154 target_ulong addr;
1156 if (env->hflags & HF_CS64_MASK) {
1157 nb_xmm_regs = 16;
1158 } else {
1159 nb_xmm_regs = 8;
1162 addr = ptr + XO(legacy.xmm_regs);
1163 for (i = 0; i < nb_xmm_regs; i++) {
1164 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1165 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1166 addr += 16;
1170 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1172 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1173 int i;
1175 for (i = 0; i < 4; i++, addr += 16) {
1176 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1181 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1183 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1184 env->bndcs_regs.cfgu, ra);
1185 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1186 env->bndcs_regs.sts, ra);
1189 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1191 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1194 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1196 uintptr_t ra = GETPC();
1198 /* The operand must be 16 byte aligned */
1199 if (ptr & 0xf) {
1200 raise_exception_ra(env, EXCP0D_GPF, ra);
1203 do_xsave_fpu(env, ptr, ra);
1205 if (env->cr[4] & CR4_OSFXSR_MASK) {
1206 do_xsave_mxcsr(env, ptr, ra);
1207 /* Fast FXSAVE leaves out the XMM registers */
1208 if (!(env->efer & MSR_EFER_FFXSR)
1209 || (env->hflags & HF_CPL_MASK)
1210 || !(env->hflags & HF_LMA_MASK)) {
1211 do_xsave_sse(env, ptr, ra);
1216 static uint64_t get_xinuse(CPUX86State *env)
1218 uint64_t inuse = -1;
1220 /* For the most part, we don't track XINUSE. We could calculate it
1221 here for all components, but it's probably less work to simply
1222 indicate in use. That said, the state of BNDREGS is important
1223 enough to track in HFLAGS, so we might as well use that here. */
1224 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1225 inuse &= ~XSTATE_BNDREGS_MASK;
1227 return inuse;
1230 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1231 uint64_t inuse, uint64_t opt, uintptr_t ra)
1233 uint64_t old_bv, new_bv;
1235 /* The OS must have enabled XSAVE. */
1236 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1237 raise_exception_ra(env, EXCP06_ILLOP, ra);
1240 /* The operand must be 64 byte aligned. */
1241 if (ptr & 63) {
1242 raise_exception_ra(env, EXCP0D_GPF, ra);
1245 /* Never save anything not enabled by XCR0. */
1246 rfbm &= env->xcr0;
1247 opt &= rfbm;
1249 if (opt & XSTATE_FP_MASK) {
1250 do_xsave_fpu(env, ptr, ra);
1252 if (rfbm & XSTATE_SSE_MASK) {
1253 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1254 do_xsave_mxcsr(env, ptr, ra);
1256 if (opt & XSTATE_SSE_MASK) {
1257 do_xsave_sse(env, ptr, ra);
1259 if (opt & XSTATE_BNDREGS_MASK) {
1260 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1262 if (opt & XSTATE_BNDCSR_MASK) {
1263 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1265 if (opt & XSTATE_PKRU_MASK) {
1266 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1269 /* Update the XSTATE_BV field. */
1270 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1271 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1272 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1275 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1277 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1280 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1282 uint64_t inuse = get_xinuse(env);
1283 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1286 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1288 int i, fpuc, fpus, fptag;
1289 target_ulong addr;
1291 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1292 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1293 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1294 cpu_set_fpuc(env, fpuc);
1295 env->fpstt = (fpus >> 11) & 7;
1296 env->fpus = fpus & ~0x3800;
1297 fptag ^= 0xff;
1298 for (i = 0; i < 8; i++) {
1299 env->fptags[i] = ((fptag >> i) & 1);
1302 addr = ptr + XO(legacy.fpregs);
1303 for (i = 0; i < 8; i++) {
1304 floatx80 tmp = helper_fldt(env, addr, ra);
1305 ST(i) = tmp;
1306 addr += 16;
1310 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1312 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1315 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1317 int i, nb_xmm_regs;
1318 target_ulong addr;
1320 if (env->hflags & HF_CS64_MASK) {
1321 nb_xmm_regs = 16;
1322 } else {
1323 nb_xmm_regs = 8;
1326 addr = ptr + XO(legacy.xmm_regs);
1327 for (i = 0; i < nb_xmm_regs; i++) {
1328 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1329 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1330 addr += 16;
1334 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1336 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1337 int i;
1339 for (i = 0; i < 4; i++, addr += 16) {
1340 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1341 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1345 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1347 /* FIXME: Extend highest implemented bit of linear address. */
1348 env->bndcs_regs.cfgu
1349 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1350 env->bndcs_regs.sts
1351 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1354 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1356 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1359 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1361 uintptr_t ra = GETPC();
1363 /* The operand must be 16 byte aligned */
1364 if (ptr & 0xf) {
1365 raise_exception_ra(env, EXCP0D_GPF, ra);
1368 do_xrstor_fpu(env, ptr, ra);
1370 if (env->cr[4] & CR4_OSFXSR_MASK) {
1371 do_xrstor_mxcsr(env, ptr, ra);
1372 /* Fast FXRSTOR leaves out the XMM registers */
1373 if (!(env->efer & MSR_EFER_FFXSR)
1374 || (env->hflags & HF_CPL_MASK)
1375 || !(env->hflags & HF_LMA_MASK)) {
1376 do_xrstor_sse(env, ptr, ra);
#if defined(CONFIG_USER_ONLY)
/* User-only build: expose FXSAVE through a cpu_x86_* entry point. */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

/* User-only build: expose FXRSTOR through a cpu_x86_* entry point. */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
1393 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1395 uintptr_t ra = GETPC();
1396 uint64_t xstate_bv, xcomp_bv, reserve0;
1398 rfbm &= env->xcr0;
1400 /* The OS must have enabled XSAVE. */
1401 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1402 raise_exception_ra(env, EXCP06_ILLOP, ra);
1405 /* The operand must be 64 byte aligned. */
1406 if (ptr & 63) {
1407 raise_exception_ra(env, EXCP0D_GPF, ra);
1410 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1412 if ((int64_t)xstate_bv < 0) {
1413 /* FIXME: Compact form. */
1414 raise_exception_ra(env, EXCP0D_GPF, ra);
1417 /* Standard form. */
1419 /* The XSTATE_BV field must not set bits not present in XCR0. */
1420 if (xstate_bv & ~env->xcr0) {
1421 raise_exception_ra(env, EXCP0D_GPF, ra);
1424 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1425 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1426 describes only XCOMP_BV, but the description of the standard form
1427 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1428 includes the next 64-bit field. */
1429 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1430 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1431 if (xcomp_bv || reserve0) {
1432 raise_exception_ra(env, EXCP0D_GPF, ra);
1435 if (rfbm & XSTATE_FP_MASK) {
1436 if (xstate_bv & XSTATE_FP_MASK) {
1437 do_xrstor_fpu(env, ptr, ra);
1438 } else {
1439 helper_fninit(env);
1440 memset(env->fpregs, 0, sizeof(env->fpregs));
1443 if (rfbm & XSTATE_SSE_MASK) {
1444 /* Note that the standard form of XRSTOR loads MXCSR from memory
1445 whether or not the XSTATE_BV bit is set. */
1446 do_xrstor_mxcsr(env, ptr, ra);
1447 if (xstate_bv & XSTATE_SSE_MASK) {
1448 do_xrstor_sse(env, ptr, ra);
1449 } else {
1450 /* ??? When AVX is implemented, we may have to be more
1451 selective in the clearing. */
1452 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1455 if (rfbm & XSTATE_BNDREGS_MASK) {
1456 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1457 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1458 env->hflags |= HF_MPX_IU_MASK;
1459 } else {
1460 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1461 env->hflags &= ~HF_MPX_IU_MASK;
1464 if (rfbm & XSTATE_BNDCSR_MASK) {
1465 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1466 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1467 } else {
1468 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1470 cpu_sync_bndcs_hflags(env);
1472 if (rfbm & XSTATE_PKRU_MASK) {
1473 uint64_t old_pkru = env->pkru;
1474 if (xstate_bv & XSTATE_PKRU_MASK) {
1475 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1476 } else {
1477 env->pkru = 0;
1479 if (env->pkru != old_pkru) {
1480 CPUState *cs = CPU(x86_env_get_cpu(env));
1481 tlb_flush(cs);
1486 #undef XO
1488 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1490 /* The OS must have enabled XSAVE. */
1491 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1492 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1495 switch (ecx) {
1496 case 0:
1497 return env->xcr0;
1498 case 1:
1499 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1500 return env->xcr0 & get_xinuse(env);
1502 break;
1504 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1507 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1509 uint32_t dummy, ena_lo, ena_hi;
1510 uint64_t ena;
1512 /* The OS must have enabled XSAVE. */
1513 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1514 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1517 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1518 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1519 goto do_gpf;
1522 /* Disallow enabling unimplemented features. */
1523 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1524 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1525 if (mask & ~ena) {
1526 goto do_gpf;
1529 /* Disallow enabling only half of MPX. */
1530 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1531 & XSTATE_BNDCSR_MASK) {
1532 goto do_gpf;
1535 env->xcr0 = mask;
1536 cpu_sync_bndcs_hflags(env);
1537 return;
1539 do_gpf:
1540 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1543 /* MMX/SSE */
1544 /* XXX: optimize by storing fpstt and fptags in the static cpu state */
1546 #define SSE_DAZ 0x0040
1547 #define SSE_RC_MASK 0x6000
1548 #define SSE_RC_NEAR 0x0000
1549 #define SSE_RC_DOWN 0x2000
1550 #define SSE_RC_UP 0x4000
1551 #define SSE_RC_CHOP 0x6000
1552 #define SSE_FZ 0x8000
1554 void update_mxcsr_status(CPUX86State *env)
1556 uint32_t mxcsr = env->mxcsr;
1557 int rnd_type;
1559 /* set rounding mode */
1560 switch (mxcsr & SSE_RC_MASK) {
1561 default:
1562 case SSE_RC_NEAR:
1563 rnd_type = float_round_nearest_even;
1564 break;
1565 case SSE_RC_DOWN:
1566 rnd_type = float_round_down;
1567 break;
1568 case SSE_RC_UP:
1569 rnd_type = float_round_up;
1570 break;
1571 case SSE_RC_CHOP:
1572 rnd_type = float_round_to_zero;
1573 break;
1575 set_float_rounding_mode(rnd_type, &env->sse_status);
1577 /* set denormals are zero */
1578 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1580 /* set flush to zero */
1581 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1584 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1586 cpu_set_mxcsr(env, val);
1589 void helper_enter_mmx(CPUX86State *env)
1591 env->fpstt = 0;
1592 *(uint32_t *)(env->fptags) = 0;
1593 *(uint32_t *)(env->fptags + 4) = 0;
1596 void helper_emms(CPUX86State *env)
1598 /* set to empty state */
1599 *(uint32_t *)(env->fptags) = 0x01010101;
1600 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1603 /* XXX: suppress */
1604 void helper_movq(CPUX86State *env, void *d, void *s)
1606 *(uint64_t *)d = *(uint64_t *)s;
1609 #define SHIFT 0
1610 #include "ops_sse.h"
1612 #define SHIFT 1
1613 #include "ops_sse.h"