6 #include "tests/malloc.h"
9 #define XSAVE_AREA_SIZE 832
11 typedef unsigned char UChar
;
12 typedef unsigned int UInt
;
13 typedef unsigned long long int ULong
;
15 typedef unsigned long int UWord
;
17 typedef unsigned char Bool
;
18 #define True ((Bool)1)
19 #define False ((Bool)0)
/* 256-bit (8 x 32-bit) test patterns; loaded into the YMM registers by
   do_setup_then_xsave so the dumped XSAVE image has recognisable
   contents. */
const unsigned int vec0[8]
   = { 0x12345678, 0x11223344, 0x55667788, 0x87654321,
       0x15263748, 0x91929394, 0x19293949, 0x48372615 };

const unsigned int vec1[8]
   = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA,
       0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB };

/* All-zeroes pattern.  NOTE(review): not referenced in the code visible
   here -- possibly used by code outside this view; confirm before
   removing. */
const unsigned int vecZ[8]
   = { 0, 0, 0, 0, 0, 0, 0, 0 };
/* A version of memset that doesn't use XMM or YMM registers, so that it
   cannot disturb the vector state this test is trying to observe.
   Returns |s|, like memset. */
static __attribute__((noinline))
void* my_memset(void* s, int c, size_t n)
{
   unsigned char* dst = (unsigned char*)s;
   size_t i;
   for (i = 0; i < n; i++) {
      dst[i] = (unsigned char)(unsigned int)c;
      /* Defeat any attempt at autovectorisation */
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return s;
}
/* Ditto for memcpy: a scalar byte-copy that stays out of the XMM/YMM
   registers.  Regions must not overlap.  Returns |dest|, like memcpy. */
static __attribute__((noinline))
void* my_memcpy(void *dest, const void *src, size_t n)
{
   size_t i;
   for (i = 0; i < n; i++) {
      ((unsigned char*)dest)[i] = ((unsigned char*)src)[i];
      /* Defeat any attempt at autovectorisation */
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return dest;
}
/* Allocate a 64-byte-aligned block of |size| bytes and zero it using
   my_memset (so no vector registers are touched).  Caller frees. */
static void* memalign_zeroed64(size_t size)
{
   char* p = memalign64(size);
   my_memset(p, 0, size);
   return p;
}
66 __attribute__((noinline
))
67 static void do_xsave ( void* p
, UInt rfbm
)
71 "movq %0, %%rax; xorq %%rdx, %%rdx; xsave (%1)"
72 : /*OUT*/ : /*IN*/ "r"((ULong
)rfbm
), "r"(p
)
73 : /*TRASH*/ "memory", "rax", "rdx"
77 __attribute__((noinline
))
78 static void do_xrstor ( void* p
, UInt rfbm
)
82 "movq %0, %%rax; xorq %%rdx, %%rdx; xrstor (%1)"
83 : /*OUT*/ : /*IN*/ "r"((ULong
)rfbm
), "r"(p
)
84 : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */
88 /* set up the FP, SSE and AVX state, and then dump it. */
89 static void do_setup_then_xsave ( void* p
, UInt rfbm
)
91 __asm__
__volatile__("finit");
92 __asm__
__volatile__("fldpi");
93 __asm__
__volatile__("fld1");
94 __asm__
__volatile__("fldln2");
95 __asm__
__volatile__("fldlg2");
96 __asm__
__volatile__("fld %st(3)");
97 __asm__
__volatile__("fld %st(3)");
98 __asm__
__volatile__("fld1");
99 __asm__
__volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0
[0]) : "xmm0" );
100 __asm__
__volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1
[0]) : "xmm1" );
101 __asm__
__volatile__("vxorps %ymm2, %ymm2, %ymm2");
102 __asm__
__volatile__("vmovaps %ymm0, %ymm3");
103 __asm__
__volatile__("vmovaps %ymm1, %ymm4");
104 __asm__
__volatile__("vmovaps %ymm2, %ymm5");
105 __asm__
__volatile__("vmovaps %ymm0, %ymm6");
106 __asm__
__volatile__("vmovaps %ymm1, %ymm7");
107 __asm__
__volatile__("vmovaps %ymm1, %ymm8");
108 __asm__
__volatile__("vmovaps %ymm2, %ymm9");
109 __asm__
__volatile__("vmovaps %ymm0, %ymm10");
110 __asm__
__volatile__("vmovaps %ymm1, %ymm11");
111 __asm__
__volatile__("vmovaps %ymm1, %ymm12");
112 __asm__
__volatile__("vmovaps %ymm2, %ymm13");
113 __asm__
__volatile__("vmovaps %ymm0, %ymm14");
114 __asm__
__volatile__("vmovaps %ymm1, %ymm15");
/* Does offset |i| (within an XSAVE image) land on one of the 2
   least-significant bytes of a dumped x87 register?  The 8 registers
   live at offsets 32, 48, 64, ..., 144, so the matching offsets are
   q and q+1 for q in {32, 48, ..., 144}. */
static int isFPLsbs ( int i )
{
   int q;
   for (q = 32; q <= 144; q += 16) {
      if (i == q || i == q+1)
         return 1;
   }
   return 0;
}
132 static void show ( unsigned char* buf
, Bool hideBits64to79
)
135 for (i
= 0; i
< XSAVE_AREA_SIZE
; i
++) {
137 fprintf(stderr
, "%3d ", i
);
138 if (hideBits64to79
&& isFPLsbs(i
))
139 fprintf(stderr
, "xx ");
141 fprintf(stderr
, "%02x ", buf
[i
]);
142 if (i
> 0 && ((i
% 16) == 15))
143 fprintf(stderr
, "\n");
147 static void cpuid ( UInt
* eax
, UInt
* ebx
, UInt
* ecx
, UInt
* edx
,
148 UInt index
, UInt ecx_in
)
151 asm volatile ("cpuid"
152 : "=a" (a
), "=b" (b
), "=c" (c
), "=d" (d
) \
153 : "0" (index
), "2"(ecx_in
) );
154 *eax
= a
; *ebx
= b
; *ecx
= c
; *edx
= d
;
155 //fprintf(stderr, "%08x %08x -> %08x %08x %08x %08x\n",
156 // index,ecx_in, a,b,c,d );
159 static void xgetbv ( UInt
* eax
, UInt
* edx
, UInt ecx_in
)
162 asm volatile ("xgetbv"
163 : "=a" (a
), "=d" (d
) \
168 static void check_for_xsave ( void )
170 UInt eax
, ebx
, ecx
, edx
;
173 eax
= ebx
= ecx
= edx
= 0;
174 cpuid(&eax
, &ebx
, &ecx
, &edx
, 1,0);
175 //fprintf(stderr, "cpuid(1).ecx[26=xsave] = %u\n", (ecx >> 26) & 1);
176 ok
= ok
&& (((ecx
>> 26) & 1) == 1);
178 eax
= ebx
= ecx
= edx
= 0;
179 cpuid(&eax
, &ebx
, &ecx
, &edx
, 1,0);
180 //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1);
181 ok
= ok
&& (((ecx
>> 27) & 1) == 1);
183 eax
= ebx
= ecx
= edx
= 0;
184 xgetbv(&eax
, &edx
, 0);
185 //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax);
186 ok
= ok
&& (edx
== 0) && (eax
== 7);
191 "This program must be run on a CPU that supports AVX and XSAVE.\n");
196 void test_xsave ( Bool hideBits64to79
)
200 For RBFM in 0 .. 7 (that is, all combinations): set the x87, SSE
201 and AVX registers with some values, do XSAVE to dump it, and
202 print the resulting buffer. */
205 for (rfbm
= 0; rfbm
<= 7; rfbm
++) {
206 UChar
* saved_img
= memalign_zeroed64(XSAVE_AREA_SIZE
);
208 my_memset(saved_img
, 0xAA, XSAVE_AREA_SIZE
);
210 do_setup_then_xsave(saved_img
, rfbm
);
213 "------------------ XSAVE, rfbm = %u ------------------\n", rfbm
);
214 show(saved_img
, hideBits64to79
);
215 fprintf(stderr
, "\n");
222 void test_xrstor ( Bool hideBits64to79
)
224 /* Testing XRSTOR is more complex than testing XSAVE, because the
225 loaded value(s) depend not only on what bits are requested (by
226 RBFM) but also on what bits are actually present in the image
227 (defined by XSTATE_BV). So we have to test all 64 (8 x 8)
230 The approach is to fill a memory buffer with data, do XRSTOR
231 from the buffer, them dump all components with XSAVE in a new
232 buffer, and print the result. This is complicated by the fact
233 that we need to be able to see which parts of the state (in
234 registers) are neither overwritten nor zeroed by the restore.
235 Hence the registers must be pre-filled with values which are
236 neither zero nor the data to be loaded. We choose to use 0x55
239 UChar
* fives
= memalign_zeroed64(XSAVE_AREA_SIZE
);
240 my_memset(fives
, 0x55, XSAVE_AREA_SIZE
);
241 /* Set MXCSR so that the insn doesn't fault */
246 /* Ditto for the XSAVE header area. Also set XSTATE_BV. */
249 for (i
= 1; i
<= 23; i
++) fives
[512+i
] = 0;
250 /* Fill the x87 register values with something that VEX's
251 80-vs-64-bit kludging won't mess up -- an 80 bit number which is
252 representable also as 64 bit: 123456789.0123 */
253 for (i
= 0; i
<= 7; i
++) {
254 UChar
* p
= &fives
[32 + 16 * i
];
255 p
[0]=0x00; p
[1]=0xf8; p
[2]=0xc2; p
[3]=0x64; p
[4]=0xa0;
256 p
[5]=0xa2; p
[6]=0x79; p
[7]=0xeb; p
[8]=0x19; p
[9]=0x40;
258 /* And mark the tags for all 8 dumped regs as "valid". */
259 fives
[4/*FTW*/] = 0xFF;
261 /* (1) (see comment in loop below) */
262 UChar
* standard_test_data
= memalign_zeroed64(XSAVE_AREA_SIZE
);
263 do_setup_then_xsave(standard_test_data
, 7);
265 UInt xstate_bv
, rfbm
;
266 for (xstate_bv
= 0; xstate_bv
<= 7; xstate_bv
++) {
267 for (rfbm
= 0; rfbm
<= 7; rfbm
++) {
270 /* 1. Copy the "standard test data" into registers, and dump
271 it with XSAVE. This gives us an image we can try
274 2. Set the register state to all-0x55s (as far as is
275 possible), so we can see which parts get overwritten
276 and which parts get zeroed on the test restore.
278 3. Do the restore from the image prepared in (1).
280 4. Dump the state with XSAVE and print it.
283 /* (3a). We can't use |standard_test_data| directly, since we
284 need to put in the required |xstate_bv| value. So make a
285 copy and modify that instead. */
286 UChar
* img_to_restore_from
= memalign_zeroed64(XSAVE_AREA_SIZE
);
287 my_memcpy(img_to_restore_from
, standard_test_data
, XSAVE_AREA_SIZE
);
288 img_to_restore_from
[512] = xstate_bv
;
291 UChar
* saved_img
= memalign_zeroed64(XSAVE_AREA_SIZE
);
292 my_memset(saved_img
, 0xAA, XSAVE_AREA_SIZE
);
298 // X87, SSE, AVX state LIVE
301 /* and this is what we're actually trying to test */
302 do_xrstor(img_to_restore_from
, rfbm
);
304 // X87, SSE, AVX state LIVE
307 do_xsave(saved_img
, 7);
310 "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n",
312 show(saved_img
, hideBits64to79
);
313 fprintf(stderr
, "\n");
316 free(img_to_restore_from
);
322 int main ( int argc
, char** argv
)
324 Bool hideBits64to79
= argc
> 1;
325 fprintf(stderr
, "Re-run with any arg to suppress least-significant\n"
326 " 16 bits of 80-bit FP numbers\n");
331 test_xsave(hideBits64to79
);
334 test_xrstor(hideBits64to79
);