/* memcheck/tests/amd64/xsave-avx.c */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "tests/asm.h"
#include "tests/malloc.h"
#include <string.h>
#define XSAVE_AREA_SIZE 832
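/* A note on where that value comes from, assuming the standard
   (non-compacted) XSAVE format: bytes 0..511 are the legacy FXSAVE
   image (x87, XMM and MXCSR state), bytes 512..575 are the XSAVE
   header (whose first 8 bytes, at offset 512, are XSTATE_BV), and
   the AVX component -- the high 128 bits of ymm0..ymm15, 16 * 16 =
   256 bytes -- starts at offset 576.  576 + 256 = 832. */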
typedef  unsigned char           UChar;
typedef  unsigned int            UInt;
typedef  unsigned long long int  ULong;
typedef  unsigned long int       UWord;

typedef  unsigned char  Bool;
#define True  ((Bool)1)
#define False ((Bool)0)
const unsigned int vec0[8]
   = { 0x12345678, 0x11223344, 0x55667788, 0x87654321,
       0x15263748, 0x91929394, 0x19293949, 0x48372615 };

const unsigned int vec1[8]
   = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA,
       0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB };

const unsigned int vecZ[8]
   = { 0, 0, 0, 0, 0, 0, 0, 0 };
/* A version of memset that doesn't use XMM or YMM registers. */
static __attribute__((noinline))
void* my_memset(void* s, int c, size_t n)
{
   size_t i;
   for (i = 0; i < n; i++) {
      ((unsigned char*)s)[i] = (unsigned char)(unsigned int)c;
      /* Defeat any attempt at autovectorisation */
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return s;
}
/* Ditto for memcpy */
static __attribute__((noinline))
void* my_memcpy(void *dest, const void *src, size_t n)
{
   size_t i;
   for (i = 0; i < n; i++) {
      ((unsigned char*)dest)[i] = ((const unsigned char*)src)[i];
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return dest;
}
static void* memalign_zeroed64(size_t size)
{
   char* p = memalign64(size);
   if (p && size > 0) {
      my_memset(p, 0, size);
   }
   return p;
}
__attribute__((noinline))
static void do_xsave ( void* p, UInt rfbm )
{
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax; xorq %%rdx, %%rdx; xsave (%1)"
      : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
      : /*TRASH*/ "memory", "rax", "rdx"
   );
}
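/* Both XSAVE and XRSTOR take their requested-feature bitmap (RFBM)
   in EDX:EAX, which the CPU ANDs with XCR0.  Bit 0 = x87, bit 1 =
   SSE, bit 2 = AVX, so the values 0..7 used throughout this test
   cover every component combination we care about; the upper 32
   bits in RDX are simply zeroed. */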
__attribute__((noinline))
static void do_xrstor ( void* p, UInt rfbm )
{
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax; xorq %%rdx, %%rdx; xrstor (%1)"
      : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
      : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */
   );
}
/* Set up the FP, SSE and AVX state, and then dump it. */
static void do_setup_then_xsave ( void* p, UInt rfbm )
{
   __asm__ __volatile__("finit");
   __asm__ __volatile__("fldpi");
   __asm__ __volatile__("fld1");
   __asm__ __volatile__("fldln2");
   __asm__ __volatile__("fldlg2");
   __asm__ __volatile__("fld %st(3)");
   __asm__ __volatile__("fld %st(3)");
   __asm__ __volatile__("fld1");
   __asm__ __volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0[0]) : "xmm0" );
   __asm__ __volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1[0]) : "xmm1" );
   __asm__ __volatile__("vxorps %ymm2, %ymm2, %ymm2");
   __asm__ __volatile__("vmovaps %ymm0, %ymm3");
   __asm__ __volatile__("vmovaps %ymm1, %ymm4");
   __asm__ __volatile__("vmovaps %ymm2, %ymm5");
   __asm__ __volatile__("vmovaps %ymm0, %ymm6");
   __asm__ __volatile__("vmovaps %ymm1, %ymm7");
   __asm__ __volatile__("vmovaps %ymm1, %ymm8");
   __asm__ __volatile__("vmovaps %ymm2, %ymm9");
   __asm__ __volatile__("vmovaps %ymm0, %ymm10");
   __asm__ __volatile__("vmovaps %ymm1, %ymm11");
   __asm__ __volatile__("vmovaps %ymm1, %ymm12");
   __asm__ __volatile__("vmovaps %ymm2, %ymm13");
   __asm__ __volatile__("vmovaps %ymm0, %ymm14");
   __asm__ __volatile__("vmovaps %ymm1, %ymm15");
   do_xsave(p, rfbm);
}
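/* Note: the seven x87 pushes above fill st(0)..st(6) and leave one
   stack slot empty, and the vmovaps chain spreads copies of vec0,
   vec1 and the all-zeroes vector across ymm3..ymm15, so every
   component XSAVE can dump ends up holding a recognisable pattern. */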
static int isFPLsbs ( int i )
{
   int q;
   q = 32;  if (i == q || i == q+1) return 1;
   q = 48;  if (i == q || i == q+1) return 1;
   q = 64;  if (i == q || i == q+1) return 1;
   q = 80;  if (i == q || i == q+1) return 1;
   q = 96;  if (i == q || i == q+1) return 1;
   q = 112; if (i == q || i == q+1) return 1;
   q = 128; if (i == q || i == q+1) return 1;
   q = 144; if (i == q || i == q+1) return 1;
   return 0;
}
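/* In the legacy FXSAVE image the eight x87 registers live at offsets
   32 + 16*n (n = 0..7), 10 bytes of data padded to 16.  The pairs of
   offsets matched above are the least-significant 16 bits of each
   80-bit value -- exactly the bits that get perturbed when VEX
   round-trips x87 values through 64-bit doubles, which is why the
   test offers to mask them out of the dump. */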
static void show ( unsigned char* buf, Bool hideBits64to79 )
{
   int i;
   for (i = 0; i < XSAVE_AREA_SIZE; i++) {
      if ((i % 16) == 0)
         fprintf(stderr, "%3d ", i);
      if (hideBits64to79 && isFPLsbs(i))
         fprintf(stderr, "xx ");
      else
         fprintf(stderr, "%02x ", buf[i]);
      if (i > 0 && ((i % 16) == 15))
         fprintf(stderr, "\n");
   }
}
static void cpuid ( UInt* eax, UInt* ebx, UInt* ecx, UInt* edx,
                    UInt index, UInt ecx_in )
{
   UInt a,b,c,d;
   asm volatile ("cpuid"
                 : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
                 : "0" (index), "2" (ecx_in) );
   *eax = a; *ebx = b; *ecx = c; *edx = d;
   //fprintf(stderr, "%08x %08x -> %08x %08x %08x %08x\n",
   //        index,ecx_in, a,b,c,d );
}
static void xgetbv ( UInt* eax, UInt* edx, UInt ecx_in )
{
   UInt a,d;
   asm volatile ("xgetbv"
                 : "=a" (a), "=d" (d)
                 : "c" (ecx_in) );
   *eax = a; *edx = d;
}
static void check_for_xsave ( void )
{
   UInt eax, ebx, ecx, edx;
   Bool ok = True;

   eax = ebx = ecx = edx = 0;
   cpuid(&eax, &ebx, &ecx, &edx, 1,0);
   //fprintf(stderr, "cpuid(1).ecx[26=xsave] = %u\n", (ecx >> 26) & 1);
   ok = ok && (((ecx >> 26) & 1) == 1);

   eax = ebx = ecx = edx = 0;
   cpuid(&eax, &ebx, &ecx, &edx, 1,0);
   //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1);
   ok = ok && (((ecx >> 27) & 1) == 1);

   /* XCR0 must report exactly x87|SSE|AVX enabled (bits 2:0 == 111
      and nothing above them). */
   eax = ebx = ecx = edx = 0;
   xgetbv(&eax, &edx, 0);
   //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax);
   ok = ok && (edx == 0) && (eax == 7);

   if (ok) return;

   fprintf(stderr,
           "This program must be run on a CPU that supports AVX and XSAVE.\n");
   exit(1);
}
void test_xsave ( Bool hideBits64to79 )
{
   /* Testing XSAVE:

      For RFBM in 0 .. 7 (that is, all combinations): set the x87,
      SSE and AVX registers to some values, do XSAVE to dump them,
      and print the resulting buffer. */

   UInt rfbm;
   for (rfbm = 0; rfbm <= 7; rfbm++) {
      UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);

      my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
      saved_img[512] = 0;  /* zero XSTATE_BV in the header */
      do_setup_then_xsave(saved_img, rfbm);

      fprintf(stderr,
              "------------------ XSAVE, rfbm = %u ------------------\n",
              rfbm);
      show(saved_img, hideBits64to79);
      fprintf(stderr, "\n");

      free(saved_img);
   }
}
void test_xrstor ( Bool hideBits64to79 )
{
   /* Testing XRSTOR is more complex than testing XSAVE, because the
      loaded value(s) depend not only on which bits are requested (by
      RFBM) but also on which bits are actually present in the image
      (defined by XSTATE_BV).  So we have to test all 64 (8 x 8)
      combinations.

      The approach is to fill a memory buffer with data, do XRSTOR
      from the buffer, then dump all components with XSAVE into a new
      buffer, and print the result.  This is complicated by the fact
      that we need to be able to see which parts of the state (in
      registers) are neither overwritten nor zeroed by the restore.
      Hence the registers must be pre-filled with values which are
      neither zero nor the data to be loaded.  We choose to use 0x55
      where possible. */
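   /* For reference (Intel SDM, standard-form XRSTOR): a component
      whose RFBM bit is 0 is left unchanged; one whose RFBM bit is 1
      and whose XSTATE_BV bit is 1 is loaded from the image; one whose
      RFBM bit is 1 but whose XSTATE_BV bit is 0 is reset to its
      initial configuration.  The 0x55 pre-fill lets all three cases
      be told apart in the dump. */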
   UChar* fives = memalign_zeroed64(XSAVE_AREA_SIZE);
   my_memset(fives, 0x55, XSAVE_AREA_SIZE);
   /* Set MXCSR so that the insn doesn't fault (0x1f80 is the reset
      default: all exceptions masked, round-to-nearest). */
   fives[24] = 0x80;
   fives[25] = 0x1f;
   fives[26] = 0;
   fives[27] = 0;
   /* Ditto for the XSAVE header area.  Also set XSTATE_BV (to 7). */
   fives[512] = 7;
   UInt i;
   for (i = 1; i <= 23; i++) fives[512+i] = 0;
   /* Fill the x87 register values with something that VEX's
      80-vs-64-bit kludging won't mess up -- an 80 bit number which is
      representable also as 64 bit: 123456789.0123 */
   for (i = 0; i <= 7; i++) {
      UChar* p = &fives[32 + 16 * i];
      p[0]=0x00; p[1]=0xf8; p[2]=0xc2; p[3]=0x64; p[4]=0xa0;
      p[5]=0xa2; p[6]=0x79; p[7]=0xeb; p[8]=0x19; p[9]=0x40;
   }
   /* And mark the tags for all 8 dumped regs as "valid". */
   fives[4/*FTW*/] = 0xFF;
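   /* Decoding those ten bytes by hand (they are little-endian, so
      read p[9] downwards): the top 16 bits 0x4019 give sign 0 and
      exponent 0x4019 - 16383 = 26, and the 64-bit significand is
      0xeb79a2a064c2f800, so the value is
      (0xeb79a2a064c2f800 / 2^63) * 2^26 ~= 123456789.0123 as
      claimed.  The low 11 significand bits are zero, leaving 53
      significant bits, which is why the value is also exactly
      representable as an IEEE754 double. */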
   /* (1) (see comment in loop below) */
   UChar* standard_test_data = memalign_zeroed64(XSAVE_AREA_SIZE);
   do_setup_then_xsave(standard_test_data, 7);
   UInt xstate_bv, rfbm;
   for (xstate_bv = 0; xstate_bv <= 7; xstate_bv++) {
      for (rfbm = 0; rfbm <= 7; rfbm++) {
      //{ xstate_bv = 7;
      //   { rfbm = 6;
         /* 1. Copy the "standard test data" into registers, and dump
               it with XSAVE.  This gives us an image we can try
               restoring from.

            2. Set the register state to all-0x55s (as far as is
               possible), so we can see which parts get overwritten
               and which parts get zeroed on the test restore.

            3. Do the restore from the image prepared in (1).

            4. Dump the state with XSAVE and print it.
         */

         /* (3a).  We can't use |standard_test_data| directly, since
            we need to put in the required |xstate_bv| value.  So make
            a copy and modify that instead. */
         UChar* img_to_restore_from = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memcpy(img_to_restore_from, standard_test_data,
                   XSAVE_AREA_SIZE);
         img_to_restore_from[512] = xstate_bv;

         /* (4a) */
         UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
         saved_img[512] = 0;  /* zero XSTATE_BV in the header */

         /* (2) */
         do_xrstor(fives, 7);
         // X87, SSE, AVX state LIVE

         /* (3b) */
         /* and this is what we're actually trying to test */
         do_xrstor(img_to_restore_from, rfbm);
         // X87, SSE, AVX state LIVE

         /* (4b) */
         do_xsave(saved_img, 7);

         fprintf(stderr,
                 "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n",
                 xstate_bv, rfbm);
         show(saved_img, hideBits64to79);
         fprintf(stderr, "\n");

         free(saved_img);
         free(img_to_restore_from);
      }
   }

   free(standard_test_data);
   free(fives);
}
int main ( int argc, char** argv )
{
   Bool hideBits64to79 = argc > 1;
   fprintf(stderr, "Re-run with any arg to suppress least-significant\n"
                   " 16 bits of 80-bit FP numbers\n");

   check_for_xsave();

   if (1)
      test_xsave(hideBits64to79);

   if (1)
      test_xrstor(hideBits64to79);

   return 0;
}