1 // This program is a thorough test of the LOADVn/STOREVn shadow memory
8 #include "tests/sys_mman.h"
9 #include "memcheck/memcheck.h"
// All the sizes here are in *bytes*, not bits.
typedef unsigned char      U1;
typedef unsigned short     U2;
typedef unsigned int       U4;
typedef unsigned long long U8;

// Machine-word-sized unsigned integer; used later for the
// pointer<->integer cast when picking a fixed mmap address.
typedef unsigned long UWord;

#define PAGE_SIZE 4096ULL
26 // XXX: should check the error cases for SET/GET_VBITS also
// For the byte 'x', build a value of 'size' bytes from that byte, eg:
//    size 1 --> x
//    size 2 --> xx
//    size 4 --> xxxx
//    size 8 --> xxxxxxxx
33 // where the 0 bits are seen by Memcheck as defined, and the 1 bits are
34 // seen as undefined (ie. the value of each bit matches its V bit, ie. the
35 // resulting value is the same as its metavalue).
// build(size, byte): construct a 'size'-byte value (1, 2, 4 or 8 bytes)
// out of repeated copies of 'byte', arranged so that each bit's value
// equals its V (validity) bit -- see the block comment above.  Uses
// Memcheck client requests to cross-check the value against its own
// shadow metadata before returning it.
//
// NOTE(review): this copy of the file appears truncated -- the opening
// brace, the declarations of 'i' and 'shres', the loop body, the
// assignment to 'res2' and the return statement are not visible here.
// Compare against upstream sh-mem-random.c before relying on this text.
37 U8
build(int size
, U1 byte
)
// 'res' starts as all-ones, then is marked fully undefined, so its V bits
// (all 1 = undefined) match its value bits, as described above.
42 U8 res
= 0xffffffffffffffffULL
, res2
;
43 (void)VALGRIND_MAKE_MEM_UNDEFINED(&res
, 8);
// Only sizes 1, 2, 4 and 8 bytes are supported.
44 assert(1 == size
|| 2 == size
|| 4 == size
|| 8 == size
);
// Fold 'byte' into 'res' one byte per iteration (loop body not visible
// in this copy).
46 for (i
= 0; i
< size
; i
++) {
53 // res is now considered partially defined, but we know exactly what its
54 // value is (it happens to be the same as its metavalue).
// Fetch res's V bits into shres, then assert that the value ('res2') and
// its shadow are bit-for-bit identical.
56 (void)VALGRIND_GET_VBITS(&res
, &shres
, 8);
58 (void)VALGRIND_MAKE_MEM_DEFINED(&res2
, 8); // avoid the 'undefined' warning
59 assert(res2
== shres
);
// make_defined(x): return a copy of byte 'x' whose shadow V bits say
// "fully defined", so it can be compared or printed without triggering
// Memcheck's undefined-value warnings.
//
// NOTE(review): truncated in this copy -- the declaration of local 'xx'
// (presumably a copy of 'x') and the return statement are not visible.
63 U1
make_defined ( U1 x
)
66 (void)VALGRIND_MAKE_MEM_DEFINED(&xx
, 1);
// check(arr, n, who): verify that every byte of arr[0..n-1] is
// bit-for-bit identical to its own V-bit shadow, as fetched with
// VALGRIND_GET_VBITS.  'who' names the test phase for the failure
// message.  On success, prints a summary line with a per-byte average.
//
// NOTE(review): truncated in this copy -- declarations of 'i', 'arr_i'
// and 'sum', the loop/branch closers, and the free of 'shadow' are not
// visible.  'F8' is presumably a floating-point typedef declared in a
// line missing from this extract -- confirm against upstream.
70 void check(U1
* arr
, int n
, char* who
)
// Shadow buffer: one V-bit byte per data byte.
73 U1
* shadow
= malloc(n
);
76 (void)VALGRIND_GET_VBITS(arr
, shadow
, n
);
// Compare each data byte (forced to 'defined' first so the comparison
// itself is clean under Memcheck) against its shadow byte.
77 for (i
= 0; i
< n
; i
++) {
78 arr_i
= make_defined(arr
[i
]);
79 if (arr_i
!= shadow
[i
]) {
80 fprintf(stderr
, "\n\nFAILURE: %s, byte %d -- "
81 "is 0x%x, should be 0x%x\n\n",
82 who
, i
, shadow
[i
], arr
[i
]);
// Success: report the running byte sum ('sum' is accumulated in lines
// not visible here) and its per-byte average.
88 printf("test passed, sum = %llu (%9.5f per byte)\n",
89 sum
, (F8
)sum
/ (F8
)n
);
// randomU4(): pseudo-random 32-bit generator using a linear congruential
// recurrence.
//
// NOTE(review): truncated in this copy -- the opening brace, the
// declaration of the state variable 'n' (presumably a static U4) and the
// return statement are not visible.
92 static inline U4
randomU4 ( void )
95 /* From "Numerical Recipes in C" 2nd Edition */
96 n
= 1664525UL * n
+ 1013904223UL;
100 static inline U1
randomU1 ( void )
102 return 0xFF & (randomU4() >> 13);
// NB! 300000 is really not enough to shake out all failures.
// Increasing it by a factor of 256 is, but makes the test take
// the best part of an hour.
#define N_BYTES  (300000 /* * 256 */)
#define N_EVENTS (5 * N_BYTES)
// do_test_at(arr): the main workload.  Fills the N_BYTES buffer 'arr'
// with random bytes whose values match their V bits (via build()), then
// performs N_EVENTS randomly-sized copies (1/2/4/8-byte integer, 8-byte
// FP; the 4-byte FP arm is deliberately skipped) between random offsets,
// and finally re-checks that every byte still matches its shadow.  The
// closing printf reports how many copies of each width were done.
//
// NOTE(review): truncated in this copy -- declarations of 'i', 'ty',
// 'src' and 'dst', the switch/case scaffolding around the per-width copy
// arms, the bodies of the bounds-check 'if's (presumably retry or
// continue), the mv* counter increments, the #else/#endif lines of the
// inline-asm section, and the closing braces are not visible.  Compare
// against upstream sh-mem-random.c.
112 void do_test_at ( U1
* arr
)
// Per-width copy counters, printed at the end.
116 U4 mv1
= 0, mv2
= 0, mv4
= 0, mv8
= 0, mv4f
= 0, mv8f
= 0;
118 /* Fill arr with random bytes whose shadows match them. */
119 if (0) printf("-------- arr = %p\n", arr
);
121 printf("initialising\n");
122 for (i
= 0; i
< N_BYTES
; i
++)
123 arr
[i
] = (U1
)build(1, randomU1());
125 printf("post-initialisation check\n");
126 check(arr
, N_BYTES
, "after initialisation");
128 /* Now do huge numbers of memory copies. */
129 printf("doing copies\n");
130 for (i
= 0; i
< N_EVENTS
; i
++) {
// Pick a copy-width class (0..4) and random source/destination offsets.
132 ty
= (randomU4() >> 13) % 5;
134 src
= (randomU4() >> 1) % N_BYTES
;
135 dst
= (randomU4() >> 3) % N_BYTES
;
// 1-byte copy (never out of bounds since src/dst < N_BYTES).
138 *(U1
*)(arr
+dst
) = *(U1
*)(arr
+src
);
// 2-byte copy, guarded so both ends stay inside the buffer.
143 if (src
+2 >= N_BYTES
|| dst
+2 >= N_BYTES
)
145 *(U2
*)(arr
+dst
) = *(U2
*)(arr
+src
);
// 4-byte copy, bounds-guarded as above.
150 if (src
+4 >= N_BYTES
|| dst
+4 >= N_BYTES
)
152 *(U4
*)(arr
+dst
) = *(U4
*)(arr
+src
);
// 8-byte integer copy, bounds-guarded as above.
157 if (src
+8 >= N_BYTES
|| dst
+8 >= N_BYTES
)
159 *(U8
*)(arr
+dst
) = *(U8
*)(arr
+src
);
163 /* Don't bother with 32-bit floats. These cause
164 horrible complications, as discussed in sh-mem.c. */
// 4-byte FP copy arm (skipped per the comment above; 'F4' is presumably
// a float typedef declared in a line missing from this extract).
167 if (src+4 >= N_BYTES || dst+4 >= N_BYTES)
169 *(F4*)(arr+dst) = *(F4*)(arr+src);
// 8-byte FP copy, with architecture-specific implementations below.
175 if (src
+8 >= N_BYTES
|| dst
+8 >= N_BYTES
)
177 #if defined(__i386__)
178 /* Copying via an x87 register causes the test to fail,
179 because (I think) some obscure values that are FP
180 denormals get changed during the copy due to the FPU
181 normalising, or rounding, or whatever, them. This
182 causes them to no longer bit-for-bit match the
183 accompanying metadata. Yet we still need to do a
184 genuine 8-byte load/store to test the relevant memcheck
185 {LOADV8,STOREV8} routines. Hence use the MMX registers
186 instead, as copying through them should be
// x86: do the 64-bit copy through MMX register %mm2 (operand 1 = src,
// operand 0 = dst).
188 __asm__
__volatile__(
189 "movq (%1), %%mm2\n\t"
190 "movq %%mm2, (%0)\n\t"
192 : : "r"(arr
+dst
), "r"(arr
+src
) : "memory"
194 #elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
195 /* On arm32, many compilers generate a 64-bit float move
196 using two 32 bit integer registers, which completely
197 defeats this test. Hence force a 64-bit NEON load and
198 store. I guess this will break the build on non-NEON
// arm32: force a genuine 64-bit copy through NEON register d7.
200 __asm__
__volatile__ (
201 "vld1.64 {d7},[%0] ; vst1.64 {d7},[%1] "
202 : : "r"(arr
+src
), "r"(arr
+dst
) : "d7","memory"
205 /* Straightforward. On amd64, this gives a load/store of
206 the bottom half of an xmm register. On ppc32/64 this
207 is a straighforward load/store of an FP register. */
// Generic fallback: plain 8-byte FP assignment ('F8' presumably a double
// typedef declared in a line missing from this extract).
208 *(F8
*)(arr
+dst
) = *(F8
*)(arr
+src
);
// Should-not-happen width class: report it.
214 fprintf(stderr
, "sh-mem-random: bad size\n");
219 printf("final check\n");
220 check(arr
, N_BYTES
, "final check");
// Report how many copies of each width were performed.
222 printf("counts 1/2/4/8/F4/F8: %d %d %d %d %d %d\n",
223 mv1
, mv2
, mv4
, mv8
, mv4f
, mv8f
);
232 if (0 == RUNNING_ON_VALGRIND
) {
233 fprintf(stderr
, "error: this program only works when run under Valgrind\n");
237 printf("-------- testing non-auxmap range --------\n");
239 arr
= malloc(N_BYTES
);
244 if (sizeof(void*) == 8) {
248 // (U1*)(UWord)constULL funny casting to keep gcc quiet on
250 // https://www.kernel.org/doc/html/next/riscv/vm-layout.html
251 // Says RISC-V Linux Kernel SV39 user-space virtual memory
252 // ends at 256GB. So try at 240GB.
253 U1
* huge_addr
= (U1
*)(UWord
)0x3c00000000ULL
; // 240GB
254 // Note, kernel 2.6.? on Athlon64 refuses fixed mmap requests
257 printf("-------- testing auxmap range --------\n");
259 nbytes_p
= (N_BYTES
+ PAGE_SIZE
) & ~(PAGE_SIZE
-1);
261 for (tries
= 0; tries
< 10; tries
++) {
262 arr
= mmap(huge_addr
, nbytes_p
, PROT_READ
|PROT_WRITE
,
263 MAP_FIXED
|MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
264 if (arr
!= MAP_FAILED
)
266 // hmm. fudge the address and try again.
267 huge_addr
+= (randomU4() & ~(PAGE_SIZE
-1));
271 fprintf(stderr
, "sh-mem-random: can't mmap hi-mem\n");
274 assert(arr
!= MAP_FAILED
);