regtest: broaden none/tests/linux/bug498317 suppression for PPC
[valgrind.git] / none / tests / amd64 / amd64locked.c
blob336790e0cbcf01981dad54cc97e2768b11948459
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
/* Set to 1 to make send() print every line it folds into the CRC. */
#define VERBOSE 0

/* Short aliases for the basic integer types used throughout the test. */

/* Unsigned, narrowest first. */
typedef unsigned char          UChar;
typedef unsigned short         UShort;
typedef unsigned int           UInt;
typedef unsigned long          UWord;
typedef unsigned long long int ULong;

/* Signed. */
typedef signed int             Int;
typedef signed long long int   Long;

/* Plain char, used for the text buffer. */
typedef char                   HChar;
/* Deterministic pseudo-random source: a multiply-with-carry generator
   whose two lanes live in function-local statics, so the sequence is
   the same on every run of the program. */
__attribute__((noinline))
unsigned myrandom(void)
{
   static unsigned lane_lo = 11;   /* contributes the low part of the result  */
   static unsigned lane_hi = 13;   /* contributes the high part of the result */

   const unsigned carry_hi = lane_hi >> 16;
   const unsigned carry_lo = lane_lo >> 16;

   lane_hi = 36969 * (lane_hi & 65535) + carry_hi;
   lane_lo = 18000 * (lane_lo & 65535) + carry_lo;

   return lane_lo + (lane_hi << 16);
}
31 /////////////////////////////////////////////////////////////////
32 // BEGIN crc32 stuff //
33 /////////////////////////////////////////////////////////////////
35 static const UInt crc32Table[256] = {
37 /*-- Ugly, innit? --*/
39 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
40 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
41 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
42 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
43 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
44 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
45 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
46 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
47 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
48 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
49 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
50 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
51 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
52 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
53 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
54 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
55 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
56 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
57 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
58 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
59 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
60 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
61 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
62 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
63 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
64 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
65 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
66 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
67 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
68 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
69 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
70 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
71 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
72 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
73 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
74 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
75 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
76 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
77 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
78 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
79 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
80 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
81 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
82 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
83 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
84 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
85 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
86 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
87 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
88 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
89 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
90 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
91 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
92 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
93 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
94 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
95 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
96 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
97 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
98 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
99 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
100 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
101 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
102 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
/* Fold one byte into a running CRC.
     crcVar : an lvalue of type UInt holding the running CRC; it is
              updated in place.  It is named three times in the
              expansion, so it must not have side effects.
     cha    : the byte to fold in; it is narrowed to UChar before use.
   The body is wrapped in do/while(0) so that "UPDATE_CRC(a,b);" is a
   single statement in every context, and both arguments are
   parenthesised so that expression arguments expand safely. */
#define UPDATE_CRC(crcVar,cha)                    \
   do {                                           \
      (crcVar) = ((crcVar) << 8) ^                \
                 crc32Table[((crcVar) >> 24) ^    \
                            ((UChar)(cha))];      \
   } while (0)
112 static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
114 UInt crc = crcIn;
115 while (nBytes >= 4) {
116 UPDATE_CRC(crc, bytes[0]);
117 UPDATE_CRC(crc, bytes[1]);
118 UPDATE_CRC(crc, bytes[2]);
119 UPDATE_CRC(crc, bytes[3]);
120 bytes += 4;
121 nBytes -= 4;
123 while (nBytes >= 1) {
124 UPDATE_CRC(crc, bytes[0]);
125 bytes += 1;
126 nBytes -= 1;
128 return crc;
131 static UInt crcFinalise ( UInt crc ) {
132 return ~crc;
135 ////////
137 static UInt theCRC = 0xFFFFFFFF;
139 static HChar outBuf[1024];
140 // take output that's in outBuf, length as specified, and
141 // update the running crc.
142 static void send ( int nbytes )
144 assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
145 assert(outBuf[nbytes] == 0);
146 theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
147 if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
151 /////////////////////////////////////////////////////////////////
152 // END crc32 stuff //
153 /////////////////////////////////////////////////////////////////
155 #if 0
157 // full version
158 #define NVALS 76
160 static ULong val[NVALS]
161 = { 0x00ULL, 0x01ULL, 0x02ULL, 0x03ULL,
162 0x3FULL, 0x40ULL, 0x41ULL,
163 0x7EULL, 0x7FULL, 0x80ULL, 0x81ULL, 0x82ULL,
164 0xBFULL, 0xC0ULL, 0xC1ULL,
165 0xFCULL, 0xFDULL, 0xFEULL, 0xFFULL,
167 0xFF00ULL, 0xFF01ULL, 0xFF02ULL, 0xFF03ULL,
168 0xFF3FULL, 0xFF40ULL, 0xFF41ULL,
169 0xFF7EULL, 0xFF7FULL, 0xFF80ULL, 0xFF81ULL, 0xFF82ULL,
170 0xFFBFULL, 0xFFC0ULL, 0xFFC1ULL,
171 0xFFFCULL, 0xFFFDULL, 0xFFFEULL, 0xFFFFULL,
173 0xFFFFFF00ULL, 0xFFFFFF01ULL, 0xFFFFFF02ULL, 0xFFFFFF03ULL,
174 0xFFFFFF3FULL, 0xFFFFFF40ULL, 0xFFFFFF41ULL,
175 0xFFFFFF7EULL, 0xFFFFFF7FULL, 0xFFFFFF80ULL, 0xFFFFFF81ULL, 0xFFFFFF82ULL,
176 0xFFFFFFBFULL, 0xFFFFFFC0ULL, 0xFFFFFFC1ULL,
177 0xFFFFFFFCULL, 0xFFFFFFFDULL, 0xFFFFFFFEULL, 0xFFFFFFFFULL,
179 0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF02ULL,
180 0xFFFFFFFFFFFFFF03ULL,
181 0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL, 0xFFFFFFFFFFFFFF41ULL,
182 0xFFFFFFFFFFFFFF7EULL, 0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
183 0xFFFFFFFFFFFFFF81ULL, 0xFFFFFFFFFFFFFF82ULL,
184 0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL, 0xFFFFFFFFFFFFFFC1ULL,
185 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFEULL,
186 0xFFFFFFFFFFFFFFFFULL
189 #else
191 // shortened version, for use as valgrind regtest
192 #define NVALS 36
194 static ULong val[NVALS]
195 = { 0x00ULL, 0x01ULL,
196 0x3FULL, 0x40ULL,
197 0x7FULL, 0x80ULL,
198 0xBFULL, 0xC0ULL,
199 0xFFULL,
201 0xFF00ULL, 0xFF01ULL,
202 0xFF3FULL, 0xFF40ULL,
203 0xFF7FULL, 0xFF80ULL,
204 0xFFBFULL, 0xFFC0ULL,
205 0xFFFFULL,
207 0xFFFFFF00ULL, 0xFFFFFF01ULL,
208 0xFFFFFF3FULL, 0xFFFFFF40ULL,
209 0xFFFFFF7EULL, 0xFFFFFF7FULL,
210 0xFFFFFFBFULL, 0xFFFFFFC0ULL,
211 0xFFFFFFFFULL,
213 0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL,
214 0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL,
215 0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
216 0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL,
217 0xFFFFFFFFFFFFFFFFULL
220 #endif
222 /////////////////////////////////////
/* Bit positions, within the flags word moved by pushfq/popfq, of the
   six condition codes that the tests set on entry and record on exit. */
#define CC_C (1 << 0)    /* carry           */
#define CC_P (1 << 2)    /* parity          */
#define CC_A (1 << 4)    /* auxiliary carry */
#define CC_Z (1 << 6)    /* zero            */
#define CC_S (1 << 7)    /* sign            */
#define CC_O (1 << 11)   /* overflow        */

/* All six condition-code bits together. */
#define CC_MASK (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C)
233 #define GEN_do_locked_G_E(_name,_eax) \
235 __attribute__((noinline)) void do_locked_G_E_##_name ( void ) \
237 volatile Long e_val, g_val, e_val_before; \
238 Long o, s, z, a, c, p, v1, v2, flags_in; \
239 Long block[4]; \
241 for (v1 = 0; v1 < NVALS; v1++) { \
242 for (v2 = 0; v2 < NVALS; v2++) { \
244 for (o = 0; o < 2; o++) { \
245 for (s = 0; s < 2; s++) { \
246 for (z = 0; z < 2; z++) { \
247 for (a = 0; a < 2; a++) { \
248 for (c = 0; c < 2; c++) { \
249 for (p = 0; p < 2; p++) { \
251 flags_in = (o ? CC_O : 0) \
252 | (s ? CC_S : 0) \
253 | (z ? CC_Z : 0) \
254 | (a ? CC_A : 0) \
255 | (c ? CC_C : 0) \
256 | (p ? CC_P : 0); \
258 g_val = val[v1]; \
259 e_val = val[v2]; \
260 e_val_before = e_val; \
262 block[0] = flags_in; \
263 block[1] = g_val; \
264 block[2] = (long)&e_val; \
265 block[3] = 0; \
266 __asm__ __volatile__( \
267 "movq 0(%0), %%rax\n\t" \
268 "pushq %%rax\n\t" \
269 "popfq\n\t" \
270 "movq 8(%0), %%rax\n\t" \
271 "movq 16(%0), %%rbx\n\t" \
272 "lock; " #_name " %%" #_eax ",(%%rbx)\n\t" \
273 "pushfq\n\t" \
274 "popq %%rax\n\t" \
275 "movq %%rax, 24(%0)\n\t" \
276 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
277 ); \
279 send( \
280 sprintf(outBuf, \
281 "%s G=%016llx E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
282 #_name, g_val, e_val_before, flags_in, \
283 e_val, block[3] & CC_MASK)); \
285 }}}}}} \
287 }} \
290 GEN_do_locked_G_E(addb,al)
291 GEN_do_locked_G_E(addw,ax)
292 GEN_do_locked_G_E(addl,eax)
293 GEN_do_locked_G_E(addq,rax)
295 GEN_do_locked_G_E(orb, al)
296 GEN_do_locked_G_E(orw, ax)
297 GEN_do_locked_G_E(orl, eax)
298 GEN_do_locked_G_E(orq, rax)
300 GEN_do_locked_G_E(adcb,al)
301 GEN_do_locked_G_E(adcw,ax)
302 GEN_do_locked_G_E(adcl,eax)
303 GEN_do_locked_G_E(adcq,rax)
305 GEN_do_locked_G_E(sbbb,al)
306 GEN_do_locked_G_E(sbbw,ax)
307 GEN_do_locked_G_E(sbbl,eax)
308 GEN_do_locked_G_E(sbbq,rax)
310 GEN_do_locked_G_E(andb,al)
311 GEN_do_locked_G_E(andw,ax)
312 GEN_do_locked_G_E(andl,eax)
313 GEN_do_locked_G_E(andq,rax)
315 GEN_do_locked_G_E(subb,al)
316 GEN_do_locked_G_E(subw,ax)
317 GEN_do_locked_G_E(subl,eax)
318 GEN_do_locked_G_E(subq,rax)
320 GEN_do_locked_G_E(xorb,al)
321 GEN_do_locked_G_E(xorw,ax)
322 GEN_do_locked_G_E(xorl,eax)
323 GEN_do_locked_G_E(xorq,rax)
328 #define GEN_do_locked_imm_E(_name,_eax,_imm) \
330 __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void ) \
332 volatile Long e_val, e_val_before; \
333 Long o, s, z, a, c, p, v2, flags_in; \
334 Long block[3]; \
336 for (v2 = 0; v2 < NVALS; v2++) { \
338 for (o = 0; o < 2; o++) { \
339 for (s = 0; s < 2; s++) { \
340 for (z = 0; z < 2; z++) { \
341 for (a = 0; a < 2; a++) { \
342 for (c = 0; c < 2; c++) { \
343 for (p = 0; p < 2; p++) { \
345 flags_in = (o ? CC_O : 0) \
346 | (s ? CC_S : 0) \
347 | (z ? CC_Z : 0) \
348 | (a ? CC_A : 0) \
349 | (c ? CC_C : 0) \
350 | (p ? CC_P : 0); \
352 e_val = val[v2]; \
353 e_val_before = e_val; \
355 block[0] = flags_in; \
356 block[1] = (long)&e_val; \
357 block[2] = 0; \
358 __asm__ __volatile__( \
359 "movq 0(%0), %%rax\n\t" \
360 "pushq %%rax\n\t" \
361 "popfq\n\t" \
362 "movq 8(%0), %%rbx\n\t" \
363 "lock; " #_name " $" #_imm ",(%%rbx)\n\t" \
364 "pushfq\n\t" \
365 "popq %%rax\n\t" \
366 "movq %%rax, 16(%0)\n\t" \
367 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
368 ); \
370 send( \
371 sprintf(outBuf, \
372 "%s I=%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
373 #_name, #_imm, e_val_before, flags_in, \
374 e_val, block[2] & CC_MASK)); \
376 }}}}}} \
381 GEN_do_locked_imm_E(addb,al,0x7F)
382 GEN_do_locked_imm_E(addb,al,0xF1)
383 GEN_do_locked_imm_E(addw,ax,0x7E)
384 GEN_do_locked_imm_E(addw,ax,0x9325)
385 GEN_do_locked_imm_E(addl,eax,0x7D)
386 GEN_do_locked_imm_E(addl,eax,0x31415927)
387 GEN_do_locked_imm_E(addq,rax,0x7D)
388 GEN_do_locked_imm_E(addq,rax,0x31415927)
390 GEN_do_locked_imm_E(orb,al,0x7F)
391 GEN_do_locked_imm_E(orb,al,0xF1)
392 GEN_do_locked_imm_E(orw,ax,0x7E)
393 GEN_do_locked_imm_E(orw,ax,0x9325)
394 GEN_do_locked_imm_E(orl,eax,0x7D)
395 GEN_do_locked_imm_E(orl,eax,0x31415927)
396 GEN_do_locked_imm_E(orq,rax,0x7D)
397 GEN_do_locked_imm_E(orq,rax,0x31415927)
399 GEN_do_locked_imm_E(adcb,al,0x7F)
400 GEN_do_locked_imm_E(adcb,al,0xF1)
401 GEN_do_locked_imm_E(adcw,ax,0x7E)
402 GEN_do_locked_imm_E(adcw,ax,0x9325)
403 GEN_do_locked_imm_E(adcl,eax,0x7D)
404 GEN_do_locked_imm_E(adcl,eax,0x31415927)
405 GEN_do_locked_imm_E(adcq,rax,0x7D)
406 GEN_do_locked_imm_E(adcq,rax,0x31415927)
408 GEN_do_locked_imm_E(sbbb,al,0x7F)
409 GEN_do_locked_imm_E(sbbb,al,0xF1)
410 GEN_do_locked_imm_E(sbbw,ax,0x7E)
411 GEN_do_locked_imm_E(sbbw,ax,0x9325)
412 GEN_do_locked_imm_E(sbbl,eax,0x7D)
413 GEN_do_locked_imm_E(sbbl,eax,0x31415927)
414 GEN_do_locked_imm_E(sbbq,rax,0x7D)
415 GEN_do_locked_imm_E(sbbq,rax,0x31415927)
417 GEN_do_locked_imm_E(andb,al,0x7F)
418 GEN_do_locked_imm_E(andb,al,0xF1)
419 GEN_do_locked_imm_E(andw,ax,0x7E)
420 GEN_do_locked_imm_E(andw,ax,0x9325)
421 GEN_do_locked_imm_E(andl,eax,0x7D)
422 GEN_do_locked_imm_E(andl,eax,0x31415927)
423 GEN_do_locked_imm_E(andq,rax,0x7D)
424 GEN_do_locked_imm_E(andq,rax,0x31415927)
426 GEN_do_locked_imm_E(subb,al,0x7F)
427 GEN_do_locked_imm_E(subb,al,0xF1)
428 GEN_do_locked_imm_E(subw,ax,0x7E)
429 GEN_do_locked_imm_E(subw,ax,0x9325)
430 GEN_do_locked_imm_E(subl,eax,0x7D)
431 GEN_do_locked_imm_E(subl,eax,0x31415927)
432 GEN_do_locked_imm_E(subq,rax,0x7D)
433 GEN_do_locked_imm_E(subq,rax,0x31415927)
435 GEN_do_locked_imm_E(xorb,al,0x7F)
436 GEN_do_locked_imm_E(xorb,al,0xF1)
437 GEN_do_locked_imm_E(xorw,ax,0x7E)
438 GEN_do_locked_imm_E(xorw,ax,0x9325)
439 GEN_do_locked_imm_E(xorl,eax,0x7D)
440 GEN_do_locked_imm_E(xorl,eax,0x31415927)
441 GEN_do_locked_imm_E(xorq,rax,0x7D)
442 GEN_do_locked_imm_E(xorq,rax,0x31415927)
444 #define GEN_do_locked_unary_E(_name,_eax) \
446 __attribute__((noinline)) void do_locked_unary_E_##_name ( void ) \
448 volatile Long e_val, e_val_before; \
449 Long o, s, z, a, c, p, v2, flags_in; \
450 Long block[3]; \
452 for (v2 = 0; v2 < NVALS; v2++) { \
454 for (o = 0; o < 2; o++) { \
455 for (s = 0; s < 2; s++) { \
456 for (z = 0; z < 2; z++) { \
457 for (a = 0; a < 2; a++) { \
458 for (c = 0; c < 2; c++) { \
459 for (p = 0; p < 2; p++) { \
461 flags_in = (o ? CC_O : 0) \
462 | (s ? CC_S : 0) \
463 | (z ? CC_Z : 0) \
464 | (a ? CC_A : 0) \
465 | (c ? CC_C : 0) \
466 | (p ? CC_P : 0); \
468 e_val = val[v2]; \
469 e_val_before = e_val; \
471 block[0] = flags_in; \
472 block[1] = (long)&e_val; \
473 block[2] = 0; \
474 __asm__ __volatile__( \
475 "movq 0(%0), %%rax\n\t" \
476 "pushq %%rax\n\t" \
477 "popfq\n\t" \
478 "movq 8(%0), %%rbx\n\t" \
479 "lock; " #_name " (%%rbx)\n\t" \
480 "pushfq\n\t" \
481 "popq %%rax\n\t" \
482 "movq %%rax, 16(%0)\n\t" \
483 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
484 ); \
486 send( \
487 sprintf(outBuf, \
488 "%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
489 #_name, e_val_before, flags_in, \
490 e_val, block[2] & CC_MASK)); \
492 }}}}}} \
497 GEN_do_locked_unary_E(decb,al)
498 GEN_do_locked_unary_E(decw,ax)
499 GEN_do_locked_unary_E(decl,eax)
500 GEN_do_locked_unary_E(decq,rax)
502 GEN_do_locked_unary_E(incb,al)
503 GEN_do_locked_unary_E(incw,ax)
504 GEN_do_locked_unary_E(incl,eax)
505 GEN_do_locked_unary_E(incq,rax)
507 GEN_do_locked_unary_E(negb,al)
508 GEN_do_locked_unary_E(negw,ax)
509 GEN_do_locked_unary_E(negl,eax)
510 GEN_do_locked_unary_E(negq,rax)
512 GEN_do_locked_unary_E(notb,al)
513 GEN_do_locked_unary_E(notw,ax)
514 GEN_do_locked_unary_E(notl,eax)
515 GEN_do_locked_unary_E(notq,rax)
518 /////////////////////////////////////////////////////////////////
520 ULong btsq_mem ( UChar* base, int bitno )
522 ULong res;
523 __asm__
524 __volatile__("lock; btsq\t%2, %0\n\t"
525 "setc %%dl\n\t"
526 "movzbq %%dl,%1\n"
527 : "=m" (*base), "=r" (res)
528 : "r" ((ULong)bitno) : "rdx","cc","memory" );
529 /* Pretty meaningless to dereference base here, but that's what you
530 have to do to get a btsl insn which refers to memory starting at
531 base. */
532 return res;
534 ULong btsl_mem ( UChar* base, int bitno )
536 ULong res;
537 __asm__
538 __volatile__("lock; btsl\t%2, %0\n\t"
539 "setc %%dl\n\t"
540 "movzbq %%dl,%1\n"
541 : "=m" (*base), "=r" (res)
542 : "r" ((UInt)bitno));
543 return res;
545 ULong btsw_mem ( UChar* base, int bitno )
547 ULong res;
548 __asm__
549 __volatile__("lock; btsw\t%w2, %0\n\t"
550 "setc %%dl\n\t"
551 "movzbq %%dl,%1\n"
552 : "=m" (*base), "=r" (res)
553 : "r" ((ULong)bitno));
554 return res;
557 ULong btrq_mem ( UChar* base, int bitno )
559 ULong res;
560 __asm__
561 __volatile__("lock; btrq\t%2, %0\n\t"
562 "setc %%dl\n\t"
563 "movzbq %%dl,%1\n"
564 : "=m" (*base), "=r" (res)
565 : "r" ((ULong)bitno));
566 return res;
568 ULong btrl_mem ( UChar* base, int bitno )
570 ULong res;
571 __asm__
572 __volatile__("lock; btrl\t%2, %0\n\t"
573 "setc %%dl\n\t"
574 "movzbq %%dl,%1\n"
575 : "=m" (*base), "=r" (res)
576 : "r" ((UInt)bitno));
577 return res;
579 ULong btrw_mem ( UChar* base, int bitno )
581 ULong res;
582 __asm__
583 __volatile__("lock; btrw\t%w2, %0\n\t"
584 "setc %%dl\n\t"
585 "movzbq %%dl,%1\n"
586 : "=m" (*base), "=r" (res)
587 : "r" ((ULong)bitno));
588 return res;
591 ULong btcq_mem ( UChar* base, int bitno )
593 ULong res;
594 __asm__
595 __volatile__("lock; btcq\t%2, %0\n\t"
596 "setc %%dl\n\t"
597 "movzbq %%dl,%1\n"
598 : "=m" (*base), "=r" (res)
599 : "r" ((ULong)bitno));
600 return res;
602 ULong btcl_mem ( UChar* base, int bitno )
604 ULong res;
605 __asm__
606 __volatile__("lock; btcl\t%2, %0\n\t"
607 "setc %%dl\n\t"
608 "movzbq %%dl,%1\n"
609 : "=m" (*base), "=r" (res)
610 : "r" ((UInt)bitno));
611 return res;
613 ULong btcw_mem ( UChar* base, int bitno )
615 ULong res;
616 __asm__
617 __volatile__("lock; btcw\t%w2, %0\n\t"
618 "setc %%dl\n\t"
619 "movzbq %%dl,%1\n"
620 : "=m" (*base), "=r" (res)
621 : "r" ((ULong)bitno));
622 return res;
625 ULong btq_mem ( UChar* base, int bitno )
627 ULong res;
628 __asm__
629 __volatile__("btq\t%2, %0\n\t"
630 "setc %%dl\n\t"
631 "movzbq %%dl,%1\n"
632 : "=m" (*base), "=r" (res)
633 : "r" ((ULong)bitno)
634 : "cc", "memory");
635 return res;
637 ULong btl_mem ( UChar* base, int bitno )
639 ULong res;
640 __asm__
641 __volatile__("btl\t%2, %0\n\t"
642 "setc %%dl\n\t"
643 "movzbq %%dl,%1\n"
644 : "=m" (*base), "=r" (res)
645 : "r" ((UInt)bitno)
646 : "cc", "memory");
647 return res;
649 ULong btw_mem ( UChar* base, int bitno )
651 ULong res;
652 __asm__
653 __volatile__("btw\t%w2, %0\n\t"
654 "setc %%dl\n\t"
655 "movzbq %%dl,%1\n"
656 : "=m" (*base), "=r" (res)
657 : "r" ((ULong)bitno));
658 return res;
661 ULong rol1 ( ULong x )
663 return (x << 1) | (x >> 63);
666 void do_bt_G_E_tests ( void )
668 ULong n, bitoff, op;
669 ULong c;
670 UChar* block;
671 ULong carrydep, res;;
673 /*------------------------ MEM-Q -----------------------*/
675 carrydep = 0;
676 block = calloc(200,1);
677 block += 100;
678 /* Valid bit offsets are -800 .. 799 inclusive. */
680 for (n = 0; n < 10000; n++) {
681 bitoff = (myrandom() % 1600) - 800;
682 op = myrandom() % 4;
683 c = 2;
684 switch (op) {
685 case 0: c = btsq_mem(block, bitoff); break;
686 case 1: c = btrq_mem(block, bitoff); break;
687 case 2: c = btcq_mem(block, bitoff); break;
688 case 3: c = btq_mem(block, bitoff); break;
690 c &= 255;
691 assert(c == 0 || c == 1);
692 carrydep = c ? (rol1(carrydep) ^ (Long)bitoff) : carrydep;
695 /* Compute final result */
696 block -= 100;
697 res = 0;
698 for (n = 0; n < 200; n++) {
699 UChar ch = block[n];
700 /* printf("%d ", (int)block[n]); */
701 res = rol1(res) ^ (ULong)ch;
704 send( sprintf(outBuf,
705 "bt{s,r,c}q: final res 0x%llx, carrydep 0x%llx\n",
706 res, carrydep));
707 free(block);
709 /*------------------------ MEM-L -----------------------*/
711 carrydep = 0;
712 block = calloc(200,1);
713 block += 100;
714 /* Valid bit offsets are -800 .. 799 inclusive. */
716 for (n = 0; n < 10000; n++) {
717 bitoff = (myrandom() % 1600) - 800;
718 op = myrandom() % 4;
719 c = 2;
720 switch (op) {
721 case 0: c = btsl_mem(block, bitoff); break;
722 case 1: c = btrl_mem(block, bitoff); break;
723 case 2: c = btcl_mem(block, bitoff); break;
724 case 3: c = btl_mem(block, bitoff); break;
726 c &= 255;
727 assert(c == 0 || c == 1);
728 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
731 /* Compute final result */
732 block -= 100;
733 res = 0;
734 for (n = 0; n < 200; n++) {
735 UChar ch = block[n];
736 /* printf("%d ", (int)block[n]); */
737 res = rol1(res) ^ (ULong)ch;
740 send( sprintf(outBuf,
741 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
742 res, carrydep));
743 free(block);
745 /*------------------------ MEM-W -----------------------*/
747 carrydep = 0;
748 block = calloc(200,1);
749 block += 100;
750 /* Valid bit offsets are -800 .. 799 inclusive. */
752 for (n = 0; n < 10000; n++) {
753 bitoff = (myrandom() % 1600) - 800;
754 op = myrandom() % 4;
755 c = 2;
756 switch (op) {
757 case 0: c = btsw_mem(block, bitoff); break;
758 case 1: c = btrw_mem(block, bitoff); break;
759 case 2: c = btcw_mem(block, bitoff); break;
760 case 3: c = btw_mem(block, bitoff); break;
762 c &= 255;
763 assert(c == 0 || c == 1);
764 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
767 /* Compute final result */
768 block -= 100;
769 res = 0;
770 for (n = 0; n < 200; n++) {
771 UChar ch = block[n];
772 /* printf("%d ", (int)block[n]); */
773 res = rol1(res) ^ (ULong)ch;
776 send(sprintf(outBuf,
777 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
778 res, carrydep));
779 free(block);
783 /////////////////////////////////////////////////////////////////
785 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
786 also reconstruct the original bits 0, 1, 2, 3 by looking at the
787 carry flag. Returned result has mashed bits 0-3 at the bottom and
788 the reconstructed original bits 0-3 as 4-7. */
790 ULong mash_mem_Q ( ULong* origp )
792 ULong reconstructed, mashed;
793 __asm__ __volatile__ (
794 "movq %2, %%rdx\n\t"
796 "movq $0, %%rax\n\t"
797 "\n\t"
798 "btq $0, (%%rdx)\n\t"
799 "setb %%cl\n\t"
800 "movzbq %%cl, %%rcx\n\t"
801 "orq %%rcx, %%rax\n\t"
802 "\n\t"
803 "lock; btsq $1, (%%rdx)\n\t"
804 "setb %%cl\n\t"
805 "movzbq %%cl, %%rcx\n\t"
806 "shlq $1, %%rcx\n\t"
807 "orq %%rcx, %%rax\n\t"
808 "\n\t"
809 "lock; btrq $2, (%%rdx)\n\t"
810 "setb %%cl\n\t"
811 "movzbq %%cl, %%rcx\n\t"
812 "shlq $2, %%rcx\n\t"
813 "orq %%rcx, %%rax\n\t"
814 "\n\t"
815 "lock; btcq $3, (%%rdx)\n\t"
816 "setb %%cl\n\t"
817 "movzbq %%cl, %%rcx\n\t"
818 "shlq $3, %%rcx\n\t"
819 "orq %%rcx, %%rax\n\t"
820 "\n\t"
821 "movq %%rax, %0\n\t"
822 "movq (%%rdx), %1"
823 : "=r" (reconstructed), "=r" (mashed)
824 : "r" (origp)
825 : "rax", "rcx", "rdx", "cc");
826 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
829 ULong mash_mem_L ( UInt* origp )
831 ULong reconstructed; UInt mashed;
832 __asm__ __volatile__ (
833 "movq %2, %%rdx\n\t"
835 "movq $0, %%rax\n\t"
836 "\n\t"
837 "btl $0, (%%rdx)\n\t"
838 "setb %%cl\n\t"
839 "movzbq %%cl, %%rcx\n\t"
840 "orq %%rcx, %%rax\n\t"
841 "\n\t"
842 "lock; btsl $1, (%%rdx)\n\t"
843 "setb %%cl\n\t"
844 "movzbq %%cl, %%rcx\n\t"
845 "shlq $1, %%rcx\n\t"
846 "orq %%rcx, %%rax\n\t"
847 "\n\t"
848 "lock; btrl $2, (%%rdx)\n\t"
849 "setb %%cl\n\t"
850 "movzbq %%cl, %%rcx\n\t"
851 "shlq $2, %%rcx\n\t"
852 "orq %%rcx, %%rax\n\t"
853 "\n\t"
854 "lock; btcl $3, (%%rdx)\n\t"
855 "setb %%cl\n\t"
856 "movzbq %%cl, %%rcx\n\t"
857 "shlq $3, %%rcx\n\t"
858 "orq %%rcx, %%rax\n\t"
859 "\n\t"
860 "movq %%rax, %0\n\t"
861 "movl (%%rdx), %1"
862 : "=r" (reconstructed), "=r" (mashed)
863 : "r" (origp)
864 : "rax", "rcx", "rdx", "cc");
865 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
868 ULong mash_mem_W ( UShort* origp )
870 ULong reconstructed, mashed;
871 __asm__ __volatile__ (
872 "movq %2, %%rdx\n\t"
874 "movq $0, %%rax\n\t"
875 "\n\t"
876 "btw $0, (%%rdx)\n\t"
877 "setb %%cl\n\t"
878 "movzbq %%cl, %%rcx\n\t"
879 "orq %%rcx, %%rax\n\t"
880 "\n\t"
881 "lock; btsw $1, (%%rdx)\n\t"
882 "setb %%cl\n\t"
883 "movzbq %%cl, %%rcx\n\t"
884 "shlq $1, %%rcx\n\t"
885 "orq %%rcx, %%rax\n\t"
886 "\n\t"
887 "lock; btrw $2, (%%rdx)\n\t"
888 "setb %%cl\n\t"
889 "movzbq %%cl, %%rcx\n\t"
890 "shlq $2, %%rcx\n\t"
891 "orq %%rcx, %%rax\n\t"
892 "\n\t"
893 "lock; btcw $3, (%%rdx)\n\t"
894 "setb %%cl\n\t"
895 "movzbq %%cl, %%rcx\n\t"
896 "shlq $3, %%rcx\n\t"
897 "orq %%rcx, %%rax\n\t"
898 "\n\t"
899 "movq %%rax, %0\n\t"
900 "movzwq (%%rdx), %1"
901 : "=r" (reconstructed), "=r" (mashed)
902 : "r" (origp)
903 : "rax", "rcx", "rdx", "cc");
904 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
908 void do_bt_imm_E_tests( void )
910 ULong i;
911 ULong* iiq = malloc(sizeof(ULong));
912 UInt* iil = malloc(sizeof(UInt));
913 UShort* iiw = malloc(sizeof(UShort));
914 for (i = 0; i < 0x10; i++) {
915 *iiq = i;
916 *iil = i;
917 *iiw = i;
918 send(sprintf(outBuf,"0x%llx -> 0x%02llx 0x%02llx 0x%02llx\n", i,
919 mash_mem_Q(iiq), mash_mem_L(iil), mash_mem_W(iiw)));
921 free(iiq);
922 free(iil);
923 free(iiw);
927 /////////////////////////////////////////////////////////////////
929 int main ( void )
931 do_locked_G_E_addb();
932 do_locked_G_E_addw();
933 do_locked_G_E_addl();
934 do_locked_G_E_addq();
936 do_locked_G_E_orb();
937 do_locked_G_E_orw();
938 do_locked_G_E_orl();
939 do_locked_G_E_orq();
941 do_locked_G_E_adcb();
942 do_locked_G_E_adcw();
943 do_locked_G_E_adcl();
944 do_locked_G_E_adcq();
946 do_locked_G_E_sbbb();
947 do_locked_G_E_sbbw();
948 do_locked_G_E_sbbl();
949 do_locked_G_E_sbbq();
951 do_locked_G_E_andb();
952 do_locked_G_E_andw();
953 do_locked_G_E_andl();
954 do_locked_G_E_andq();
956 do_locked_G_E_subb();
957 do_locked_G_E_subw();
958 do_locked_G_E_subl();
959 do_locked_G_E_subq();
961 do_locked_G_E_xorb();
962 do_locked_G_E_xorw();
963 do_locked_G_E_xorl();
964 do_locked_G_E_xorq();
965 // 4 * 7
967 do_locked_imm_E_addb_0x7F();
968 do_locked_imm_E_addb_0xF1();
969 do_locked_imm_E_addw_0x7E();
970 do_locked_imm_E_addw_0x9325();
971 do_locked_imm_E_addl_0x7D();
972 do_locked_imm_E_addl_0x31415927();
973 do_locked_imm_E_addq_0x7D();
974 do_locked_imm_E_addq_0x31415927();
976 do_locked_imm_E_orb_0x7F();
977 do_locked_imm_E_orb_0xF1();
978 do_locked_imm_E_orw_0x7E();
979 do_locked_imm_E_orw_0x9325();
980 do_locked_imm_E_orl_0x7D();
981 do_locked_imm_E_orl_0x31415927();
982 do_locked_imm_E_orq_0x7D();
983 do_locked_imm_E_orq_0x31415927();
985 do_locked_imm_E_adcb_0x7F();
986 do_locked_imm_E_adcb_0xF1();
987 do_locked_imm_E_adcw_0x7E();
988 do_locked_imm_E_adcw_0x9325();
989 do_locked_imm_E_adcl_0x7D();
990 do_locked_imm_E_adcl_0x31415927();
991 do_locked_imm_E_adcq_0x7D();
992 do_locked_imm_E_adcq_0x31415927();
994 do_locked_imm_E_sbbb_0x7F();
995 do_locked_imm_E_sbbb_0xF1();
996 do_locked_imm_E_sbbw_0x7E();
997 do_locked_imm_E_sbbw_0x9325();
998 do_locked_imm_E_sbbl_0x7D();
999 do_locked_imm_E_sbbl_0x31415927();
1000 do_locked_imm_E_sbbq_0x7D();
1001 do_locked_imm_E_sbbq_0x31415927();
1003 do_locked_imm_E_andb_0x7F();
1004 do_locked_imm_E_andb_0xF1();
1005 do_locked_imm_E_andw_0x7E();
1006 do_locked_imm_E_andw_0x9325();
1007 do_locked_imm_E_andl_0x7D();
1008 do_locked_imm_E_andl_0x31415927();
1009 do_locked_imm_E_andq_0x7D();
1010 do_locked_imm_E_andq_0x31415927();
1012 do_locked_imm_E_subb_0x7F();
1013 do_locked_imm_E_subb_0xF1();
1014 do_locked_imm_E_subw_0x7E();
1015 do_locked_imm_E_subw_0x9325();
1016 do_locked_imm_E_subl_0x7D();
1017 do_locked_imm_E_subl_0x31415927();
1018 do_locked_imm_E_subq_0x7D();
1019 do_locked_imm_E_subq_0x31415927();
1021 do_locked_imm_E_xorb_0x7F();
1022 do_locked_imm_E_xorb_0xF1();
1023 do_locked_imm_E_xorw_0x7E();
1024 do_locked_imm_E_xorw_0x9325();
1025 do_locked_imm_E_xorl_0x7D();
1026 do_locked_imm_E_xorl_0x31415927();
1027 do_locked_imm_E_xorq_0x7D();
1028 do_locked_imm_E_xorq_0x31415927();
1029 // 4 * 7 + 8 * 7 == 84
1031 do_locked_unary_E_decb();
1032 do_locked_unary_E_decw();
1033 do_locked_unary_E_decl();
1034 do_locked_unary_E_decq();
1036 do_locked_unary_E_incb();
1037 do_locked_unary_E_incw();
1038 do_locked_unary_E_incl();
1039 do_locked_unary_E_incq();
1041 do_locked_unary_E_negb();
1042 do_locked_unary_E_negw();
1043 do_locked_unary_E_negl();
1044 do_locked_unary_E_negq();
1046 do_locked_unary_E_notb();
1047 do_locked_unary_E_notw();
1048 do_locked_unary_E_notl();
1049 do_locked_unary_E_notq();
1050 // 100
1052 do_bt_G_E_tests();
1053 // 109
1054 do_bt_imm_E_tests();
1055 // 118
1057 // So there should be 118 lock-prefixed instructions in the
1058 // disassembly of this compilation unit.
1059 // confirm with
1060 // objdump -d ./amd64locked | grep lock | grep -v do_lock | grep -v elf64 | wc
1063 { UInt crcExpd = 0xDF0656F1;
1064 theCRC = crcFinalise( theCRC );
1065 if (theCRC == crcExpd) {
1066 printf("amd64locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
1067 theCRC, crcExpd);
1068 } else {
1069 printf("amd64locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
1070 theCRC, crcExpd);
1071 printf("amd64locked: set #define VERBOSE 1 to diagnose\n");
1075 return 0;