/* Short integer aliases used throughout this test.  Each alias names
   the signedness and the underlying C type it maps to; no width beyond
   what the C type itself guarantees is implied here. */
typedef unsigned int            UInt;    /* unsigned int               */
typedef unsigned char           UChar;   /* unsigned char (raw bytes)  */
typedef unsigned long long int  ULong;   /* unsigned long long         */
typedef signed long long int    Long;    /* signed long long           */
typedef signed int              Int;     /* signed int                 */
typedef unsigned short          UShort;  /* unsigned short             */
typedef unsigned long           UWord;   /* unsigned long              */
/* Deterministic pseudo-random number source for the tests.

   Keeps two static state words (m_w seeded with 11, m_z seeded with 13)
   and advances both on every call, so the sequence is identical on each
   run of the program.  All arithmetic is on `unsigned`, so overflow
   wraps rather than being undefined.

   Returns the next value of the sequence.  Marked noinline so the call
   is not folded into its callers. */
__attribute__((noinline))
unsigned myrandom(void)
{
   /* Simple multiply-with-carry random generator. */
   static unsigned m_w = 11;
   static unsigned m_z = 13;

   /* Each state word: multiplier * low 16 bits + carry (high bits). */
   m_z = 36969 * (m_z & 65535) + (m_z >> 16);
   m_w = 18000 * (m_w & 65535) + (m_w >> 16);

   /* Low half of m_z becomes the high half of the result. */
   return (m_z << 16) + m_w;
}
31 /////////////////////////////////////////////////////////////////
32 // BEGIN crc32 stuff //
33 /////////////////////////////////////////////////////////////////
35 static const UInt crc32Table
[256] = {
37 /*-- Ugly, innit? --*/
39 0x00000000L
, 0x04c11db7L
, 0x09823b6eL
, 0x0d4326d9L
,
40 0x130476dcL
, 0x17c56b6bL
, 0x1a864db2L
, 0x1e475005L
,
41 0x2608edb8L
, 0x22c9f00fL
, 0x2f8ad6d6L
, 0x2b4bcb61L
,
42 0x350c9b64L
, 0x31cd86d3L
, 0x3c8ea00aL
, 0x384fbdbdL
,
43 0x4c11db70L
, 0x48d0c6c7L
, 0x4593e01eL
, 0x4152fda9L
,
44 0x5f15adacL
, 0x5bd4b01bL
, 0x569796c2L
, 0x52568b75L
,
45 0x6a1936c8L
, 0x6ed82b7fL
, 0x639b0da6L
, 0x675a1011L
,
46 0x791d4014L
, 0x7ddc5da3L
, 0x709f7b7aL
, 0x745e66cdL
,
47 0x9823b6e0L
, 0x9ce2ab57L
, 0x91a18d8eL
, 0x95609039L
,
48 0x8b27c03cL
, 0x8fe6dd8bL
, 0x82a5fb52L
, 0x8664e6e5L
,
49 0xbe2b5b58L
, 0xbaea46efL
, 0xb7a96036L
, 0xb3687d81L
,
50 0xad2f2d84L
, 0xa9ee3033L
, 0xa4ad16eaL
, 0xa06c0b5dL
,
51 0xd4326d90L
, 0xd0f37027L
, 0xddb056feL
, 0xd9714b49L
,
52 0xc7361b4cL
, 0xc3f706fbL
, 0xceb42022L
, 0xca753d95L
,
53 0xf23a8028L
, 0xf6fb9d9fL
, 0xfbb8bb46L
, 0xff79a6f1L
,
54 0xe13ef6f4L
, 0xe5ffeb43L
, 0xe8bccd9aL
, 0xec7dd02dL
,
55 0x34867077L
, 0x30476dc0L
, 0x3d044b19L
, 0x39c556aeL
,
56 0x278206abL
, 0x23431b1cL
, 0x2e003dc5L
, 0x2ac12072L
,
57 0x128e9dcfL
, 0x164f8078L
, 0x1b0ca6a1L
, 0x1fcdbb16L
,
58 0x018aeb13L
, 0x054bf6a4L
, 0x0808d07dL
, 0x0cc9cdcaL
,
59 0x7897ab07L
, 0x7c56b6b0L
, 0x71159069L
, 0x75d48ddeL
,
60 0x6b93dddbL
, 0x6f52c06cL
, 0x6211e6b5L
, 0x66d0fb02L
,
61 0x5e9f46bfL
, 0x5a5e5b08L
, 0x571d7dd1L
, 0x53dc6066L
,
62 0x4d9b3063L
, 0x495a2dd4L
, 0x44190b0dL
, 0x40d816baL
,
63 0xaca5c697L
, 0xa864db20L
, 0xa527fdf9L
, 0xa1e6e04eL
,
64 0xbfa1b04bL
, 0xbb60adfcL
, 0xb6238b25L
, 0xb2e29692L
,
65 0x8aad2b2fL
, 0x8e6c3698L
, 0x832f1041L
, 0x87ee0df6L
,
66 0x99a95df3L
, 0x9d684044L
, 0x902b669dL
, 0x94ea7b2aL
,
67 0xe0b41de7L
, 0xe4750050L
, 0xe9362689L
, 0xedf73b3eL
,
68 0xf3b06b3bL
, 0xf771768cL
, 0xfa325055L
, 0xfef34de2L
,
69 0xc6bcf05fL
, 0xc27dede8L
, 0xcf3ecb31L
, 0xcbffd686L
,
70 0xd5b88683L
, 0xd1799b34L
, 0xdc3abdedL
, 0xd8fba05aL
,
71 0x690ce0eeL
, 0x6dcdfd59L
, 0x608edb80L
, 0x644fc637L
,
72 0x7a089632L
, 0x7ec98b85L
, 0x738aad5cL
, 0x774bb0ebL
,
73 0x4f040d56L
, 0x4bc510e1L
, 0x46863638L
, 0x42472b8fL
,
74 0x5c007b8aL
, 0x58c1663dL
, 0x558240e4L
, 0x51435d53L
,
75 0x251d3b9eL
, 0x21dc2629L
, 0x2c9f00f0L
, 0x285e1d47L
,
76 0x36194d42L
, 0x32d850f5L
, 0x3f9b762cL
, 0x3b5a6b9bL
,
77 0x0315d626L
, 0x07d4cb91L
, 0x0a97ed48L
, 0x0e56f0ffL
,
78 0x1011a0faL
, 0x14d0bd4dL
, 0x19939b94L
, 0x1d528623L
,
79 0xf12f560eL
, 0xf5ee4bb9L
, 0xf8ad6d60L
, 0xfc6c70d7L
,
80 0xe22b20d2L
, 0xe6ea3d65L
, 0xeba91bbcL
, 0xef68060bL
,
81 0xd727bbb6L
, 0xd3e6a601L
, 0xdea580d8L
, 0xda649d6fL
,
82 0xc423cd6aL
, 0xc0e2d0ddL
, 0xcda1f604L
, 0xc960ebb3L
,
83 0xbd3e8d7eL
, 0xb9ff90c9L
, 0xb4bcb610L
, 0xb07daba7L
,
84 0xae3afba2L
, 0xaafbe615L
, 0xa7b8c0ccL
, 0xa379dd7bL
,
85 0x9b3660c6L
, 0x9ff77d71L
, 0x92b45ba8L
, 0x9675461fL
,
86 0x8832161aL
, 0x8cf30badL
, 0x81b02d74L
, 0x857130c3L
,
87 0x5d8a9099L
, 0x594b8d2eL
, 0x5408abf7L
, 0x50c9b640L
,
88 0x4e8ee645L
, 0x4a4ffbf2L
, 0x470cdd2bL
, 0x43cdc09cL
,
89 0x7b827d21L
, 0x7f436096L
, 0x7200464fL
, 0x76c15bf8L
,
90 0x68860bfdL
, 0x6c47164aL
, 0x61043093L
, 0x65c52d24L
,
91 0x119b4be9L
, 0x155a565eL
, 0x18197087L
, 0x1cd86d30L
,
92 0x029f3d35L
, 0x065e2082L
, 0x0b1d065bL
, 0x0fdc1becL
,
93 0x3793a651L
, 0x3352bbe6L
, 0x3e119d3fL
, 0x3ad08088L
,
94 0x2497d08dL
, 0x2056cd3aL
, 0x2d15ebe3L
, 0x29d4f654L
,
95 0xc5a92679L
, 0xc1683bceL
, 0xcc2b1d17L
, 0xc8ea00a0L
,
96 0xd6ad50a5L
, 0xd26c4d12L
, 0xdf2f6bcbL
, 0xdbee767cL
,
97 0xe3a1cbc1L
, 0xe760d676L
, 0xea23f0afL
, 0xeee2ed18L
,
98 0xf0a5bd1dL
, 0xf464a0aaL
, 0xf9278673L
, 0xfde69bc4L
,
99 0x89b8fd09L
, 0x8d79e0beL
, 0x803ac667L
, 0x84fbdbd0L
,
100 0x9abc8bd5L
, 0x9e7d9662L
, 0x933eb0bbL
, 0x97ffad0cL
,
101 0xafb010b1L
, 0xab710d06L
, 0xa6322bdfL
, 0xa2f33668L
,
102 0xbcb4666dL
, 0xb8757bdaL
, 0xb5365d03L
, 0xb1f740b4L
/* UPDATE_CRC(crcVar, cha): replaces crcVar with (crcVar << 8) XORed with
   a crc32Table entry whose index starts from the top byte of the old
   value (crcVar >> 24).
   NOTE(review): the remainder of the index expression and the end of
   the macro are on lines missing from this listing; presumably `cha` is
   XORed into the index -- confirm against the complete file. */
105 #define UPDATE_CRC(crcVar,cha) \
107 crcVar = (crcVar << 8) ^ \
108 crc32Table[(crcVar >> 24) ^ \
/* Folds a byte range into a running CRC using UPDATE_CRC.
     bytes  - first byte to process
     nBytes - number of bytes to process
     crcIn  - CRC value to continue from
   The first loop applies UPDATE_CRC to bytes[0..3] while at least four
   bytes remain; the second loop handles the remainder one byte at a
   time.
   NOTE(review): the declaration of `crc`, the statements that advance
   `bytes` / decrement `nBytes`, the closing braces and the return are on
   lines missing from this listing; presumably crc starts as crcIn and is
   the return value -- confirm against the complete file. */
112 static UInt
crcBytes ( UChar
* bytes
, UWord nBytes
, UInt crcIn
)
115 while (nBytes
>= 4) {
116 UPDATE_CRC(crc
, bytes
[0]);
117 UPDATE_CRC(crc
, bytes
[1]);
118 UPDATE_CRC(crc
, bytes
[2]);
119 UPDATE_CRC(crc
, bytes
[3]);
123 while (nBytes
>= 1) {
124 UPDATE_CRC(crc
, bytes
[0]);
/* Converts the running CRC into its final reported value; the driver
   assigns the result back to theCRC and compares it with the expected
   constant.
   NOTE(review): the function body is on lines missing from this
   listing, so the exact transformation cannot be stated from here. */
131 static UInt
crcFinalise ( UInt crc
) {
137 static UInt theCRC
= 0xFFFFFFFF;
139 static HChar outBuf
[1024];
140 // take output that's in outBuf, length as specified, and
141 // update the running crc.
142 static void send ( int nbytes
)
144 assert( ((unsigned int)nbytes
) < sizeof(outBuf
)-1);
145 assert(outBuf
[nbytes
] == 0);
146 theCRC
= crcBytes( (UChar
*)&outBuf
[0], nbytes
, theCRC
);
147 if (VERBOSE
) printf("SEND %08x %s", theCRC
, outBuf
);
151 /////////////////////////////////////////////////////////////////
152 // END crc32 stuff //
153 /////////////////////////////////////////////////////////////////
160 static ULong val
[NVALS
]
161 = { 0x00ULL
, 0x01ULL
, 0x02ULL
, 0x03ULL
,
162 0x3FULL
, 0x40ULL
, 0x41ULL
,
163 0x7EULL
, 0x7FULL
, 0x80ULL
, 0x81ULL
, 0x82ULL
,
164 0xBFULL
, 0xC0ULL
, 0xC1ULL
,
165 0xFCULL
, 0xFDULL
, 0xFEULL
, 0xFFULL
,
167 0xFF00ULL
, 0xFF01ULL
, 0xFF02ULL
, 0xFF03ULL
,
168 0xFF3FULL
, 0xFF40ULL
, 0xFF41ULL
,
169 0xFF7EULL
, 0xFF7FULL
, 0xFF80ULL
, 0xFF81ULL
, 0xFF82ULL
,
170 0xFFBFULL
, 0xFFC0ULL
, 0xFFC1ULL
,
171 0xFFFCULL
, 0xFFFDULL
, 0xFFFEULL
, 0xFFFFULL
,
173 0xFFFFFF00ULL
, 0xFFFFFF01ULL
, 0xFFFFFF02ULL
, 0xFFFFFF03ULL
,
174 0xFFFFFF3FULL
, 0xFFFFFF40ULL
, 0xFFFFFF41ULL
,
175 0xFFFFFF7EULL
, 0xFFFFFF7FULL
, 0xFFFFFF80ULL
, 0xFFFFFF81ULL
, 0xFFFFFF82ULL
,
176 0xFFFFFFBFULL
, 0xFFFFFFC0ULL
, 0xFFFFFFC1ULL
,
177 0xFFFFFFFCULL
, 0xFFFFFFFDULL
, 0xFFFFFFFEULL
, 0xFFFFFFFFULL
,
179 0xFFFFFFFFFFFFFF00ULL
, 0xFFFFFFFFFFFFFF01ULL
, 0xFFFFFFFFFFFFFF02ULL
,
180 0xFFFFFFFFFFFFFF03ULL
,
181 0xFFFFFFFFFFFFFF3FULL
, 0xFFFFFFFFFFFFFF40ULL
, 0xFFFFFFFFFFFFFF41ULL
,
182 0xFFFFFFFFFFFFFF7EULL
, 0xFFFFFFFFFFFFFF7FULL
, 0xFFFFFFFFFFFFFF80ULL
,
183 0xFFFFFFFFFFFFFF81ULL
, 0xFFFFFFFFFFFFFF82ULL
,
184 0xFFFFFFFFFFFFFFBFULL
, 0xFFFFFFFFFFFFFFC0ULL
, 0xFFFFFFFFFFFFFFC1ULL
,
185 0xFFFFFFFFFFFFFFFCULL
, 0xFFFFFFFFFFFFFFFDULL
, 0xFFFFFFFFFFFFFFFEULL
,
186 0xFFFFFFFFFFFFFFFFULL
/* Shortened operand table (fewer entries than the full table above),
   intended, per the line comment below, for use as a valgrind regtest.
   NOTE(review): this listing skips several original lines inside the
   initialiser (between the first row and the 0xFF00 row, and between
   the later groups) as well as the closing brace, so the entry list
   shown here is incomplete -- consult the complete file for the real
   contents and for the NVALS value that goes with it. */
191 // shortened version, for use as valgrind regtest
194 static ULong val
[NVALS
]
195 = { 0x00ULL
, 0x01ULL
,
201 0xFF00ULL
, 0xFF01ULL
,
202 0xFF3FULL
, 0xFF40ULL
,
203 0xFF7FULL
, 0xFF80ULL
,
204 0xFFBFULL
, 0xFFC0ULL
,
207 0xFFFFFF00ULL
, 0xFFFFFF01ULL
,
208 0xFFFFFF3FULL
, 0xFFFFFF40ULL
,
209 0xFFFFFF7EULL
, 0xFFFFFF7FULL
,
210 0xFFFFFFBFULL
, 0xFFFFFFC0ULL
,
213 0xFFFFFFFFFFFFFF00ULL
, 0xFFFFFFFFFFFFFF01ULL
,
214 0xFFFFFFFFFFFFFF3FULL
, 0xFFFFFFFFFFFFFF40ULL
,
215 0xFFFFFFFFFFFFFF7FULL
, 0xFFFFFFFFFFFFFF80ULL
,
216 0xFFFFFFFFFFFFFFBFULL
, 0xFFFFFFFFFFFFFFC0ULL
,
217 0xFFFFFFFFFFFFFFFFULL
222 /////////////////////////////////////
/* Union of the six condition-code bit masks (CC_C, CC_P, CC_A, CC_Z,
   CC_S, CC_O).  The generated tests AND the captured flags word with
   this mask before printing it as CCOUT. */
#define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)
/* GEN_do_locked_G_E(_name, _eax) defines do_locked_G_E_<_name>(void), a
   noinline test for a lock-prefixed "register op memory" instruction.
     _name - instruction mnemonic, pasted into the function name and the
             asm text
     _eax  - register name used as the source operand in the asm text
   Visible structure: loops over v1 and v2 in [0, NVALS) and over six
   0/1 selectors (o, s, z, a, c, p); each iteration builds flags_in,
   stores it in block[0] and the address of e_val in block[2], then runs
   an asm sequence that loads rax from offset 8 and rbx from offset 16 of
   block, executes `lock; <_name> %<_eax>,(%rbx)`, and stores rax to
   offset 24.  A result line is then formatted from g_val, the E value
   before and after, flags_in and block[3] & CC_MASK.
   NOTE(review): many continuation lines are missing from this listing
   (braces, the rest of the flags_in expression, the initialisation of
   e_val / g_val / block[1], the instructions that install and capture
   the flags, and the start of the output call); presumably the flags
   are installed from block[0] and captured into block[3] -- confirm
   against the complete file. */
233 #define GEN_do_locked_G_E(_name,_eax) \
235 __attribute__((noinline)) void do_locked_G_E_##_name ( void ) \
237 volatile Long e_val, g_val, e_val_before; \
238 Long o, s, z, a, c, p, v1, v2, flags_in; \
241 for (v1 = 0; v1 < NVALS; v1++) { \
242 for (v2 = 0; v2 < NVALS; v2++) { \
244 for (o = 0; o < 2; o++) { \
245 for (s = 0; s < 2; s++) { \
246 for (z = 0; z < 2; z++) { \
247 for (a = 0; a < 2; a++) { \
248 for (c = 0; c < 2; c++) { \
249 for (p = 0; p < 2; p++) { \
251 flags_in = (o ? CC_O : 0) \
260 e_val_before = e_val; \
262 block[0] = flags_in; \
264 block[2] = (long)&e_val; \
266 __asm__ __volatile__( \
267 "movq 0(%0), %%rax\n\t" \
270 "movq 8(%0), %%rax\n\t" \
271 "movq 16(%0), %%rbx\n\t" \
272 "lock; " #_name " %%" #_eax ",(%%rbx)\n\t" \
275 "movq %%rax, 24(%0)\n\t" \
276 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
281 "%s G=%016llx E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
282 #_name, g_val, e_val_before, flags_in, \
283 e_val, block[3] & CC_MASK)); \
290 GEN_do_locked_G_E(addb
,al
)
291 GEN_do_locked_G_E(addw
,ax
)
292 GEN_do_locked_G_E(addl
,eax
)
293 GEN_do_locked_G_E(addq
,rax
)
295 GEN_do_locked_G_E(orb
, al
)
296 GEN_do_locked_G_E(orw
, ax
)
297 GEN_do_locked_G_E(orl
, eax
)
298 GEN_do_locked_G_E(orq
, rax
)
300 GEN_do_locked_G_E(adcb
,al
)
301 GEN_do_locked_G_E(adcw
,ax
)
302 GEN_do_locked_G_E(adcl
,eax
)
303 GEN_do_locked_G_E(adcq
,rax
)
305 GEN_do_locked_G_E(sbbb
,al
)
306 GEN_do_locked_G_E(sbbw
,ax
)
307 GEN_do_locked_G_E(sbbl
,eax
)
308 GEN_do_locked_G_E(sbbq
,rax
)
310 GEN_do_locked_G_E(andb
,al
)
311 GEN_do_locked_G_E(andw
,ax
)
312 GEN_do_locked_G_E(andl
,eax
)
313 GEN_do_locked_G_E(andq
,rax
)
315 GEN_do_locked_G_E(subb
,al
)
316 GEN_do_locked_G_E(subw
,ax
)
317 GEN_do_locked_G_E(subl
,eax
)
318 GEN_do_locked_G_E(subq
,rax
)
320 GEN_do_locked_G_E(xorb
,al
)
321 GEN_do_locked_G_E(xorw
,ax
)
322 GEN_do_locked_G_E(xorl
,eax
)
323 GEN_do_locked_G_E(xorq
,rax
)
/* GEN_do_locked_imm_E(_name, _eax, _imm) defines
   do_locked_imm_E_<_name>_<_imm>(void), a noinline test for a
   lock-prefixed "immediate op memory" instruction.
     _name - instruction mnemonic (pasted into the name and asm text)
     _eax  - not referenced on any line visible in this listing
     _imm  - immediate operand, pasted into the name and stringised into
             the asm text and the report line
   Visible structure: loops over v2 in [0, NVALS) and six 0/1 selectors
   (o, s, z, a, c, p); each iteration builds flags_in, stores it in
   block[0] and the address of e_val in block[1], runs an asm sequence
   that loads rbx from offset 8 of block, executes
   `lock; <_name> $<_imm>,(%rbx)` and stores rax to offset 16, then
   formats a report line using block[2] & CC_MASK as CCOUT.
   NOTE(review): braces, the rest of flags_in, the initialisation of
   e_val, the flag install/capture instructions and the start of the
   output call are on lines missing from this listing. */
328 #define GEN_do_locked_imm_E(_name,_eax,_imm) \
330 __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void ) \
332 volatile Long e_val, e_val_before; \
333 Long o, s, z, a, c, p, v2, flags_in; \
336 for (v2 = 0; v2 < NVALS; v2++) { \
338 for (o = 0; o < 2; o++) { \
339 for (s = 0; s < 2; s++) { \
340 for (z = 0; z < 2; z++) { \
341 for (a = 0; a < 2; a++) { \
342 for (c = 0; c < 2; c++) { \
343 for (p = 0; p < 2; p++) { \
345 flags_in = (o ? CC_O : 0) \
353 e_val_before = e_val; \
355 block[0] = flags_in; \
356 block[1] = (long)&e_val; \
358 __asm__ __volatile__( \
359 "movq 0(%0), %%rax\n\t" \
362 "movq 8(%0), %%rbx\n\t" \
363 "lock; " #_name " $" #_imm ",(%%rbx)\n\t" \
366 "movq %%rax, 16(%0)\n\t" \
367 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
372 "%s I=%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
373 #_name, #_imm, e_val_before, flags_in, \
374 e_val, block[2] & CC_MASK)); \
381 GEN_do_locked_imm_E(addb
,al
,0x7F)
382 GEN_do_locked_imm_E(addb
,al
,0xF1)
383 GEN_do_locked_imm_E(addw
,ax
,0x7E)
384 GEN_do_locked_imm_E(addw
,ax
,0x9325)
385 GEN_do_locked_imm_E(addl
,eax
,0x7D)
386 GEN_do_locked_imm_E(addl
,eax
,0x31415927)
387 GEN_do_locked_imm_E(addq
,rax
,0x7D)
388 GEN_do_locked_imm_E(addq
,rax
,0x31415927)
390 GEN_do_locked_imm_E(orb
,al
,0x7F)
391 GEN_do_locked_imm_E(orb
,al
,0xF1)
392 GEN_do_locked_imm_E(orw
,ax
,0x7E)
393 GEN_do_locked_imm_E(orw
,ax
,0x9325)
394 GEN_do_locked_imm_E(orl
,eax
,0x7D)
395 GEN_do_locked_imm_E(orl
,eax
,0x31415927)
396 GEN_do_locked_imm_E(orq
,rax
,0x7D)
397 GEN_do_locked_imm_E(orq
,rax
,0x31415927)
399 GEN_do_locked_imm_E(adcb
,al
,0x7F)
400 GEN_do_locked_imm_E(adcb
,al
,0xF1)
401 GEN_do_locked_imm_E(adcw
,ax
,0x7E)
402 GEN_do_locked_imm_E(adcw
,ax
,0x9325)
403 GEN_do_locked_imm_E(adcl
,eax
,0x7D)
404 GEN_do_locked_imm_E(adcl
,eax
,0x31415927)
405 GEN_do_locked_imm_E(adcq
,rax
,0x7D)
406 GEN_do_locked_imm_E(adcq
,rax
,0x31415927)
408 GEN_do_locked_imm_E(sbbb
,al
,0x7F)
409 GEN_do_locked_imm_E(sbbb
,al
,0xF1)
410 GEN_do_locked_imm_E(sbbw
,ax
,0x7E)
411 GEN_do_locked_imm_E(sbbw
,ax
,0x9325)
412 GEN_do_locked_imm_E(sbbl
,eax
,0x7D)
413 GEN_do_locked_imm_E(sbbl
,eax
,0x31415927)
414 GEN_do_locked_imm_E(sbbq
,rax
,0x7D)
415 GEN_do_locked_imm_E(sbbq
,rax
,0x31415927)
417 GEN_do_locked_imm_E(andb
,al
,0x7F)
418 GEN_do_locked_imm_E(andb
,al
,0xF1)
419 GEN_do_locked_imm_E(andw
,ax
,0x7E)
420 GEN_do_locked_imm_E(andw
,ax
,0x9325)
421 GEN_do_locked_imm_E(andl
,eax
,0x7D)
422 GEN_do_locked_imm_E(andl
,eax
,0x31415927)
423 GEN_do_locked_imm_E(andq
,rax
,0x7D)
424 GEN_do_locked_imm_E(andq
,rax
,0x31415927)
426 GEN_do_locked_imm_E(subb
,al
,0x7F)
427 GEN_do_locked_imm_E(subb
,al
,0xF1)
428 GEN_do_locked_imm_E(subw
,ax
,0x7E)
429 GEN_do_locked_imm_E(subw
,ax
,0x9325)
430 GEN_do_locked_imm_E(subl
,eax
,0x7D)
431 GEN_do_locked_imm_E(subl
,eax
,0x31415927)
432 GEN_do_locked_imm_E(subq
,rax
,0x7D)
433 GEN_do_locked_imm_E(subq
,rax
,0x31415927)
435 GEN_do_locked_imm_E(xorb
,al
,0x7F)
436 GEN_do_locked_imm_E(xorb
,al
,0xF1)
437 GEN_do_locked_imm_E(xorw
,ax
,0x7E)
438 GEN_do_locked_imm_E(xorw
,ax
,0x9325)
439 GEN_do_locked_imm_E(xorl
,eax
,0x7D)
440 GEN_do_locked_imm_E(xorl
,eax
,0x31415927)
441 GEN_do_locked_imm_E(xorq
,rax
,0x7D)
442 GEN_do_locked_imm_E(xorq
,rax
,0x31415927)
/* GEN_do_locked_unary_E(_name, _eax) defines do_locked_unary_E_<_name>
   (void), a noinline test for a lock-prefixed single-operand
   instruction on memory.
     _name - instruction mnemonic (pasted into the name and asm text)
     _eax  - not referenced on any line visible in this listing
   Visible structure: loops over v2 in [0, NVALS) and six 0/1 selectors
   (o, s, z, a, c, p); each iteration builds flags_in, stores it in
   block[0] and the address of e_val in block[1], runs an asm sequence
   that loads rbx from offset 8 of block, executes `lock; <_name> (%rbx)`
   and stores rax to offset 16, then formats a report line using
   block[2] & CC_MASK as CCOUT.
   NOTE(review): braces, the rest of flags_in, the initialisation of
   e_val, the flag install/capture instructions and the start of the
   output call are on lines missing from this listing. */
444 #define GEN_do_locked_unary_E(_name,_eax) \
446 __attribute__((noinline)) void do_locked_unary_E_##_name ( void ) \
448 volatile Long e_val, e_val_before; \
449 Long o, s, z, a, c, p, v2, flags_in; \
452 for (v2 = 0; v2 < NVALS; v2++) { \
454 for (o = 0; o < 2; o++) { \
455 for (s = 0; s < 2; s++) { \
456 for (z = 0; z < 2; z++) { \
457 for (a = 0; a < 2; a++) { \
458 for (c = 0; c < 2; c++) { \
459 for (p = 0; p < 2; p++) { \
461 flags_in = (o ? CC_O : 0) \
469 e_val_before = e_val; \
471 block[0] = flags_in; \
472 block[1] = (long)&e_val; \
474 __asm__ __volatile__( \
475 "movq 0(%0), %%rax\n\t" \
478 "movq 8(%0), %%rbx\n\t" \
479 "lock; " #_name " (%%rbx)\n\t" \
482 "movq %%rax, 16(%0)\n\t" \
483 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
488 "%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
489 #_name, e_val_before, flags_in, \
490 e_val, block[2] & CC_MASK)); \
497 GEN_do_locked_unary_E(decb
,al
)
498 GEN_do_locked_unary_E(decw
,ax
)
499 GEN_do_locked_unary_E(decl
,eax
)
500 GEN_do_locked_unary_E(decq
,rax
)
502 GEN_do_locked_unary_E(incb
,al
)
503 GEN_do_locked_unary_E(incw
,ax
)
504 GEN_do_locked_unary_E(incl
,eax
)
505 GEN_do_locked_unary_E(incq
,rax
)
507 GEN_do_locked_unary_E(negb
,al
)
508 GEN_do_locked_unary_E(negw
,ax
)
509 GEN_do_locked_unary_E(negl
,eax
)
510 GEN_do_locked_unary_E(negq
,rax
)
512 GEN_do_locked_unary_E(notb
,al
)
513 GEN_do_locked_unary_E(notw
,ax
)
514 GEN_do_locked_unary_E(notl
,eax
)
515 GEN_do_locked_unary_E(notq
,rax
)
518 /////////////////////////////////////////////////////////////////
/* Wrapper around `lock; btsq`: the bit index is bitno widened to ULong,
   the memory operand is *base; *base and res are the asm outputs and
   "rdx", "cc", "memory" are declared clobbered.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   callers assert the result is 0 or 1, so it is presumably the carry
   flag left by the instruction -- confirm. */
520 ULong
btsq_mem ( UChar
* base
, int bitno
)
524 __volatile__("lock; btsq\t%2, %0\n\t"
527 : "=m" (*base
), "=r" (res
)
528 : "r" ((ULong
)bitno
) : "rdx","cc","memory" );
   /* Pretty meaningless to dereference base here, but that's what you
      have to do to get a bts insn which refers to memory starting at
      base. */
/* Wrapper around `lock; btsl`: the bit index is bitno converted to UInt,
   the memory operand is *base; *base and res are the asm outputs.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   presumably the carry flag is returned -- confirm. */
534 ULong
btsl_mem ( UChar
* base
, int bitno
)
538 __volatile__("lock; btsl\t%2, %0\n\t"
541 : "=m" (*base
), "=r" (res
)
542 : "r" ((UInt
)bitno
));
/* Wrapper around `lock; btsw`: the bit index operand is bitno widened
   to ULong and referenced through the %w2 (16-bit register) modifier;
   the memory operand is *base.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   presumably the carry flag is returned -- confirm. */
545 ULong
btsw_mem ( UChar
* base
, int bitno
)
549 __volatile__("lock; btsw\t%w2, %0\n\t"
552 : "=m" (*base
), "=r" (res
)
553 : "r" ((ULong
)bitno
));
/* Wrapper around `lock; btrq`: the bit index is bitno widened to ULong,
   the memory operand is *base; *base and res are the asm outputs.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   presumably the carry flag is returned -- confirm. */
557 ULong
btrq_mem ( UChar
* base
, int bitno
)
561 __volatile__("lock; btrq\t%2, %0\n\t"
564 : "=m" (*base
), "=r" (res
)
565 : "r" ((ULong
)bitno
));
/* Wrapper around `lock; btrl`: the bit index is bitno converted to UInt,
   the memory operand is *base; *base and res are the asm outputs.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   presumably the carry flag is returned -- confirm. */
568 ULong
btrl_mem ( UChar
* base
, int bitno
)
572 __volatile__("lock; btrl\t%2, %0\n\t"
575 : "=m" (*base
), "=r" (res
)
576 : "r" ((UInt
)bitno
));
/* Wrapper around `lock; btrw`: the bit index operand is bitno widened
   to ULong and referenced through the %w2 (16-bit register) modifier;
   the memory operand is *base.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   presumably the carry flag is returned -- confirm. */
579 ULong
btrw_mem ( UChar
* base
, int bitno
)
583 __volatile__("lock; btrw\t%w2, %0\n\t"
586 : "=m" (*base
), "=r" (res
)
587 : "r" ((ULong
)bitno
));
/* Wrapper around `lock; btcq`: the bit index is bitno widened to ULong,
   the memory operand is *base; *base and res are the asm outputs.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   presumably the carry flag is returned -- confirm. */
591 ULong
btcq_mem ( UChar
* base
, int bitno
)
595 __volatile__("lock; btcq\t%2, %0\n\t"
598 : "=m" (*base
), "=r" (res
)
599 : "r" ((ULong
)bitno
));
/* Wrapper around `lock; btcl`: the bit index is bitno converted to UInt,
   the memory operand is *base; *base and res are the asm outputs.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   presumably the carry flag is returned -- confirm. */
602 ULong
btcl_mem ( UChar
* base
, int bitno
)
606 __volatile__("lock; btcl\t%2, %0\n\t"
609 : "=m" (*base
), "=r" (res
)
610 : "r" ((UInt
)bitno
));
/* Wrapper around `lock; btcw`: the bit index operand is bitno widened
   to ULong and referenced through the %w2 (16-bit register) modifier;
   the memory operand is *base.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   presumably the carry flag is returned -- confirm. */
613 ULong
btcw_mem ( UChar
* base
, int bitno
)
617 __volatile__("lock; btcw\t%w2, %0\n\t"
620 : "=m" (*base
), "=r" (res
)
621 : "r" ((ULong
)bitno
));
/* Wrapper around `btq` (no lock prefix in the visible asm text) with
   *base as the memory operand; *base and res are the asm outputs.
   NOTE(review): the declaration of res, the instruction that fills it,
   the input-operand line and the return statement are on lines missing
   from this listing; presumably the carry flag is returned -- confirm. */
625 ULong
btq_mem ( UChar
* base
, int bitno
)
629 __volatile__("btq\t%2, %0\n\t"
632 : "=m" (*base
), "=r" (res
)
/* Wrapper around `btl` (no lock prefix in the visible asm text) with
   *base as the memory operand; *base and res are the asm outputs.
   NOTE(review): the declaration of res, the instruction that fills it,
   the input-operand line and the return statement are on lines missing
   from this listing; presumably the carry flag is returned -- confirm. */
637 ULong
btl_mem ( UChar
* base
, int bitno
)
641 __volatile__("btl\t%2, %0\n\t"
644 : "=m" (*base
), "=r" (res
)
/* Wrapper around `btw` (no lock prefix in the visible asm text): the bit
   index operand is bitno widened to ULong and referenced through the
   %w2 (16-bit register) modifier; the memory operand is *base.
   NOTE(review): the declaration of res, the instruction that fills it
   and the return statement are on lines missing from this listing;
   presumably the carry flag is returned -- confirm. */
649 ULong
btw_mem ( UChar
* base
, int bitno
)
653 __volatile__("btw\t%w2, %0\n\t"
656 : "=m" (*base
), "=r" (res
)
657 : "r" ((ULong
)bitno
));
661 ULong
rol1 ( ULong x
)
663 return (x
<< 1) | (x
>> 63);
/* Exercises the bt/bts/btr/btc memory wrappers with register bit
   offsets, in three passes (MEM-Q, MEM-L, MEM-W) that use the q, l and
   w wrapper families respectively.  In each pass, as far as the visible
   lines show:
     - a 200-byte zeroed buffer is obtained with calloc(200,1);
     - 10000 iterations pick bitoff = (myrandom() % 1600) - 800 and call
       one of the set / reset / complement / test wrappers (cases 0..3);
     - the wrapper result c is asserted to be 0 or 1, and when it is
       non-zero carrydep becomes rol1(carrydep) XOR bitoff;
     - 200 iterations fold a value `ch` into res with rol1;
     - a summary line with res and carrydep is formatted into outBuf
       and, in the Q and L passes, passed to send().
   NOTE(review): many lines are missing from this listing (local
   declarations, any adjustment of `block`, the switch header and its
   selector, loop-closing braces, how `ch` is read, the sprintf
   arguments, the W-pass send( sprintf( line, any free()).  The calloc
   result is not checked on any visible line.  myrandom() returns
   unsigned, so "% 1600 - 800" is evaluated in unsigned arithmetic; the
   intended negative offsets depend on the type of bitoff, which is
   declared on a line not shown -- confirm against the complete file. */
666 void do_bt_G_E_tests ( void )
671 ULong carrydep
, res
;;
673 /*------------------------ MEM-Q -----------------------*/
676 block
= calloc(200,1);
678 /* Valid bit offsets are -800 .. 799 inclusive. */
680 for (n
= 0; n
< 10000; n
++) {
681 bitoff
= (myrandom() % 1600) - 800;
685 case 0: c
= btsq_mem(block
, bitoff
); break;
686 case 1: c
= btrq_mem(block
, bitoff
); break;
687 case 2: c
= btcq_mem(block
, bitoff
); break;
688 case 3: c
= btq_mem(block
, bitoff
); break;
691 assert(c
== 0 || c
== 1);
692 carrydep
= c
? (rol1(carrydep
) ^ (Long
)bitoff
) : carrydep
;
695 /* Compute final result */
698 for (n
= 0; n
< 200; n
++) {
700 /* printf("%d ", (int)block[n]); */
701 res
= rol1(res
) ^ (ULong
)ch
;
704 send( sprintf(outBuf
,
705 "bt{s,r,c}q: final res 0x%llx, carrydep 0x%llx\n",
709 /*------------------------ MEM-L -----------------------*/
712 block
= calloc(200,1);
714 /* Valid bit offsets are -800 .. 799 inclusive. */
716 for (n
= 0; n
< 10000; n
++) {
717 bitoff
= (myrandom() % 1600) - 800;
721 case 0: c
= btsl_mem(block
, bitoff
); break;
722 case 1: c
= btrl_mem(block
, bitoff
); break;
723 case 2: c
= btcl_mem(block
, bitoff
); break;
724 case 3: c
= btl_mem(block
, bitoff
); break;
727 assert(c
== 0 || c
== 1);
728 carrydep
= c
? (rol1(carrydep
) ^ (Long
)(Int
)bitoff
) : carrydep
;
731 /* Compute final result */
734 for (n
= 0; n
< 200; n
++) {
736 /* printf("%d ", (int)block[n]); */
737 res
= rol1(res
) ^ (ULong
)ch
;
740 send( sprintf(outBuf
,
741 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
745 /*------------------------ MEM-W -----------------------*/
748 block
= calloc(200,1);
750 /* Valid bit offsets are -800 .. 799 inclusive. */
752 for (n
= 0; n
< 10000; n
++) {
753 bitoff
= (myrandom() % 1600) - 800;
757 case 0: c
= btsw_mem(block
, bitoff
); break;
758 case 1: c
= btrw_mem(block
, bitoff
); break;
759 case 2: c
= btcw_mem(block
, bitoff
); break;
760 case 3: c
= btw_mem(block
, bitoff
); break;
763 assert(c
== 0 || c
== 1);
764 carrydep
= c
? (rol1(carrydep
) ^ (Long
)(Int
)bitoff
) : carrydep
;
767 /* Compute final result */
770 for (n
= 0; n
< 200; n
++) {
772 /* printf("%d ", (int)block[n]); */
773 res
= rol1(res
) ^ (ULong
)ch
;
777 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
783 /////////////////////////////////////////////////////////////////
785 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
786 also reconstruct the original bits 0, 1, 2, 3 by looking at the
787 carry flag. Returned result has mashed bits 0-3 at the bottom and
788 the reconstructed original bits 0-3 as 4-7. */
/* 64-bit variant.  The visible asm applies, to the word addressed by
   %rdx, btq on bit 0 followed by lock-prefixed btsq on bit 1, btrq on
   bit 2 and btcq on bit 3; after each step a zero-extended %cl is ORed
   into %rax.  The return value packs the low four bits of `mashed` into
   bits 0-3 and the low four bits of `reconstructed` into bits 4-7.
   NOTE(review): the instructions that load %rdx, set %cl, shift the
   accumulated value and move results into the two outputs, as well as
   the input-operand line and the braces, are missing from this listing;
   presumably %rdx is loaded from origp -- confirm. */
790 ULong
mash_mem_Q ( ULong
* origp
)
792 ULong reconstructed
, mashed
;
793 __asm__
__volatile__ (
798 "btq $0, (%%rdx)\n\t"
800 "movzbq %%cl, %%rcx\n\t"
801 "orq %%rcx, %%rax\n\t"
803 "lock; btsq $1, (%%rdx)\n\t"
805 "movzbq %%cl, %%rcx\n\t"
807 "orq %%rcx, %%rax\n\t"
809 "lock; btrq $2, (%%rdx)\n\t"
811 "movzbq %%cl, %%rcx\n\t"
813 "orq %%rcx, %%rax\n\t"
815 "lock; btcq $3, (%%rdx)\n\t"
817 "movzbq %%cl, %%rcx\n\t"
819 "orq %%rcx, %%rax\n\t"
823 : "=r" (reconstructed
), "=r" (mashed
)
825 : "rax", "rcx", "rdx", "cc");
826 return (mashed
& 0xF) | ((reconstructed
& 0xF) << 4);
/* 32-bit variant.  The visible asm applies, to the word addressed by
   %rdx, btl on bit 0 followed by lock-prefixed btsl on bit 1, btrl on
   bit 2 and btcl on bit 3; after each step a zero-extended %cl is ORed
   into %rax.  `mashed` is a UInt here; the return value packs the low
   four bits of `mashed` into bits 0-3 and the low four bits of
   `reconstructed` into bits 4-7.
   NOTE(review): the instructions that load %rdx, set %cl, shift the
   accumulated value and move results into the two outputs, as well as
   the input-operand line and the braces, are missing from this listing;
   presumably %rdx is loaded from origp -- confirm. */
829 ULong
mash_mem_L ( UInt
* origp
)
831 ULong reconstructed
; UInt mashed
;
832 __asm__
__volatile__ (
837 "btl $0, (%%rdx)\n\t"
839 "movzbq %%cl, %%rcx\n\t"
840 "orq %%rcx, %%rax\n\t"
842 "lock; btsl $1, (%%rdx)\n\t"
844 "movzbq %%cl, %%rcx\n\t"
846 "orq %%rcx, %%rax\n\t"
848 "lock; btrl $2, (%%rdx)\n\t"
850 "movzbq %%cl, %%rcx\n\t"
852 "orq %%rcx, %%rax\n\t"
854 "lock; btcl $3, (%%rdx)\n\t"
856 "movzbq %%cl, %%rcx\n\t"
858 "orq %%rcx, %%rax\n\t"
862 : "=r" (reconstructed
), "=r" (mashed
)
864 : "rax", "rcx", "rdx", "cc");
865 return (mashed
& 0xF) | ((reconstructed
& 0xF) << 4);
/* 16-bit variant.  The visible asm applies, to the word addressed by
   %rdx, btw on bit 0 followed by lock-prefixed btsw on bit 1, btrw on
   bit 2 and btcw on bit 3; after each step a zero-extended %cl is ORed
   into %rax.  The return value packs the low four bits of `mashed` into
   bits 0-3 and the low four bits of `reconstructed` into bits 4-7.
   NOTE(review): the instructions that load %rdx, set %cl, shift the
   accumulated value and move results into the two outputs, as well as
   the input-operand line and the braces, are missing from this listing;
   presumably %rdx is loaded from origp -- confirm. */
868 ULong
mash_mem_W ( UShort
* origp
)
870 ULong reconstructed
, mashed
;
871 __asm__
__volatile__ (
876 "btw $0, (%%rdx)\n\t"
878 "movzbq %%cl, %%rcx\n\t"
879 "orq %%rcx, %%rax\n\t"
881 "lock; btsw $1, (%%rdx)\n\t"
883 "movzbq %%cl, %%rcx\n\t"
885 "orq %%rcx, %%rax\n\t"
887 "lock; btrw $2, (%%rdx)\n\t"
889 "movzbq %%cl, %%rcx\n\t"
891 "orq %%rcx, %%rax\n\t"
893 "lock; btcw $3, (%%rdx)\n\t"
895 "movzbq %%cl, %%rcx\n\t"
897 "orq %%rcx, %%rax\n\t"
901 : "=r" (reconstructed
), "=r" (mashed
)
903 : "rax", "rcx", "rdx", "cc");
904 return (mashed
& 0xF) | ((reconstructed
& 0xF) << 4);
/* Exercises the immediate-bit-number bt/bts/btr/btc routines.  Allocates
   one ULong, one UInt and one UShort with malloc, then for i in
   0 .. 0xF formats i together with the results of mash_mem_Q,
   mash_mem_L and mash_mem_W on those objects into outBuf and passes the
   line to send().
   NOTE(review): the declaration of i, the statements inside the loop
   that store into *iiq / *iil / *iiw before each call (presumably the
   value i), the closing braces and any free() calls are on lines
   missing from this listing; no NULL check of the malloc results is
   visible -- confirm against the complete file. */
908 void do_bt_imm_E_tests( void )
911 ULong
* iiq
= malloc(sizeof(ULong
));
912 UInt
* iil
= malloc(sizeof(UInt
));
913 UShort
* iiw
= malloc(sizeof(UShort
));
914 for (i
= 0; i
< 0x10; i
++) {
918 send(sprintf(outBuf
,"0x%llx -> 0x%02llx 0x%02llx 0x%02llx\n", i
,
919 mash_mem_Q(iiq
), mash_mem_L(iil
), mash_mem_W(iiw
)));
927 /////////////////////////////////////////////////////////////////
/* Test driver body: calls the generated do_locked_G_E_*,
   do_locked_imm_E_* and do_locked_unary_E_* functions and
   do_bt_imm_E_tests(), then finalises theCRC with crcFinalise() and
   prints a PASS or FAIL line depending on whether it equals the
   expected value 0xDF0656F1.
   NOTE(review): the enclosing function header (presumably main) is on a
   line missing from this listing, as are some calls (for example the
   do_locked_G_E_or* group), the printf argument lines, the else branch
   and the return -- confirm against the complete file. */
931 do_locked_G_E_addb();
932 do_locked_G_E_addw();
933 do_locked_G_E_addl();
934 do_locked_G_E_addq();
941 do_locked_G_E_adcb();
942 do_locked_G_E_adcw();
943 do_locked_G_E_adcl();
944 do_locked_G_E_adcq();
946 do_locked_G_E_sbbb();
947 do_locked_G_E_sbbw();
948 do_locked_G_E_sbbl();
949 do_locked_G_E_sbbq();
951 do_locked_G_E_andb();
952 do_locked_G_E_andw();
953 do_locked_G_E_andl();
954 do_locked_G_E_andq();
956 do_locked_G_E_subb();
957 do_locked_G_E_subw();
958 do_locked_G_E_subl();
959 do_locked_G_E_subq();
961 do_locked_G_E_xorb();
962 do_locked_G_E_xorw();
963 do_locked_G_E_xorl();
964 do_locked_G_E_xorq();
967 do_locked_imm_E_addb_0x7F();
968 do_locked_imm_E_addb_0xF1();
969 do_locked_imm_E_addw_0x7E();
970 do_locked_imm_E_addw_0x9325();
971 do_locked_imm_E_addl_0x7D();
972 do_locked_imm_E_addl_0x31415927();
973 do_locked_imm_E_addq_0x7D();
974 do_locked_imm_E_addq_0x31415927();
976 do_locked_imm_E_orb_0x7F();
977 do_locked_imm_E_orb_0xF1();
978 do_locked_imm_E_orw_0x7E();
979 do_locked_imm_E_orw_0x9325();
980 do_locked_imm_E_orl_0x7D();
981 do_locked_imm_E_orl_0x31415927();
982 do_locked_imm_E_orq_0x7D();
983 do_locked_imm_E_orq_0x31415927();
985 do_locked_imm_E_adcb_0x7F();
986 do_locked_imm_E_adcb_0xF1();
987 do_locked_imm_E_adcw_0x7E();
988 do_locked_imm_E_adcw_0x9325();
989 do_locked_imm_E_adcl_0x7D();
990 do_locked_imm_E_adcl_0x31415927();
991 do_locked_imm_E_adcq_0x7D();
992 do_locked_imm_E_adcq_0x31415927();
994 do_locked_imm_E_sbbb_0x7F();
995 do_locked_imm_E_sbbb_0xF1();
996 do_locked_imm_E_sbbw_0x7E();
997 do_locked_imm_E_sbbw_0x9325();
998 do_locked_imm_E_sbbl_0x7D();
999 do_locked_imm_E_sbbl_0x31415927();
1000 do_locked_imm_E_sbbq_0x7D();
1001 do_locked_imm_E_sbbq_0x31415927();
1003 do_locked_imm_E_andb_0x7F();
1004 do_locked_imm_E_andb_0xF1();
1005 do_locked_imm_E_andw_0x7E();
1006 do_locked_imm_E_andw_0x9325();
1007 do_locked_imm_E_andl_0x7D();
1008 do_locked_imm_E_andl_0x31415927();
1009 do_locked_imm_E_andq_0x7D();
1010 do_locked_imm_E_andq_0x31415927();
1012 do_locked_imm_E_subb_0x7F();
1013 do_locked_imm_E_subb_0xF1();
1014 do_locked_imm_E_subw_0x7E();
1015 do_locked_imm_E_subw_0x9325();
1016 do_locked_imm_E_subl_0x7D();
1017 do_locked_imm_E_subl_0x31415927();
1018 do_locked_imm_E_subq_0x7D();
1019 do_locked_imm_E_subq_0x31415927();
1021 do_locked_imm_E_xorb_0x7F();
1022 do_locked_imm_E_xorb_0xF1();
1023 do_locked_imm_E_xorw_0x7E();
1024 do_locked_imm_E_xorw_0x9325();
1025 do_locked_imm_E_xorl_0x7D();
1026 do_locked_imm_E_xorl_0x31415927();
1027 do_locked_imm_E_xorq_0x7D();
1028 do_locked_imm_E_xorq_0x31415927();
1029 // 4 * 7 + 8 * 7 == 84
1031 do_locked_unary_E_decb();
1032 do_locked_unary_E_decw();
1033 do_locked_unary_E_decl();
1034 do_locked_unary_E_decq();
1036 do_locked_unary_E_incb();
1037 do_locked_unary_E_incw();
1038 do_locked_unary_E_incl();
1039 do_locked_unary_E_incq();
1041 do_locked_unary_E_negb();
1042 do_locked_unary_E_negw();
1043 do_locked_unary_E_negl();
1044 do_locked_unary_E_negq();
1046 do_locked_unary_E_notb();
1047 do_locked_unary_E_notw();
1048 do_locked_unary_E_notl();
1049 do_locked_unary_E_notq();
1054 do_bt_imm_E_tests();
1057 // So there should be 118 lock-prefixed instructions in the
1058 // disassembly of this compilation unit.
1060 // objdump -d ./amd64locked | grep lock | grep -v do_lock | grep -v elf64 | wc
1063 { UInt crcExpd
= 0xDF0656F1;
1064 theCRC
= crcFinalise( theCRC
);
1065 if (theCRC
== crcExpd
) {
1066 printf("amd64locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
1069 printf("amd64locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
1071 printf("amd64locked: set #define VERBOSE 1 to diagnose\n");