regtest: broaden none/tests/linux/bug498317 suppression for PPC
[valgrind.git] / none / tests / amd64 / fb_test_amd64.c
blob3c38b1384ec6955368cbd28188d6fcf3edee3ddd
/* Contrary to what the next comment says, this is now an amd64 CPU
   test. */

/*
 *  x86 CPU test
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <inttypes.h>
28 #include <math.h>
29 #include <stdarg.h>
30 #include <assert.h>
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////

/*
 * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
 * MD5 Message-Digest Algorithm (RFC 1321).
 *
 * Homepage:
 * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
 *
 * Author:
 * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
 *
 * This software was written by Alexander Peslyak in 2001.  No copyright is
 * claimed, and the software is hereby placed in the public domain.
 * In case this attempt to disclaim copyright and place the software in the
 * public domain is deemed null and void, then the software is
 * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
 * general public under the following terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 * There's ABSOLUTELY NO WARRANTY, express or implied.
 *
 * (This is a heavily cut-down "BSD license".)
 *
 * This differs from Colin Plumb's older public domain implementation in that
 * no exactly 32-bit integer data type is required (any 32-bit or wider
 * unsigned integer data type will do), there's no compile-time endianness
 * configuration, and the function prototypes match OpenSSL's.  No code from
 * Colin Plumb's implementation has been reused; this comment merely compares
 * the properties of the two independent implementations.
 *
 * The primary goals of this implementation are portability and ease of use.
 * It is meant to be fast, but not as fast as possible.  Some known
 * optimizations are not included to reduce source code size and avoid
 * compile-time configuration.
 */
#include <string.h>

// BEGIN #include "md5.h"
/* Any 32-bit or wider unsigned integer data type will do */
typedef unsigned int MD5_u32plus;

/*
 * Streaming MD5 state.
 *   lo, hi  - running byte count: lo keeps the low 29 bits (see the
 *             0x1fffffff mask in MD5_Update), hi the bits above them.
 *   a..d    - the four chaining words.
 *   buffer  - bytes of a not-yet-complete 64-byte block (lo & 0x3f of them).
 *   block   - decoded input words; only used by the portable SET/GET below.
 */
typedef struct {
    MD5_u32plus lo, hi;
    MD5_u32plus a, b, c, d;
    unsigned char buffer[64];
    MD5_u32plus block[16];
} MD5_CTX;

void MD5_Init(MD5_CTX *ctx);
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
void MD5_Final(unsigned char *result, MD5_CTX *ctx);
// END #include "md5.h"

/*
 * The basic MD5 functions.
 *
 * F and G are optimized compared to their RFC 1321 definitions for
 * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
 * implementation.
 */
#define F(x, y, z)   ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z)   ((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z)   (((x) ^ (y)) ^ (z))
#define H2(x, y, z)  ((x) ^ ((y) ^ (z)))
#define I(x, y, z)   ((y) ^ ((x) | ~(z)))

/*
 * The MD5 transformation for all four rounds.
 *
 * Expands to three statements, so it must only be used where a statement
 * sequence is valid (it is not wrapped in do { } while (0)).
 */
#define STEP(f, a, b, c, d, x, t, s) \
    (a) += f((b), (c), (d)) + (x) + (t); \
    (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
    (a) += (b);

/*
 * SET reads 4 input bytes in little-endian byte order and stores them in a
 * properly aligned word in host byte order.
 *
 * The check for little-endian architectures that tolerate unaligned memory
 * accesses is just an optimization.  Nothing will break if it fails to detect
 * a suitable architecture.
 *
 * Unfortunately, this optimization may be a C strict aliasing rules violation
 * if the caller's data buffer has effective type that cannot be aliased by
 * MD5_u32plus.  In practice, this problem may occur if these MD5 routines are
 * inlined into a calling function, or with future and dangerously advanced
 * link-time optimizations.  For the time being, keeping these MD5 routines in
 * their own translation unit avoids the problem.
 */
#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
#define SET(n) \
    (*(MD5_u32plus *)&ptr[(n) * 4])
#define GET(n) \
    SET(n)
#else
#define SET(n) \
    (ctx->block[(n)] = \
    (MD5_u32plus)ptr[(n) * 4] | \
    ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
    ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
    ((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
#define GET(n) \
    (ctx->block[(n)])
#endif

/*
 * This processes one or more 64-byte data blocks, but does NOT update the bit
 * counters.  There are no alignment requirements.
 *
 * size must be a non-zero multiple of 64: the loop is a do/while that
 * subtracts 64 per iteration and stops when the remainder reaches zero.
 * Returns a pointer just past the last byte consumed.
 */
static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
{
    const unsigned char *ptr;
    MD5_u32plus a, b, c, d;
    MD5_u32plus saved_a, saved_b, saved_c, saved_d;

    ptr = (const unsigned char *)data;

    a = ctx->a;
    b = ctx->b;
    c = ctx->c;
    d = ctx->d;

    do {
        saved_a = a;
        saved_b = b;
        saved_c = c;
        saved_d = d;

/* Round 1 */
        STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
        STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
        STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
        STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
        STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
        STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
        STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
        STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
        STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
        STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
        STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
        STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
        STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
        STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
        STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
        STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)

/* Round 2 */
        STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
        STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
        STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
        STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
        STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
        STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
        STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
        STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
        STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
        STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
        STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
        STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
        STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
        STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
        STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
        STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)

/* Round 3 */
        STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
        STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
        STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
        STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
        STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
        STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
        STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
        STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
        STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
        STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
        STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
        STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
        STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
        STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
        STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
        STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)

/* Round 4 */
        STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
        STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
        STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
        STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
        STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
        STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
        STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
        STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
        STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
        STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
        STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
        STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
        STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
        STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
        STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
        STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)

        a += saved_a;
        b += saved_b;
        c += saved_c;
        d += saved_d;

        ptr += 64;
    } while (size -= 64);

    ctx->a = a;
    ctx->b = b;
    ctx->c = c;
    ctx->d = d;

    return ptr;
}

/* Resets ctx to the MD5 initial chaining values and a zero byte count. */
void MD5_Init(MD5_CTX *ctx)
{
    ctx->a = 0x67452301;
    ctx->b = 0xefcdab89;
    ctx->c = 0x98badcfe;
    ctx->d = 0x10325476;

    ctx->lo = 0;
    ctx->hi = 0;
}

/*
 * Feeds size bytes from data into the hash.  Complete 64-byte blocks are
 * processed immediately; any tail is kept in ctx->buffer for the next call.
 */
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
{
    MD5_u32plus saved_lo;
    unsigned long used, available;

    /* Advance the byte counter; a wrap of the 29-bit low part carries
       into hi. */
    saved_lo = ctx->lo;
    if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
        ctx->hi++;
    ctx->hi += size >> 29;

    used = saved_lo & 0x3f;

    if (used) {
        /* Top up the partially filled buffer first. */
        available = 64 - used;

        if (size < available) {
            memcpy(&ctx->buffer[used], data, size);
            return;
        }

        memcpy(&ctx->buffer[used], data, available);
        data = (const unsigned char *)data + available;
        size -= available;
        body(ctx, ctx->buffer, 64);
    }

    if (size >= 64) {
        /* Hash all whole blocks straight from the caller's buffer. */
        data = body(ctx, data, size & ~(unsigned long)0x3f);
        size &= 0x3f;
    }

    memcpy(ctx->buffer, data, size);
}

/* Stores the low 32 bits of src at dst in little-endian byte order.
   Expands to four statements. */
#define OUT(dst, src) \
    (dst)[0] = (unsigned char)(src); \
    (dst)[1] = (unsigned char)((src) >> 8); \
    (dst)[2] = (unsigned char)((src) >> 16); \
    (dst)[3] = (unsigned char)((src) >> 24);

/*
 * Pads the message, appends the bit length, writes the 16-byte digest to
 * result and then zeroes the whole context.
 */
void MD5_Final(unsigned char *result, MD5_CTX *ctx)
{
    unsigned long used, available;

    used = ctx->lo & 0x3f;

    ctx->buffer[used++] = 0x80;

    available = 64 - used;

    if (available < 8) {
        /* No room left for the 8-byte length: flush a padding block. */
        memset(&ctx->buffer[used], 0, available);
        body(ctx, ctx->buffer, 64);
        used = 0;
        available = 64;
    }

    memset(&ctx->buffer[used], 0, available - 8);

    /* Convert the byte count in lo to a bit count before emitting it. */
    ctx->lo <<= 3;
    OUT(&ctx->buffer[56], ctx->lo)
    OUT(&ctx->buffer[60], ctx->hi)

    body(ctx, ctx->buffer, 64);

    OUT(&result[0], ctx->a)
    OUT(&result[4], ctx->b)
    OUT(&result[8], ctx->c)
    OUT(&result[12], ctx->d)

    memset(ctx, 0, sizeof(*ctx));
}
339 //////////////////////////////////////////////////////////////////
340 //////////////////////////////////////////////////////////////////
342 static MD5_CTX md5ctx;
344 void xxprintf_start(void)
346 MD5_Init(&md5ctx);
349 void xxprintf_done(void)
351 const char hexchar[16] = "0123456789abcdef";
352 unsigned char result[100];
353 memset(result, 0, sizeof(result));
354 MD5_Final(&result[0], &md5ctx);
355 printf("final MD5 = ");
356 int i;
357 for (i = 0; i < 16; i++) {
358 printf("%c%c", hexchar[0xF & (result[i] >> 4)],
359 hexchar[0xF & (result[i] >> 0)]);
361 printf("\n");
364 __attribute__((format(__printf__, 1, 2)))
365 void xxprintf (const char *format, ...)
367 char buf[128];
368 memset(buf, 0, sizeof(buf));
370 va_list vargs;
371 va_start(vargs, format);
372 int n = vsnprintf(buf, sizeof(buf)-1, format, vargs);
373 va_end(vargs);
375 assert(n < sizeof(buf)-1);
376 assert(buf[sizeof(buf)-1] == 0);
377 assert(buf[sizeof(buf)-2] == 0);
379 MD5_Update(&md5ctx, buf, strlen(buf));
380 if (0) printf("QQQ %s", buf);
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////

/* Setting this to 1 creates a very comprehensive test of
   integer condition codes. */
#define TEST_INTEGER_VERBOSE 1

typedef long long int int64;

//#define LINUX_VM86_IOPL_FIX
//#define TEST_P4_FLAGS

/* Token pasting / stringification with one extra level of expansion so
   that macro arguments are expanded before being pasted or quoted. */
#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)
#define stringify(s) tostring(s)
#define tostring(s) #s

/* Bit masks used to pick individual condition codes out of a saved
   flags word. */
#define CC_C    0x0001
#define CC_P    0x0004
#define CC_A    0x0010
#define CC_Z    0x0040
#define CC_S    0x0080
#define CC_O    0x0800

/* Flags compared by the tests that follow; redefined further down for
   instruction groups where fewer flags are checked. */
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
410 #define OP add
411 #include "fb_test_amd64.h"
413 #define OP sub
414 #include "fb_test_amd64.h"
416 #define OP xor
417 #include "fb_test_amd64.h"
419 #define OP and
420 #include "fb_test_amd64.h"
422 #define OP or
423 #include "fb_test_amd64.h"
425 #define OP cmp
426 #include "fb_test_amd64.h"
428 #define OP adc
429 #define OP_CC
430 #include "fb_test_amd64.h"
432 #define OP sbb
433 #define OP_CC
434 #include "fb_test_amd64.h"
436 #define OP adcx
437 #define NSH
438 #define OP_CC
439 #include "fb_test_amd64.h"
441 #define OP adox
442 #define NSH
443 #define OP_CC
444 #include "fb_test_amd64.h"
446 #define OP inc
447 #define OP_CC
448 #define OP1
449 #include "fb_test_amd64.h"
451 #define OP dec
452 #define OP_CC
453 #define OP1
454 #include "fb_test_amd64.h"
456 #define OP neg
457 #define OP_CC
458 #define OP1
459 #include "fb_test_amd64.h"
461 #define OP not
462 #define OP_CC
463 #define OP1
464 #include "fb_test_amd64.h"
466 #undef CC_MASK
467 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O)
469 #define OP shl
470 #include "fb_test_amd64_shift.h"
472 #define OP shr
473 #include "fb_test_amd64_shift.h"
475 #define OP sar
476 #include "fb_test_amd64_shift.h"
478 #define OP rol
479 #include "fb_test_amd64_shift.h"
481 #define OP ror
482 #include "fb_test_amd64_shift.h"
484 #define OP rcr
485 #define OP_CC
486 #include "fb_test_amd64_shift.h"
488 #define OP rcl
489 #define OP_CC
490 #include "fb_test_amd64_shift.h"
492 /* XXX: should be more precise ? */
493 #undef CC_MASK
494 #define CC_MASK (CC_C)
496 /* lea test (modrm support) */
497 #define TEST_LEA(STR)\
499 asm("leaq " STR ", %0"\
500 : "=r" (res)\
501 : "a" (rax), "b" (rbx), "c" (rcx), "d" (rdx), "S" (rsi), "D" (rdi));\
502 xxprintf("lea %s = %016llx\n", STR, res);\
505 #define TEST_LEA16(STR)\
507 asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
508 : "=wq" (res)\
509 : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
510 xxprintf("lea %s = %08x\n", STR, res);\
514 void test_lea(void)
516 int64 rax, rbx, rcx, rdx, rsi, rdi, res;
517 rax = 0x0001;
518 rbx = 0x0002;
519 rcx = 0x0004;
520 rdx = 0x0008;
521 rsi = 0x0010;
522 rdi = 0x0020;
524 TEST_LEA("0x4000");
526 TEST_LEA("(%%rax)");
527 TEST_LEA("(%%rbx)");
528 TEST_LEA("(%%rcx)");
529 TEST_LEA("(%%rdx)");
530 TEST_LEA("(%%rsi)");
531 TEST_LEA("(%%rdi)");
533 TEST_LEA("0x40(%%rax)");
534 TEST_LEA("0x40(%%rbx)");
535 TEST_LEA("0x40(%%rcx)");
536 TEST_LEA("0x40(%%rdx)");
537 TEST_LEA("0x40(%%rsi)");
538 TEST_LEA("0x40(%%rdi)");
540 TEST_LEA("0x4000(%%rax)");
541 TEST_LEA("0x4000(%%rbx)");
542 TEST_LEA("0x4000(%%rcx)");
543 TEST_LEA("0x4000(%%rdx)");
544 TEST_LEA("0x4000(%%rsi)");
545 TEST_LEA("0x4000(%%rdi)");
547 TEST_LEA("(%%rax, %%rcx)");
548 TEST_LEA("(%%rbx, %%rdx)");
549 TEST_LEA("(%%rcx, %%rcx)");
550 TEST_LEA("(%%rdx, %%rcx)");
551 TEST_LEA("(%%rsi, %%rcx)");
552 TEST_LEA("(%%rdi, %%rcx)");
554 TEST_LEA("0x40(%%rax, %%rcx)");
555 TEST_LEA("0x4000(%%rbx, %%rdx)");
557 TEST_LEA("(%%rcx, %%rcx, 2)");
558 TEST_LEA("(%%rdx, %%rcx, 4)");
559 TEST_LEA("(%%rsi, %%rcx, 8)");
561 TEST_LEA("(,%%rax, 2)");
562 TEST_LEA("(,%%rbx, 4)");
563 TEST_LEA("(,%%rcx, 8)");
565 TEST_LEA("0x40(,%%rax, 2)");
566 TEST_LEA("0x40(,%%rbx, 4)");
567 TEST_LEA("0x40(,%%rcx, 8)");
570 TEST_LEA("-10(%%rcx, %%rcx, 2)");
571 TEST_LEA("-10(%%rdx, %%rcx, 4)");
572 TEST_LEA("-10(%%rsi, %%rcx, 8)");
574 TEST_LEA("0x4000(%%rcx, %%rcx, 2)");
575 TEST_LEA("0x4000(%%rdx, %%rcx, 4)");
576 TEST_LEA("0x4000(%%rsi, %%rcx, 8)");
579 #define TEST_JCC(JCC, v1, v2)\
580 { int one = 1; \
581 int res;\
582 asm("movl $1, %0\n\t"\
583 "cmpl %2, %1\n\t"\
584 "j" JCC " 1f\n\t"\
585 "movl $0, %0\n\t"\
586 "1:\n\t"\
587 : "=r" (res)\
588 : "r" (v1), "r" (v2));\
589 xxprintf("%-10s %d\n", "j" JCC, res);\
591 asm("movl $0, %0\n\t"\
592 "cmpl %2, %1\n\t"\
593 "set" JCC " %b0\n\t"\
594 : "=r" (res)\
595 : "r" (v1), "r" (v2));\
596 xxprintf("%-10s %d\n", "set" JCC, res);\
598 asm("movl $0x12345678, %0\n\t"\
599 "cmpl %2, %1\n\t"\
600 "cmov" JCC "l %3, %0\n\t"\
601 : "=r" (res)\
602 : "r" (v1), "r" (v2), "m" (one));\
603 xxprintf("%-10s R=0x%08x\n", "cmov" JCC "l", res);\
604 asm("movl $0x12345678, %0\n\t"\
605 "cmpl %2, %1\n\t"\
606 "cmov" JCC "w %w3, %w0\n\t"\
607 : "=r" (res)\
608 : "r" (v1), "r" (v2), "r" (one));\
609 xxprintf("%-10s R=0x%08x\n", "cmov" JCC "w", res);\
613 /* various jump tests */
614 void test_jcc(void)
616 TEST_JCC("ne", 1, 1);
617 TEST_JCC("ne", 1, 0);
619 TEST_JCC("e", 1, 1);
620 TEST_JCC("e", 1, 0);
622 TEST_JCC("l", 1, 1);
623 TEST_JCC("l", 1, 0);
624 TEST_JCC("l", 1, -1);
626 TEST_JCC("le", 1, 1);
627 TEST_JCC("le", 1, 0);
628 TEST_JCC("le", 1, -1);
630 TEST_JCC("ge", 1, 1);
631 TEST_JCC("ge", 1, 0);
632 TEST_JCC("ge", -1, 1);
634 TEST_JCC("g", 1, 1);
635 TEST_JCC("g", 1, 0);
636 TEST_JCC("g", 1, -1);
638 TEST_JCC("b", 1, 1);
639 TEST_JCC("b", 1, 0);
640 TEST_JCC("b", 1, -1);
642 TEST_JCC("be", 1, 1);
643 TEST_JCC("be", 1, 0);
644 TEST_JCC("be", 1, -1);
646 TEST_JCC("ae", 1, 1);
647 TEST_JCC("ae", 1, 0);
648 TEST_JCC("ae", 1, -1);
650 TEST_JCC("a", 1, 1);
651 TEST_JCC("a", 1, 0);
652 TEST_JCC("a", 1, -1);
655 TEST_JCC("p", 1, 1);
656 TEST_JCC("p", 1, 0);
658 TEST_JCC("np", 1, 1);
659 TEST_JCC("np", 1, 0);
661 TEST_JCC("o", 0x7fffffff, 0);
662 TEST_JCC("o", 0x7fffffff, -1);
664 TEST_JCC("no", 0x7fffffff, 0);
665 TEST_JCC("no", 0x7fffffff, -1);
667 TEST_JCC("s", 0, 1);
668 TEST_JCC("s", 0, -1);
669 TEST_JCC("s", 0, 0);
671 TEST_JCC("ns", 0, 1);
672 TEST_JCC("ns", 0, -1);
673 TEST_JCC("ns", 0, 0);
676 #undef CC_MASK
677 #ifdef TEST_P4_FLAGS
678 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
679 #else
680 #define CC_MASK (CC_O | CC_C)
681 #endif
683 #define OP mul
684 #include "fb_test_amd64_muldiv.h"
686 #define OP imul
687 #include "fb_test_amd64_muldiv.h"
689 void test_imulw2(int64 op0, int64 op1)
691 int64 res, s1, s0, flags;
692 s0 = op0;
693 s1 = op1;
694 res = s0;
695 flags = 0;
696 asm ("pushq %4\n\t"
697 "popfq\n\t"
698 "imulw %w2, %w0\n\t"
699 "pushfq\n\t"
700 "popq %1\n\t"
701 : "=q" (res), "=g" (flags)
702 : "q" (s1), "0" (res), "1" (flags));
703 xxprintf("%-10s A=%016llx B=%016llx R=%016llx CC=%04llx\n",
704 "imulw", s0, s1, res, flags & CC_MASK);
707 void test_imull2(int64 op0, int64 op1)
709 int res, s1;
710 int64 s0, flags;
711 s0 = op0;
712 s1 = op1;
713 res = s0;
714 flags = 0;
715 asm ("pushq %4\n\t"
716 "popfq\n\t"
717 "imull %2, %0\n\t"
718 "pushfq\n\t"
719 "popq %1\n\t"
720 : "=q" (res), "=g" (flags)
721 : "q" (s1), "0" (res), "1" (flags));
722 xxprintf("%-10s A=%016llx B=%08x R=%08x CC=%04llx\n",
723 "imull", s0, s1, res, flags & CC_MASK);
/*
 * Three-operand imul with an immediate: res = op1 * op0, where op0 is
 * pasted into the instruction as "$op0".  size is the mnemonic suffix and
 * size1 the operand-size modifier used for the register operands.
 */
#define TEST_IMUL_IM(size, size1, op0, op1)\
{\
    int64 res, flags;\
    flags = 0;\
    res = 0;\
    asm ("pushq %3\n\t"\
         "popfq\n\t"\
         "imul" size " $" #op0 ", %" size1 "2, %" size1 "0\n\t" \
         "pushfq\n\t"\
         "popq %1\n\t"\
         : "=r" (res), "=g" (flags)\
         : "r" (op1), "1" (flags), "0" (res));\
    xxprintf("%-10s A=%08x B=%08x R=%016llx CC=%04llx\n",\
           "imul" size, op0, op1, res, flags & CC_MASK);\
}

/*
 * 32-bit form of the above.
 *
 * NOTE(review): the product is written to res64 (declared int despite its
 * name) but the xxprintf prints res, which is only an input and is always
 * 0.  Left as is because the hashed output -- and therefore the expected
 * MD5 quoted in main() -- depends on it; fixing it requires regenerating
 * the expected result.
 */
#define TEST_IMUL_IM_L(op0, op1)\
{\
    int64 flags = 0;\
    int res = 0;\
    int res64 = 0;\
    asm ("pushq %3\n\t"\
         "popfq\n\t"\
         "imul $" #op0 ", %2, %0\n\t" \
         "pushfq\n\t"\
         "popq %1\n\t"\
         : "=r" (res64), "=g" (flags)\
         : "r" (op1), "1" (flags), "0" (res));\
    xxprintf("%-10s A=%08x B=%08x R=%08x CC=%04llx\n",\
           "imull", op0, op1, res, flags & CC_MASK);\
}
759 #undef CC_MASK
760 #define CC_MASK (0)
762 #define OP div
763 #include "fb_test_amd64_muldiv.h"
765 #define OP idiv
766 #include "fb_test_amd64_muldiv.h"
/*
 * Drives all multiply and divide tests: one-operand imul/mul at 8/16/32
 * bits, two-operand imul, immediate imul, then idiv/div at 8/16/32/64
 * bits.  The test_imulb/.../test_divq helpers are not defined in the
 * visible part of this file (presumably generated by
 * fb_test_amd64_muldiv.h).
 */
void test_mul(void)
{
    test_imulb(0x1234561d, 4);
    test_imulb(3, -4);
    test_imulb(0x80, 0x80);
    test_imulb(0x10, 0x10);

    test_imulw(0, 0, 0);
    test_imulw(0, 0xFF, 0xFF);
    test_imulw(0, 0xFF, 0x100);
    test_imulw(0, 0x1234001d, 45);
    test_imulw(0, 23, -45);
    test_imulw(0, 0x8000, 0x8000);
    test_imulw(0, 0x100, 0x100);

    test_imull(0, 0, 0);
    test_imull(0, 0xFFFF, 0xFFFF);
    test_imull(0, 0xFFFF, 0x10000);
    test_imull(0, 0x1234001d, 45);
    test_imull(0, 23, -45);
    test_imull(0, 0x80000000, 0x80000000);
    test_imull(0, 0x10000, 0x10000);

    test_mulb(0x1234561d, 4);
    test_mulb(3, -4);
    test_mulb(0x80, 0x80);
    test_mulb(0x10, 0x10);

    test_mulw(0, 0x1234001d, 45);
    test_mulw(0, 23, -45);
    test_mulw(0, 0x8000, 0x8000);
    test_mulw(0, 0x100, 0x100);

    test_mull(0, 0x1234001d, 45);
    test_mull(0, 23, -45);
    test_mull(0, 0x80000000, 0x80000000);
    test_mull(0, 0x10000, 0x10000);

    test_imulw2(0x1234001d, 45);
    test_imulw2(23, -45);
    test_imulw2(0x8000, 0x8000);
    test_imulw2(0x100, 0x100);

    test_imull2(0x1234001d, 45);
    test_imull2(23, -45);
    test_imull2(0x80000000, 0x80000000);
    test_imull2(0x10000, 0x10000);

    TEST_IMUL_IM("w", "w", 45, 0x1234);
    TEST_IMUL_IM("w", "w", -45, 23);
    TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
    TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);

    TEST_IMUL_IM_L(45, 0x1234);
    TEST_IMUL_IM_L(-45, 23);
    TEST_IMUL_IM_L(0x8000, 0x80000000);
    TEST_IMUL_IM_L(0x7fff, 0x1000);

    test_idivb(0x12341678, 0x127e);
    test_idivb(0x43210123, -5);
    test_idivb(0x12340004, -1);

    test_idivw(0, 0x12345678, 12347);
    test_idivw(0, -23223, -45);
    test_idivw(0, 0x12348000, -1);
    test_idivw(0x12343, 0x12345678, 0x81238567);

    test_idivl(0, 0x12345678, 12347);
    test_idivl(0, -233223, -45);
    test_idivl(0, 0x80000000, -1);
    test_idivl(0x12343, 0x12345678, 0x81234567);

    test_idivq(0, 0x12345678, 12347);
    test_idivq(0, -233223, -45);
    test_idivq(0, 0x80000000, -1);
    test_idivq(0x12343, 0x12345678, 0x81234567);

    test_divb(0x12341678, 0x127e);
    test_divb(0x43210123, -5);
    test_divb(0x12340004, -1);

    test_divw(0, 0x12345678, 12347);
    test_divw(0, -23223, -45);
    test_divw(0, 0x12348000, -1);
    test_divw(0x12343, 0x12345678, 0x81238567);

    test_divl(0, 0x12345678, 12347);
    test_divl(0, -233223, -45);
    test_divl(0, 0x80000000, -1);
    test_divl(0x12343, 0x12345678, 0x81234567);

    test_divq(0, 0x12345678, 12347);
    test_divq(0, -233223, -45);
    test_divq(0, 0x80000000, -1);
    test_divq(0x12343, 0x12345678, 0x81234567);
}
865 #define TEST_BSX(op, size, op0)\
867 int res, val, resz;\
868 val = op0;\
869 asm("xorl %1, %1\n"\
870 "movl $0x12345678, %0\n"\
871 #op " %" size "2, %" size "0 ; setz %b1" \
872 : "=r" (res), "=q" (resz)\
873 : "r" (val));\
874 xxprintf("%-10s A=%08x R=%08x %d\n", #op, val, res, resz);\
877 void test_bsx(void)
879 TEST_BSX(bsrw, "w", 0);
880 TEST_BSX(bsrw, "w", 0x12340128);
881 TEST_BSX(bsrl, "", 0);
882 TEST_BSX(bsrl, "", 0x00340128);
883 TEST_BSX(bsfw, "w", 0);
884 TEST_BSX(bsfw, "w", 0x12340128);
885 TEST_BSX(bsfl, "", 0);
886 TEST_BSX(bsfl, "", 0x00340128);
/**********************************************/

/*
 * Records the results of the basic double-precision arithmetic operators
 * and a set of <math.h> functions applied to a and b.  Everything is
 * formatted with %f and hashed through xxprintf.
 */
void test_fops(double a, double b)
{
    xxprintf("a=%f b=%f a+b=%f\n", a, b, a + b);
    xxprintf("a=%f b=%f a-b=%f\n", a, b, a - b);
    xxprintf("a=%f b=%f a*b=%f\n", a, b, a * b);
    xxprintf("a=%f b=%f a/b=%f\n", a, b, a / b);
    xxprintf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));
    xxprintf("a=%f sqrt(a)=%f\n", a, sqrt(a));
    xxprintf("a=%f sin(a)=%f\n", a, sin(a));
    xxprintf("a=%f cos(a)=%f\n", a, cos(a));
    xxprintf("a=%f tan(a)=%f\n", a, tan(a));
    xxprintf("a=%f log(a)=%f\n", a, log(a));
    xxprintf("a=%f exp(a)=%f\n", a, exp(a));
    xxprintf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
    /* just to test some op combining */
    xxprintf("a=%f asin(sin(a))=%f\n", a, asin(sin(a)));
    xxprintf("a=%f acos(cos(a))=%f\n", a, acos(cos(a)));
    xxprintf("a=%f atan(tan(a))=%f\n", a, atan(tan(a)));
}
911 void test_fcmp(double a, double b)
913 xxprintf("(%f<%f)=%d\n",
914 a, b, a < b);
915 xxprintf("(%f<=%f)=%d\n",
916 a, b, a <= b);
917 xxprintf("(%f==%f)=%d\n",
918 a, b, a == b);
919 xxprintf("(%f>%f)=%d\n",
920 a, b, a > b);
921 xxprintf("(%f<=%f)=%d\n",
922 a, b, a >= b);
924 unsigned long long int rflags;
925 /* test f(u)comi instruction */
926 asm("fcomi %2, %1\n"
927 "pushfq\n"
928 "popq %0\n"
929 : "=r" (rflags)
930 : "t" (a), "u" (b));
931 xxprintf("fcomi(%f %f)=%016llx\n", a, b, rflags & (CC_Z | CC_P | CC_C));
935 void test_fcvt(double a)
937 float fa;
938 long double la;
939 int16_t fpuc;
940 int i;
941 int64 lla;
942 int ia;
943 int16_t wa;
944 double ra;
946 fa = a;
947 la = a;
948 xxprintf("(float)%f = %f\n", a, fa);
949 xxprintf("(long double)%f = %Lf\n", a, la);
950 xxprintf("a=%016llx\n", *(unsigned long long int *) &a);
951 xxprintf("la=%016llx %04x\n", *(unsigned long long int *) &la,
952 *(unsigned short *) ((char *)(&la) + 8));
954 /* test all roundings */
955 asm volatile ("fstcw %0" : "=m" (fpuc));
956 for(i=0;i<4;i++) {
957 short zz = (fpuc & ~0x0c00) | (i << 10);
958 asm volatile ("fldcw %0" : : "m" (zz));
959 asm volatile ("fists %0" : "=m" (wa) : "t" (a));
960 asm volatile ("fistl %0" : "=m" (ia) : "t" (a));
961 asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st");
962 asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a));
963 asm volatile ("fldcw %0" : : "m" (fpuc));
964 xxprintf("(short)a = %d\n", wa);
965 xxprintf("(int)a = %d\n", ia);
966 xxprintf("(int64_t)a = %lld\n", lla);
967 xxprintf("rint(a) = %f\n", ra);
971 #define TEST(N) \
972 asm("fld" #N : "=t" (a)); \
973 xxprintf("fld" #N "= %f\n", a);
975 void test_fconst(void)
977 double a;
978 TEST(1);
979 TEST(l2t);
980 TEST(l2e);
981 TEST(pi);
982 TEST(lg2);
983 TEST(ln2);
984 TEST(z);
987 void test_fbcd(double a)
989 unsigned short bcd[5];
990 double b;
992 asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st");
993 asm("fbld %1" : "=t" (b) : "m" (bcd[0]));
994 xxprintf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n",
995 a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b);
998 #define TEST_ENV(env, save, restore)\
1000 memset((env), 0xaa, sizeof(*(env)));\
1001 for(i=0;i<5;i++)\
1002 asm volatile ("fldl %0" : : "m" (dtab[i]));\
1003 asm(save " %0\n" : : "m" (*(env)));\
1004 asm(restore " %0\n": : "m" (*(env)));\
1005 for(i=0;i<5;i++)\
1006 asm volatile ("fstpl %0" : "=m" (rtab[i]));\
1007 for(i=0;i<5;i++)\
1008 xxprintf("res[%d]=%f\n", i, rtab[i]);\
1009 xxprintf("fpuc=%04x fpus=%04x fptag=%04x\n",\
1010 (env)->fpuc,\
1011 (env)->fpus & 0xff00,\
1012 (env)->fptag);\
1015 void test_fenv(void)
1017 struct __attribute__((packed)) {
1018 uint16_t fpuc;
1019 uint16_t dummy1;
1020 uint16_t fpus;
1021 uint16_t dummy2;
1022 uint16_t fptag;
1023 uint16_t dummy3;
1024 uint32_t ignored[4];
1025 long double fpregs[8];
1026 } float_env32;
1027 double dtab[8];
1028 double rtab[8];
1029 int i;
1031 for(i=0;i<8;i++)
1032 dtab[i] = i + 1;
1034 TEST_ENV(&float_env32, "fnstenv", "fldenv");
1035 TEST_ENV(&float_env32, "fnsave", "frstor");
1037 /* test for ffree */
1038 for(i=0;i<5;i++)
1039 asm volatile ("fldl %0" : : "m" (dtab[i]));
1040 asm volatile("ffree %st(2)");
1041 asm volatile ("fnstenv %0\n" : : "m" (float_env32));
1042 asm volatile ("fninit");
1043 xxprintf("fptag=%04x\n", float_env32.fptag);
1047 #define TEST_FCMOV(a, b, rflags, CC)\
1049 double res;\
1050 asm("pushq %3\n"\
1051 "popfq\n"\
1052 "fcmov" CC " %2, %0\n"\
1053 : "=t" (res)\
1054 : "0" (a), "u" (b), "g" (rflags));\
1055 xxprintf("fcmov%s rflags=0x%04llx-> %f\n", \
1056 CC, rflags, res);\
1059 void test_fcmov(void)
1061 double a, b;
1062 int64 rflags, i;
1064 a = 1.0;
1065 b = 2.0;
1066 for(i = 0; i < 4; i++) {
1067 rflags = 0;
1068 if (i & 1)
1069 rflags |= CC_C;
1070 if (i & 2)
1071 rflags |= CC_Z;
1072 TEST_FCMOV(a, b, rflags, "b");
1073 TEST_FCMOV(a, b, rflags, "e");
1074 TEST_FCMOV(a, b, rflags, "be");
1075 TEST_FCMOV(a, b, rflags, "nb");
1076 TEST_FCMOV(a, b, rflags, "ne");
1077 TEST_FCMOV(a, b, rflags, "nbe");
1079 TEST_FCMOV(a, b, (int64)0, "u");
1080 TEST_FCMOV(a, b, (int64)CC_P, "u");
1081 TEST_FCMOV(a, b, (int64)0, "nu");
1082 TEST_FCMOV(a, b, (int64)CC_P, "nu");
/*
 * Runs the floating-point arithmetic, comparison, conversion and constant
 * tests on a fixed set of inputs.  The fbcd, fenv and fcmov calls are
 * commented out here; main() calls test_fenv() and test_fcmov() directly.
 */
void test_floats(void)
{
    test_fops(2, 3);
    test_fops(1.4, -5);
    test_fcmp(2, -1);
    test_fcmp(2, 2);
    test_fcmp(2, 3);
    test_fcvt(0.5);
    test_fcvt(-0.5);
    test_fcvt(1.0/7.0);
    test_fcvt(-1.0/9.0);
    test_fcvt(32768);
    test_fcvt(-1e20);
    test_fconst();
    // REINSTATE (maybe): test_fbcd(1234567890123456);
    // REINSTATE (maybe): test_fbcd(-123451234567890);
    // REINSTATE: test_fenv();
    // REINSTATE: test_fcmov();
}
/**********************************************/

/*
 * Executes two-operand instruction `op` on op0 / op1 (both tied as input
 * and output) and records both afterwards.  opconst is the output
 * constraint string used for op1.  Not invoked in the visible code.
 */
#define TEST_XCHG(op, size, opconst)\
{\
    int op0, op1;\
    op0 = 0x12345678;\
    op1 = 0xfbca7654;\
    asm(#op " %" size "0, %" size "1" \
        : "=q" (op0), opconst (op1) \
        : "0" (op0), "1" (op1));\
    xxprintf("%-10s A=%08x B=%08x\n",\
           #op, op0, op1);\
}

/*
 * As TEST_XCHG, with the accumulator additionally preloaded with `eax`
 * for cmpxchg-style instructions.  Not invoked in the visible code.
 */
#define TEST_CMPXCHG(op, size, opconst, eax)\
{\
    int op0, op1;\
    op0 = 0x12345678;\
    op1 = 0xfbca7654;\
    asm(#op " %" size "0, %" size "1" \
        : "=q" (op0), opconst (op1) \
        : "0" (op0), "1" (op1), "a" (eax));\
    xxprintf("%-10s EAX=%08x A=%08x C=%08x\n",\
           #op, eax, op0, op1);\
}
/**********************************************/
/* segmentation tests */

/* Declared but not referenced anywhere in the visible code. */
extern char func_lret32;
extern char func_iret32;

/* Scratch memory for the string-instruction tests, which point rsi/rdi
   at its middle so the instructions can run in either direction. */
uint8_t str_buffer[4096];
1140 #define TEST_STRING1(OP, size, DF, REP)\
1142 int64 rsi, rdi, rax, rcx, rflags;\
1144 rsi = (long)(str_buffer + sizeof(str_buffer) / 2);\
1145 rdi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
1146 rax = 0x12345678;\
1147 rcx = 17;\
1149 asm volatile ("pushq $0\n\t"\
1150 "popfq\n\t"\
1151 DF "\n\t"\
1152 REP #OP size "\n\t"\
1153 "cld\n\t"\
1154 "pushfq\n\t"\
1155 "popq %4\n\t"\
1156 : "=S" (rsi), "=D" (rdi), "=a" (rax), "=c" (rcx), "=g" (rflags)\
1157 : "0" (rsi), "1" (rdi), "2" (rax), "3" (rcx));\
1158 xxprintf("%-10s ESI=%016llx EDI=%016llx EAX=%016llx ECX=%016llx EFL=%04llx\n",\
1159 REP #OP size, rsi, rdi, rax, rcx,\
1160 rflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
1163 #define TEST_STRING(OP, REP)\
1164 TEST_STRING1(OP, "b", "", REP);\
1165 TEST_STRING1(OP, "w", "", REP);\
1166 TEST_STRING1(OP, "l", "", REP);\
1167 TEST_STRING1(OP, "b", "std", REP);\
1168 TEST_STRING1(OP, "w", "std", REP);\
1169 TEST_STRING1(OP, "l", "std", REP)
1171 void test_string(void)
1173 int64 i;
1174 for(i = 0;i < sizeof(str_buffer); i++)
1175 str_buffer[i] = i + 0x56;
1176 TEST_STRING(stos, "");
1177 TEST_STRING(stos, "rep ");
1178 TEST_STRING(lods, ""); /* to verify stos */
1179 // TEST_STRING(lods, "rep ");
1180 TEST_STRING(movs, "");
1181 TEST_STRING(movs, "rep ");
1182 TEST_STRING(lods, ""); /* to verify stos */
1184 /* XXX: better tests */
1185 TEST_STRING(scas, "");
1186 TEST_STRING(scas, "repz ");
1187 TEST_STRING(scas, "repnz ");
1188 // REINSTATE? TEST_STRING(cmps, "");
1189 TEST_STRING(cmps, "repz ");
1190 // REINSTATE? TEST_STRING(cmps, "repnz ");
/*
 * Runs every enabled test group, hashing all of their output, and prints
 * the final MD5.  argc and argv are unused.
 *
 * test_fconst() runs twice: once directly here and once from
 * test_floats(); both runs contribute to the hash.
 */
int main(int argc, char **argv)
{
    // The three commented out test cases produce different results at different
    // compiler optimisation levels.  This suggests to me that their inline
    // assembly is incorrect.  I don't have time to investigate now, though.  So
    // they are disabled.
    xxprintf_start();
    test_adc();
    test_adcx();
    test_add();
    test_adox();
    test_and();
    // test_bsx();
    test_cmp();
    test_dec();
    test_fcmov();
    test_fconst();
    test_fenv();
    test_floats();
    test_inc();
    // test_jcc();
    test_lea();
    test_mul();
    test_neg();
    test_not();
    test_or();
    test_rcl();
    test_rcr();
    test_rol();
    test_ror();
    test_sar();
    test_sbb();
    test_shl();
    test_shr();
    // test_string();
    test_sub();
    test_xor();
    xxprintf_done();
    // the expected MD5SUM is 66802c845574c7c69f30d29ef85f7ca3
    return 0;
}