2 /* Contrary to what the next comment says, this is now an amd64 CPU test.
8 * Copyright (c) 2003 Fabrice Bellard
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, see <http://www.gnu.org/licenses/>.
33 //////////////////////////////////////////////////////////////////
34 //////////////////////////////////////////////////////////////////
37 * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
38 * MD5 Message-Digest Algorithm (RFC 1321).
41 * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
44 * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
46 * This software was written by Alexander Peslyak in 2001. No copyright is
47 * claimed, and the software is hereby placed in the public domain.
48 * In case this attempt to disclaim copyright and place the software in the
49 * public domain is deemed null and void, then the software is
50 * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
51 * general public under the following terms:
53 * Redistribution and use in source and binary forms, with or without
54 * modification, are permitted.
56 * There's ABSOLUTELY NO WARRANTY, express or implied.
58 * (This is a heavily cut-down "BSD license".)
60 * This differs from Colin Plumb's older public domain implementation in that
61 * no exactly 32-bit integer data type is required (any 32-bit or wider
62 * unsigned integer data type will do), there's no compile-time endianness
63 * configuration, and the function prototypes match OpenSSL's. No code from
64 * Colin Plumb's implementation has been reused; this comment merely compares
65 * the properties of the two independent implementations.
67 * The primary goals of this implementation are portability and ease of use.
68 * It is meant to be fast, but not as fast as possible. Some known
69 * optimizations are not included to reduce source code size and avoid
70 * compile-time configuration.
75 // BEGIN #include "md5.h"
/* Any 32-bit or wider unsigned integer data type will do */
typedef unsigned int MD5_u32plus;
81 MD5_u32plus a
, b
, c
, d
;
82 unsigned char buffer
[64];
83 MD5_u32plus block
[16];
86 void MD5_Init(MD5_CTX
*ctx
);
87 void MD5_Update(MD5_CTX
*ctx
, const void *data
, unsigned long size
);
88 void MD5_Final(unsigned char *result
, MD5_CTX
*ctx
);
89 // END #include "md5.h"
92 * The basic MD5 functions.
94 * F and G are optimized compared to their RFC 1321 definitions for
95 * architectures that lack an AND-NOT instruction, just like in Colin Plumb's implementation.
/*
 * Bitwise round functions.
 *   F(x, y, z) is equal to (x & y) | (~x & z)
 *   G(x, y, z) is equal to (x & z) | (y & ~z)
 *   H and H2 both compute x ^ y ^ z and differ only in how the XORs
 *   are grouped.
 *   I(x, y, z) is y ^ (x | ~z)
 * Every argument is parenthesized, so expressions may be passed safely;
 * arguments that appear twice in an expansion are evaluated twice.
 */
#define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z)			((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z)			(((x) ^ (y)) ^ (z))
#define H2(x, y, z)			((x) ^ ((y) ^ (z)))
#define I(x, y, z)			((y) ^ ((x) | ~(z)))
105 * The MD5 transformation for all four rounds.
107 #define STEP(f, a, b, c, d, x, t, s) \
108 (a) += f((b), (c), (d)) + (x) + (t); \
109 (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
113 * SET reads 4 input bytes in little-endian byte order and stores them in a
114 * properly aligned word in host byte order.
116 * The check for little-endian architectures that tolerate unaligned memory
117 * accesses is just an optimization. Nothing will break if it fails to detect
118 * a suitable architecture.
120 * Unfortunately, this optimization may be a C strict aliasing rules violation
121 * if the caller's data buffer has effective type that cannot be aliased by
122 * MD5_u32plus. In practice, this problem may occur if these MD5 routines are
123 * inlined into a calling function, or with future and dangerously advanced
124 * link-time optimizations. For the time being, keeping these MD5 routines in
125 * their own translation unit avoids the problem.
127 #if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
129 (*(MD5_u32plus *)&ptr[(n) * 4])
135 (MD5_u32plus)ptr[(n) * 4] | \
136 ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
137 ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
138 ((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
144 * This processes one or more 64-byte data blocks, but does NOT update the bit
145 * counters. There are no alignment requirements.
147 static const void *body(MD5_CTX
*ctx
, const void *data
, unsigned long size
)
149 const unsigned char *ptr
;
150 MD5_u32plus a
, b
, c
, d
;
151 MD5_u32plus saved_a
, saved_b
, saved_c
, saved_d
;
153 ptr
= (const unsigned char *)data
;
167 STEP(F
, a
, b
, c
, d
, SET(0), 0xd76aa478, 7)
168 STEP(F
, d
, a
, b
, c
, SET(1), 0xe8c7b756, 12)
169 STEP(F
, c
, d
, a
, b
, SET(2), 0x242070db, 17)
170 STEP(F
, b
, c
, d
, a
, SET(3), 0xc1bdceee, 22)
171 STEP(F
, a
, b
, c
, d
, SET(4), 0xf57c0faf, 7)
172 STEP(F
, d
, a
, b
, c
, SET(5), 0x4787c62a, 12)
173 STEP(F
, c
, d
, a
, b
, SET(6), 0xa8304613, 17)
174 STEP(F
, b
, c
, d
, a
, SET(7), 0xfd469501, 22)
175 STEP(F
, a
, b
, c
, d
, SET(8), 0x698098d8, 7)
176 STEP(F
, d
, a
, b
, c
, SET(9), 0x8b44f7af, 12)
177 STEP(F
, c
, d
, a
, b
, SET(10), 0xffff5bb1, 17)
178 STEP(F
, b
, c
, d
, a
, SET(11), 0x895cd7be, 22)
179 STEP(F
, a
, b
, c
, d
, SET(12), 0x6b901122, 7)
180 STEP(F
, d
, a
, b
, c
, SET(13), 0xfd987193, 12)
181 STEP(F
, c
, d
, a
, b
, SET(14), 0xa679438e, 17)
182 STEP(F
, b
, c
, d
, a
, SET(15), 0x49b40821, 22)
185 STEP(G
, a
, b
, c
, d
, GET(1), 0xf61e2562, 5)
186 STEP(G
, d
, a
, b
, c
, GET(6), 0xc040b340, 9)
187 STEP(G
, c
, d
, a
, b
, GET(11), 0x265e5a51, 14)
188 STEP(G
, b
, c
, d
, a
, GET(0), 0xe9b6c7aa, 20)
189 STEP(G
, a
, b
, c
, d
, GET(5), 0xd62f105d, 5)
190 STEP(G
, d
, a
, b
, c
, GET(10), 0x02441453, 9)
191 STEP(G
, c
, d
, a
, b
, GET(15), 0xd8a1e681, 14)
192 STEP(G
, b
, c
, d
, a
, GET(4), 0xe7d3fbc8, 20)
193 STEP(G
, a
, b
, c
, d
, GET(9), 0x21e1cde6, 5)
194 STEP(G
, d
, a
, b
, c
, GET(14), 0xc33707d6, 9)
195 STEP(G
, c
, d
, a
, b
, GET(3), 0xf4d50d87, 14)
196 STEP(G
, b
, c
, d
, a
, GET(8), 0x455a14ed, 20)
197 STEP(G
, a
, b
, c
, d
, GET(13), 0xa9e3e905, 5)
198 STEP(G
, d
, a
, b
, c
, GET(2), 0xfcefa3f8, 9)
199 STEP(G
, c
, d
, a
, b
, GET(7), 0x676f02d9, 14)
200 STEP(G
, b
, c
, d
, a
, GET(12), 0x8d2a4c8a, 20)
203 STEP(H
, a
, b
, c
, d
, GET(5), 0xfffa3942, 4)
204 STEP(H2
, d
, a
, b
, c
, GET(8), 0x8771f681, 11)
205 STEP(H
, c
, d
, a
, b
, GET(11), 0x6d9d6122, 16)
206 STEP(H2
, b
, c
, d
, a
, GET(14), 0xfde5380c, 23)
207 STEP(H
, a
, b
, c
, d
, GET(1), 0xa4beea44, 4)
208 STEP(H2
, d
, a
, b
, c
, GET(4), 0x4bdecfa9, 11)
209 STEP(H
, c
, d
, a
, b
, GET(7), 0xf6bb4b60, 16)
210 STEP(H2
, b
, c
, d
, a
, GET(10), 0xbebfbc70, 23)
211 STEP(H
, a
, b
, c
, d
, GET(13), 0x289b7ec6, 4)
212 STEP(H2
, d
, a
, b
, c
, GET(0), 0xeaa127fa, 11)
213 STEP(H
, c
, d
, a
, b
, GET(3), 0xd4ef3085, 16)
214 STEP(H2
, b
, c
, d
, a
, GET(6), 0x04881d05, 23)
215 STEP(H
, a
, b
, c
, d
, GET(9), 0xd9d4d039, 4)
216 STEP(H2
, d
, a
, b
, c
, GET(12), 0xe6db99e5, 11)
217 STEP(H
, c
, d
, a
, b
, GET(15), 0x1fa27cf8, 16)
218 STEP(H2
, b
, c
, d
, a
, GET(2), 0xc4ac5665, 23)
221 STEP(I
, a
, b
, c
, d
, GET(0), 0xf4292244, 6)
222 STEP(I
, d
, a
, b
, c
, GET(7), 0x432aff97, 10)
223 STEP(I
, c
, d
, a
, b
, GET(14), 0xab9423a7, 15)
224 STEP(I
, b
, c
, d
, a
, GET(5), 0xfc93a039, 21)
225 STEP(I
, a
, b
, c
, d
, GET(12), 0x655b59c3, 6)
226 STEP(I
, d
, a
, b
, c
, GET(3), 0x8f0ccc92, 10)
227 STEP(I
, c
, d
, a
, b
, GET(10), 0xffeff47d, 15)
228 STEP(I
, b
, c
, d
, a
, GET(1), 0x85845dd1, 21)
229 STEP(I
, a
, b
, c
, d
, GET(8), 0x6fa87e4f, 6)
230 STEP(I
, d
, a
, b
, c
, GET(15), 0xfe2ce6e0, 10)
231 STEP(I
, c
, d
, a
, b
, GET(6), 0xa3014314, 15)
232 STEP(I
, b
, c
, d
, a
, GET(13), 0x4e0811a1, 21)
233 STEP(I
, a
, b
, c
, d
, GET(4), 0xf7537e82, 6)
234 STEP(I
, d
, a
, b
, c
, GET(11), 0xbd3af235, 10)
235 STEP(I
, c
, d
, a
, b
, GET(2), 0x2ad7d2bb, 15)
236 STEP(I
, b
, c
, d
, a
, GET(9), 0xeb86d391, 21)
244 } while (size
-= 64);
254 void MD5_Init(MD5_CTX
*ctx
)
265 void MD5_Update(MD5_CTX
*ctx
, const void *data
, unsigned long size
)
267 MD5_u32plus saved_lo
;
268 unsigned long used
, available
;
271 if ((ctx
->lo
= (saved_lo
+ size
) & 0x1fffffff) < saved_lo
)
273 ctx
->hi
+= size
>> 29;
275 used
= saved_lo
& 0x3f;
278 available
= 64 - used
;
280 if (size
< available
) {
281 memcpy(&ctx
->buffer
[used
], data
, size
);
285 memcpy(&ctx
->buffer
[used
], data
, available
);
286 data
= (const unsigned char *)data
+ available
;
288 body(ctx
, ctx
->buffer
, 64);
292 data
= body(ctx
, data
, size
& ~(unsigned long)0x3f);
296 memcpy(ctx
->buffer
, data
, size
);
/*
 * Store the low 32 bits of src into dst[0..3], least significant byte
 * first.
 *
 * The expansion deliberately ends in ';': the call sites in this file
 * write OUT(...) with no trailing semicolon of their own.  src is
 * evaluated four times, so it must not have side effects.
 */
#define OUT(dst, src) \
	(dst)[0] = (unsigned char)(src); \
	(dst)[1] = (unsigned char)((src) >> 8); \
	(dst)[2] = (unsigned char)((src) >> 16); \
	(dst)[3] = (unsigned char)((src) >> 24);
305 void MD5_Final(unsigned char *result
, MD5_CTX
*ctx
)
307 unsigned long used
, available
;
309 used
= ctx
->lo
& 0x3f;
311 ctx
->buffer
[used
++] = 0x80;
313 available
= 64 - used
;
316 memset(&ctx
->buffer
[used
], 0, available
);
317 body(ctx
, ctx
->buffer
, 64);
322 memset(&ctx
->buffer
[used
], 0, available
- 8);
325 OUT(&ctx
->buffer
[56], ctx
->lo
)
326 OUT(&ctx
->buffer
[60], ctx
->hi
)
328 body(ctx
, ctx
->buffer
, 64);
330 OUT(&result
[0], ctx
->a
)
331 OUT(&result
[4], ctx
->b
)
332 OUT(&result
[8], ctx
->c
)
333 OUT(&result
[12], ctx
->d
)
335 memset(ctx
, 0, sizeof(*ctx
));
339 //////////////////////////////////////////////////////////////////
340 //////////////////////////////////////////////////////////////////
342 static MD5_CTX md5ctx
;
344 void xxprintf_start(void)
349 void xxprintf_done(void)
351 const char hexchar
[16] = "0123456789abcdef";
352 unsigned char result
[100];
353 memset(result
, 0, sizeof(result
));
354 MD5_Final(&result
[0], &md5ctx
);
355 printf("final MD5 = ");
357 for (i
= 0; i
< 16; i
++) {
358 printf("%c%c", hexchar
[0xF & (result
[i
] >> 4)],
359 hexchar
[0xF & (result
[i
] >> 0)]);
364 __attribute__((format(__printf__
, 1, 2)))
365 void xxprintf (const char *format
, ...)
368 memset(buf
, 0, sizeof(buf
));
371 va_start(vargs
, format
);
372 int n
= vsnprintf(buf
, sizeof(buf
)-1, format
, vargs
);
375 assert(n
< sizeof(buf
)-1);
376 assert(buf
[sizeof(buf
)-1] == 0);
377 assert(buf
[sizeof(buf
)-2] == 0);
379 MD5_Update(&md5ctx
, buf
, strlen(buf
));
380 if (0) printf("QQQ %s", buf
);
383 //////////////////////////////////////////////////////////////////
384 //////////////////////////////////////////////////////////////////
387 /* Setting this to 1 creates a very comprehensive test of
388 integer condition codes. */
389 #define TEST_INTEGER_VERBOSE 1
/* Signed integer type used for the test operands and results below. */
typedef long long int int64;
393 //#define LINUX_VM86_IOPL_FIX
394 //#define TEST_P4_FLAGS
/*
 * Token-pasting and stringizing helpers.  glue() and stringify() go
 * through a second macro so that their arguments are macro-expanded
 * before being pasted or stringized; xglue() and tostring() act on the
 * argument tokens exactly as written.
 */
#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)
#define stringify(s)	tostring(s)
#define tostring(s)	#s
408 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
411 #include "fb_test_amd64.h"
414 #include "fb_test_amd64.h"
417 #include "fb_test_amd64.h"
420 #include "fb_test_amd64.h"
423 #include "fb_test_amd64.h"
426 #include "fb_test_amd64.h"
430 #include "fb_test_amd64.h"
434 #include "fb_test_amd64.h"
439 #include "fb_test_amd64.h"
444 #include "fb_test_amd64.h"
449 #include "fb_test_amd64.h"
454 #include "fb_test_amd64.h"
459 #include "fb_test_amd64.h"
464 #include "fb_test_amd64.h"
467 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O)
470 #include "fb_test_amd64_shift.h"
473 #include "fb_test_amd64_shift.h"
476 #include "fb_test_amd64_shift.h"
479 #include "fb_test_amd64_shift.h"
482 #include "fb_test_amd64_shift.h"
486 #include "fb_test_amd64_shift.h"
490 #include "fb_test_amd64_shift.h"
492 /* XXX: should be more precise ? */
494 #define CC_MASK (CC_C)
496 /* lea test (modrm support) */
497 #define TEST_LEA(STR)\
499 asm("leaq " STR ", %0"\
501 : "a" (rax), "b" (rbx), "c" (rcx), "d" (rdx), "S" (rsi), "D" (rdi));\
502 xxprintf("lea %s = %016llx\n", STR, res);\
505 #define TEST_LEA16(STR)\
507 asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
509 : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
510 xxprintf("lea %s = %08x\n", STR, res);\
516 int64 rax
, rbx
, rcx
, rdx
, rsi
, rdi
, res
;
533 TEST_LEA("0x40(%%rax)");
534 TEST_LEA("0x40(%%rbx)");
535 TEST_LEA("0x40(%%rcx)");
536 TEST_LEA("0x40(%%rdx)");
537 TEST_LEA("0x40(%%rsi)");
538 TEST_LEA("0x40(%%rdi)");
540 TEST_LEA("0x4000(%%rax)");
541 TEST_LEA("0x4000(%%rbx)");
542 TEST_LEA("0x4000(%%rcx)");
543 TEST_LEA("0x4000(%%rdx)");
544 TEST_LEA("0x4000(%%rsi)");
545 TEST_LEA("0x4000(%%rdi)");
547 TEST_LEA("(%%rax, %%rcx)");
548 TEST_LEA("(%%rbx, %%rdx)");
549 TEST_LEA("(%%rcx, %%rcx)");
550 TEST_LEA("(%%rdx, %%rcx)");
551 TEST_LEA("(%%rsi, %%rcx)");
552 TEST_LEA("(%%rdi, %%rcx)");
554 TEST_LEA("0x40(%%rax, %%rcx)");
555 TEST_LEA("0x4000(%%rbx, %%rdx)");
557 TEST_LEA("(%%rcx, %%rcx, 2)");
558 TEST_LEA("(%%rdx, %%rcx, 4)");
559 TEST_LEA("(%%rsi, %%rcx, 8)");
561 TEST_LEA("(,%%rax, 2)");
562 TEST_LEA("(,%%rbx, 4)");
563 TEST_LEA("(,%%rcx, 8)");
565 TEST_LEA("0x40(,%%rax, 2)");
566 TEST_LEA("0x40(,%%rbx, 4)");
567 TEST_LEA("0x40(,%%rcx, 8)");
570 TEST_LEA("-10(%%rcx, %%rcx, 2)");
571 TEST_LEA("-10(%%rdx, %%rcx, 4)");
572 TEST_LEA("-10(%%rsi, %%rcx, 8)");
574 TEST_LEA("0x4000(%%rcx, %%rcx, 2)");
575 TEST_LEA("0x4000(%%rdx, %%rcx, 4)");
576 TEST_LEA("0x4000(%%rsi, %%rcx, 8)");
579 #define TEST_JCC(JCC, v1, v2)\
582 asm("movl $1, %0\n\t"\
588 : "r" (v1), "r" (v2));\
589 xxprintf("%-10s %d\n", "j" JCC, res);\
591 asm("movl $0, %0\n\t"\
593 "set" JCC " %b0\n\t"\
595 : "r" (v1), "r" (v2));\
596 xxprintf("%-10s %d\n", "set" JCC, res);\
598 asm("movl $0x12345678, %0\n\t"\
600 "cmov" JCC "l %3, %0\n\t"\
602 : "r" (v1), "r" (v2), "m" (one));\
603 xxprintf("%-10s R=0x%08x\n", "cmov" JCC "l", res);\
604 asm("movl $0x12345678, %0\n\t"\
606 "cmov" JCC "w %w3, %w0\n\t"\
608 : "r" (v1), "r" (v2), "r" (one));\
609 xxprintf("%-10s R=0x%08x\n", "cmov" JCC "w", res);\
613 /* various jump tests */
616 TEST_JCC("ne", 1, 1);
617 TEST_JCC("ne", 1, 0);
624 TEST_JCC("l", 1, -1);
626 TEST_JCC("le", 1, 1);
627 TEST_JCC("le", 1, 0);
628 TEST_JCC("le", 1, -1);
630 TEST_JCC("ge", 1, 1);
631 TEST_JCC("ge", 1, 0);
632 TEST_JCC("ge", -1, 1);
636 TEST_JCC("g", 1, -1);
640 TEST_JCC("b", 1, -1);
642 TEST_JCC("be", 1, 1);
643 TEST_JCC("be", 1, 0);
644 TEST_JCC("be", 1, -1);
646 TEST_JCC("ae", 1, 1);
647 TEST_JCC("ae", 1, 0);
648 TEST_JCC("ae", 1, -1);
652 TEST_JCC("a", 1, -1);
658 TEST_JCC("np", 1, 1);
659 TEST_JCC("np", 1, 0);
661 TEST_JCC("o", 0x7fffffff, 0);
662 TEST_JCC("o", 0x7fffffff, -1);
664 TEST_JCC("no", 0x7fffffff, 0);
665 TEST_JCC("no", 0x7fffffff, -1);
668 TEST_JCC("s", 0, -1);
671 TEST_JCC("ns", 0, 1);
672 TEST_JCC("ns", 0, -1);
673 TEST_JCC("ns", 0, 0);
678 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
680 #define CC_MASK (CC_O | CC_C)
684 #include "fb_test_amd64_muldiv.h"
687 #include "fb_test_amd64_muldiv.h"
689 void test_imulw2(int64 op0
, int64 op1
)
691 int64 res
, s1
, s0
, flags
;
701 : "=q" (res
), "=g" (flags
)
702 : "q" (s1
), "0" (res
), "1" (flags
));
703 xxprintf("%-10s A=%016llx B=%016llx R=%016llx CC=%04llx\n",
704 "imulw", s0
, s1
, res
, flags
& CC_MASK
);
707 void test_imull2(int64 op0
, int64 op1
)
720 : "=q" (res
), "=g" (flags
)
721 : "q" (s1
), "0" (res
), "1" (flags
));
722 xxprintf("%-10s A=%016llx B=%08x R=%08x CC=%04llx\n",
723 "imull", s0
, s1
, res
, flags
& CC_MASK
);
726 #define TEST_IMUL_IM(size, size1, op0, op1)\
733 "imul" size " $" #op0 ", %" size1 "2, %" size1 "0\n\t" \
736 : "=r" (res), "=g" (flags)\
737 : "r" (op1), "1" (flags), "0" (res));\
738 xxprintf("%-10s A=%08x B=%08x R=%016llx CC=%04llx\n",\
739 "imul" size, op0, op1, res, flags & CC_MASK);\
742 #define TEST_IMUL_IM_L(op0, op1)\
749 "imul $" #op0 ", %2, %0\n\t" \
752 : "=r" (res64), "=g" (flags)\
753 : "r" (op1), "1" (flags), "0" (res));\
754 xxprintf("%-10s A=%08x B=%08x R=%08x CC=%04llx\n",\
755 "imull", op0, op1, res, flags & CC_MASK);\
763 #include "fb_test_amd64_muldiv.h"
766 #include "fb_test_amd64_muldiv.h"
770 test_imulb(0x1234561d, 4);
772 test_imulb(0x80, 0x80);
773 test_imulb(0x10, 0x10);
776 test_imulw(0, 0xFF, 0xFF);
777 test_imulw(0, 0xFF, 0x100);
778 test_imulw(0, 0x1234001d, 45);
779 test_imulw(0, 23, -45);
780 test_imulw(0, 0x8000, 0x8000);
781 test_imulw(0, 0x100, 0x100);
784 test_imull(0, 0xFFFF, 0xFFFF);
785 test_imull(0, 0xFFFF, 0x10000);
786 test_imull(0, 0x1234001d, 45);
787 test_imull(0, 23, -45);
788 test_imull(0, 0x80000000, 0x80000000);
789 test_imull(0, 0x10000, 0x10000);
791 test_mulb(0x1234561d, 4);
793 test_mulb(0x80, 0x80);
794 test_mulb(0x10, 0x10);
796 test_mulw(0, 0x1234001d, 45);
797 test_mulw(0, 23, -45);
798 test_mulw(0, 0x8000, 0x8000);
799 test_mulw(0, 0x100, 0x100);
801 test_mull(0, 0x1234001d, 45);
802 test_mull(0, 23, -45);
803 test_mull(0, 0x80000000, 0x80000000);
804 test_mull(0, 0x10000, 0x10000);
806 test_imulw2(0x1234001d, 45);
807 test_imulw2(23, -45);
808 test_imulw2(0x8000, 0x8000);
809 test_imulw2(0x100, 0x100);
811 test_imull2(0x1234001d, 45);
812 test_imull2(23, -45);
813 test_imull2(0x80000000, 0x80000000);
814 test_imull2(0x10000, 0x10000);
816 TEST_IMUL_IM("w", "w", 45, 0x1234);
817 TEST_IMUL_IM("w", "w", -45, 23);
818 TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
819 TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);
821 TEST_IMUL_IM_L(45, 0x1234);
822 TEST_IMUL_IM_L(-45, 23);
823 TEST_IMUL_IM_L(0x8000, 0x80000000);
824 TEST_IMUL_IM_L(0x7fff, 0x1000);
826 test_idivb(0x12341678, 0x127e);
827 test_idivb(0x43210123, -5);
828 test_idivb(0x12340004, -1);
830 test_idivw(0, 0x12345678, 12347);
831 test_idivw(0, -23223, -45);
832 test_idivw(0, 0x12348000, -1);
833 test_idivw(0x12343, 0x12345678, 0x81238567);
835 test_idivl(0, 0x12345678, 12347);
836 test_idivl(0, -233223, -45);
837 test_idivl(0, 0x80000000, -1);
838 test_idivl(0x12343, 0x12345678, 0x81234567);
840 test_idivq(0, 0x12345678, 12347);
841 test_idivq(0, -233223, -45);
842 test_idivq(0, 0x80000000, -1);
843 test_idivq(0x12343, 0x12345678, 0x81234567);
845 test_divb(0x12341678, 0x127e);
846 test_divb(0x43210123, -5);
847 test_divb(0x12340004, -1);
849 test_divw(0, 0x12345678, 12347);
850 test_divw(0, -23223, -45);
851 test_divw(0, 0x12348000, -1);
852 test_divw(0x12343, 0x12345678, 0x81238567);
854 test_divl(0, 0x12345678, 12347);
855 test_divl(0, -233223, -45);
856 test_divl(0, 0x80000000, -1);
857 test_divl(0x12343, 0x12345678, 0x81234567);
859 test_divq(0, 0x12345678, 12347);
860 test_divq(0, -233223, -45);
861 test_divq(0, 0x80000000, -1);
862 test_divq(0x12343, 0x12345678, 0x81234567);
865 #define TEST_BSX(op, size, op0)\
870 "movl $0x12345678, %0\n"\
871 #op " %" size "2, %" size "0 ; setz %b1" \
872 : "=r" (res), "=q" (resz)\
874 xxprintf("%-10s A=%08x R=%08x %d\n", #op, val, res, resz);\
879 TEST_BSX(bsrw
, "w", 0);
880 TEST_BSX(bsrw
, "w", 0x12340128);
881 TEST_BSX(bsrl
, "", 0);
882 TEST_BSX(bsrl
, "", 0x00340128);
883 TEST_BSX(bsfw
, "w", 0);
884 TEST_BSX(bsfw
, "w", 0x12340128);
885 TEST_BSX(bsfl
, "", 0);
886 TEST_BSX(bsfl
, "", 0x00340128);
889 /**********************************************/
/*
 * Report basic floating-point arithmetic and <math.h> results for the
 * pair (a, b).
 *
 * Each result is formatted with "%f" and emitted through xxprintf(), one
 * line per operation: the four arithmetic operators, fmod, sqrt, the
 * trigonometric functions, log, exp and atan2, followed by three
 * inverse-of-forward compositions.  Nothing is returned.
 */
void test_fops(double a, double b)
{
    xxprintf("a=%f b=%f a+b=%f\n", a, b, a + b);
    xxprintf("a=%f b=%f a-b=%f\n", a, b, a - b);
    xxprintf("a=%f b=%f a*b=%f\n", a, b, a * b);
    xxprintf("a=%f b=%f a/b=%f\n", a, b, a / b);
    xxprintf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));
    xxprintf("a=%f sqrt(a)=%f\n", a, sqrt(a));
    xxprintf("a=%f sin(a)=%f\n", a, sin(a));
    xxprintf("a=%f cos(a)=%f\n", a, cos(a));
    xxprintf("a=%f tan(a)=%f\n", a, tan(a));
    xxprintf("a=%f log(a)=%f\n", a, log(a));
    xxprintf("a=%f exp(a)=%f\n", a, exp(a));
    xxprintf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
    /* just to test some op combining */
    xxprintf("a=%f asin(sin(a))=%f\n", a, asin(sin(a)));
    xxprintf("a=%f acos(cos(a))=%f\n", a, acos(cos(a)));
    xxprintf("a=%f atan(tan(a))=%f\n", a, atan(tan(a)));
}
911 void test_fcmp(double a
, double b
)
913 xxprintf("(%f<%f)=%d\n",
915 xxprintf("(%f<=%f)=%d\n",
917 xxprintf("(%f==%f)=%d\n",
919 xxprintf("(%f>%f)=%d\n",
921 xxprintf("(%f<=%f)=%d\n",
924 unsigned long long int rflags
;
925 /* test f(u)comi instruction */
931 xxprintf("fcomi(%f %f)=%016llx\n", a
, b
, rflags
& (CC_Z
| CC_P
| CC_C
));
935 void test_fcvt(double a
)
948 xxprintf("(float)%f = %f\n", a
, fa
);
949 xxprintf("(long double)%f = %Lf\n", a
, la
);
950 xxprintf("a=%016llx\n", *(unsigned long long int *) &a
);
951 xxprintf("la=%016llx %04x\n", *(unsigned long long int *) &la
,
952 *(unsigned short *) ((char *)(&la
) + 8));
954 /* test all roundings */
955 asm volatile ("fstcw %0" : "=m" (fpuc
));
957 short zz
= (fpuc
& ~0x0c00) | (i
<< 10);
958 asm volatile ("fldcw %0" : : "m" (zz
));
959 asm volatile ("fists %0" : "=m" (wa
) : "t" (a
));
960 asm volatile ("fistl %0" : "=m" (ia
) : "t" (a
));
961 asm volatile ("fistpll %0" : "=m" (lla
) : "t" (a
) : "st");
962 asm volatile ("frndint ; fstl %0" : "=m" (ra
) : "t" (a
));
963 asm volatile ("fldcw %0" : : "m" (fpuc
));
964 xxprintf("(short)a = %d\n", wa
);
965 xxprintf("(int)a = %d\n", ia
);
966 xxprintf("(int64_t)a = %lld\n", lla
);
967 xxprintf("rint(a) = %f\n", ra
);
972 asm("fld" #N : "=t" (a)); \
973 xxprintf("fld" #N "= %f\n", a);
975 void test_fconst(void)
987 void test_fbcd(double a
)
989 unsigned short bcd
[5];
992 asm("fbstp %0" : "=m" (bcd
[0]) : "t" (a
) : "st");
993 asm("fbld %1" : "=t" (b
) : "m" (bcd
[0]));
994 xxprintf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n",
995 a
, bcd
[4], bcd
[3], bcd
[2], bcd
[1], bcd
[0], b
);
998 #define TEST_ENV(env, save, restore)\
1000 memset((env), 0xaa, sizeof(*(env)));\
1002 asm volatile ("fldl %0" : : "m" (dtab[i]));\
1003 asm(save " %0\n" : : "m" (*(env)));\
1004 asm(restore " %0\n": : "m" (*(env)));\
1006 asm volatile ("fstpl %0" : "=m" (rtab[i]));\
1008 xxprintf("res[%d]=%f\n", i, rtab[i]);\
1009 xxprintf("fpuc=%04x fpus=%04x fptag=%04x\n",\
1011 (env)->fpus & 0xff00,\
1015 void test_fenv(void)
1017 struct __attribute__((packed
)) {
1024 uint32_t ignored
[4];
1025 long double fpregs
[8];
1034 TEST_ENV(&float_env32
, "fnstenv", "fldenv");
1035 TEST_ENV(&float_env32
, "fnsave", "frstor");
1037 /* test for ffree */
1039 asm volatile ("fldl %0" : : "m" (dtab
[i
]));
1040 asm volatile("ffree %st(2)");
1041 asm volatile ("fnstenv %0\n" : : "m" (float_env32
));
1042 asm volatile ("fninit");
1043 xxprintf("fptag=%04x\n", float_env32
.fptag
);
1047 #define TEST_FCMOV(a, b, rflags, CC)\
1052 "fcmov" CC " %2, %0\n"\
1054 : "0" (a), "u" (b), "g" (rflags));\
1055 xxprintf("fcmov%s rflags=0x%04llx-> %f\n", \
1059 void test_fcmov(void)
1066 for(i
= 0; i
< 4; i
++) {
1072 TEST_FCMOV(a
, b
, rflags
, "b");
1073 TEST_FCMOV(a
, b
, rflags
, "e");
1074 TEST_FCMOV(a
, b
, rflags
, "be");
1075 TEST_FCMOV(a
, b
, rflags
, "nb");
1076 TEST_FCMOV(a
, b
, rflags
, "ne");
1077 TEST_FCMOV(a
, b
, rflags
, "nbe");
1079 TEST_FCMOV(a
, b
, (int64
)0, "u");
1080 TEST_FCMOV(a
, b
, (int64
)CC_P
, "u");
1081 TEST_FCMOV(a
, b
, (int64
)0, "nu");
1082 TEST_FCMOV(a
, b
, (int64
)CC_P
, "nu");
1085 void test_floats(void)
1095 test_fcvt(-1.0/9.0);
1099 // REINSTATE (maybe): test_fbcd(1234567890123456);
1100 // REINSTATE (maybe): test_fbcd(-123451234567890);
1101 // REINSTATE: test_fenv();
1102 // REINSTATE: test_fcmov();
1105 /**********************************************/
1107 #define TEST_XCHG(op, size, opconst)\
1112 asm(#op " %" size "0, %" size "1" \
1113 : "=q" (op0), opconst (op1) \
1114 : "0" (op0), "1" (op1));\
1115 xxprintf("%-10s A=%08x B=%08x\n",\
1119 #define TEST_CMPXCHG(op, size, opconst, eax)\
1124 asm(#op " %" size "0, %" size "1" \
1125 : "=q" (op0), opconst (op1) \
1126 : "0" (op0), "1" (op1), "a" (eax));\
1127 xxprintf("%-10s EAX=%08x A=%08x C=%08x\n",\
1128 #op, eax, op0, op1);\
1132 /**********************************************/
1133 /* segmentation tests */
1135 extern char func_lret32
;
1136 extern char func_iret32
;
1138 uint8_t str_buffer
[4096];
1140 #define TEST_STRING1(OP, size, DF, REP)\
1142 int64 rsi, rdi, rax, rcx, rflags;\
1144 rsi = (long)(str_buffer + sizeof(str_buffer) / 2);\
1145 rdi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
1149 asm volatile ("pushq $0\n\t"\
1152 REP #OP size "\n\t"\
1156 : "=S" (rsi), "=D" (rdi), "=a" (rax), "=c" (rcx), "=g" (rflags)\
1157 : "0" (rsi), "1" (rdi), "2" (rax), "3" (rcx));\
1158 xxprintf("%-10s ESI=%016llx EDI=%016llx EAX=%016llx ECX=%016llx EFL=%04llx\n",\
1159 REP #OP size, rsi, rdi, rax, rcx,\
1160 rflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
/*
 * Expand TEST_STRING1 six times for one string operation OP and prefix
 * REP: sizes "b", "w" and "l" with an empty DF argument, then the same
 * three sizes with DF set to "std".  The last expansion carries no
 * trailing ';' so the caller supplies it.
 */
#define TEST_STRING(OP, REP)\
    TEST_STRING1(OP, "b", "", REP);\
    TEST_STRING1(OP, "w", "", REP);\
    TEST_STRING1(OP, "l", "", REP);\
    TEST_STRING1(OP, "b", "std", REP);\
    TEST_STRING1(OP, "w", "std", REP);\
    TEST_STRING1(OP, "l", "std", REP)
1171 void test_string(void)
1174 for(i
= 0;i
< sizeof(str_buffer
); i
++)
1175 str_buffer
[i
] = i
+ 0x56;
1176 TEST_STRING(stos
, "");
1177 TEST_STRING(stos
, "rep ");
1178 TEST_STRING(lods
, ""); /* to verify stos */
1179 // TEST_STRING(lods, "rep ");
1180 TEST_STRING(movs
, "");
1181 TEST_STRING(movs
, "rep ");
1182 TEST_STRING(lods
, ""); /* to verify stos */
1184 /* XXX: better tests */
1185 TEST_STRING(scas
, "");
1186 TEST_STRING(scas
, "repz ");
1187 TEST_STRING(scas
, "repnz ");
1188 // REINSTATE? TEST_STRING(cmps, "");
1189 TEST_STRING(cmps
, "repz ");
1190 // REINSTATE? TEST_STRING(cmps, "repnz ");
1193 int main(int argc
, char **argv
)
1195 // The three commented out test cases produce different results at different
1196 // compiler optimisation levels. This suggests to me that their inline
1197 // assembly is incorrect. I don't have time to investigate now, though. So
1198 // they are disabled.
1231 // the expected MD5SUM is 66802c845574c7c69f30d29ef85f7ca3