2 /* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}. Does not
3 check the core arithmetic in any detail. */
/* Basic type aliases used by the test code below. */
/* V128: a 16-element unsigned char array, i.e. one 128-bit vector held
   as raw bytes. */
9 typedef unsigned char V128
[16];
/* UInt: unsigned int. */
10 typedef unsigned int UInt
;
/* Int: signed int. */
11 typedef signed int Int
;
/* UChar: unsigned char, used for single bytes. */
12 typedef unsigned char UChar
;
/* ULong: unsigned long long int; printed with %llx further down. */
13 typedef unsigned long long int ULong
;
/* Boolean constants, cast to Bool.
   NOTE(review): no declaration of Bool is visible in this excerpt;
   presumably it is typedef'd on a line not shown here -- confirm. */
15 #define False ((Bool)0)
16 #define True ((Bool)1)
/* Print the bytes of *vec as two-digit lowercase hex ("%02x"), walking
   from index 15 down to index 0, so the highest-indexed byte is printed
   first. Each byte is cast to UInt before being handed to printf.
   NOTE(review): the function's braces and the declaration of i are not
   visible in this excerpt. */
18 void show_V128 ( V128
* vec
)
21 for (i
= 15; i
>= 0; i
--)
22 printf("%02x", (UInt
)( (*vec
)[i
] ));
/* Decode the 16-character hex string 'summary' one character at a time;
   'dst' is the vector this is meant to fill.
   Asserts that summary is exactly 16 characters long.
   NOTE(review): the declarations of i and xx, the handling of non-hex
   characters, and the code that stores into *dst are not visible in this
   excerpt, so how each decoded digit becomes a byte of *dst cannot be
   confirmed from here. */
25 void expand ( V128
* dst
, char* summary
)
28 assert( strlen(summary
) == 16 );
29 for (i
= 0; i
< 16; i
++) {
/* Read the summary back to front: i == 0 takes the last character,
   i == 15 takes the first. */
31 UChar x
= summary
[15-i
];
/* Map one hex digit ('0'-'9', 'A'-'F' or 'a'-'f') to its value 0..15
   in xx. */
32 if (x
>= '0' && x
<= '9') { xx
= x
- '0'; }
33 else if (x
>= 'A' && x
<= 'F') { xx
= x
- 'A' + 10; }
34 else if (x
>= 'a' && x
<= 'f') { xx
= x
- 'a' + 10; }
/* Run one test vector through the eight instruction variants below
   (pcmpistri, pcmpistrm, pcmpestri, pcmpestrm, each with immediate $0x4A
   and $0x0A) and print the results.

   summL, summR: 16-character hex summaries; each is passed to expand()
                 to build argL / argR.
   rdxIN, raxIN: 64-bit values echoed in the header output (labelled rdx
                 and rax) and copied into the scratch block before every
                 variant.

   Every variant follows the same sequence: refill the scratch block,
   load xmm2, xmm13, rdx, rax, xmm0 and cx from it, execute the
   instruction, store xmm0, cx and r15 back, then print the xmm0, rcx and
   flags slots. The two immediates differ only in bit 6 (0x40); see the
   Intel SDM description of the imm8 control byte for its meaning.

   NOTE(review): several original lines are not visible in this excerpt:
   the local declarations (argL, argR), the braces, and parts of every
   asm statement, including whatever fills r15 and the output/input
   operand lists. */
44 void one_test ( char* summL
, ULong rdxIN
, char* summR
, ULong raxIN
)
47 expand( &argL
, summL
);
48 expand( &argR
, summR
);
/* Header output: the two scalar inputs with their labels. */
50 printf("rdx %016llx argL ", rdxIN
);
52 printf(" rax %016llx argR ", raxIN
);
/* Scratch area shared with the asm. The trailing columns in the layout
   comments give each field's ULong index and byte offset.
   NOTE(review): the visible terms add up to 8 elements (64 bytes) while
   the assert below requires 80 bytes. The two 1-element terms for
   indices 4 and 5 (offsets 32 and 40, which the asm loads into rdx and
   rax) are presumably on lines not shown here. */
56 ULong block
[ 2/*in:argL*/ // 0 0
57 + 2/*in:argR*/ // 2 16
60 + 2/*inout:xmm0*/ // 6 48
61 + 1/*inout:rcx*/ // 8 64
62 + 1/*out:rflags*/ ]; // 9 72
63 assert(sizeof(block
) == 80);
/* Byte-addressed view of block, used for the memset/memcpy offsets. */
65 UChar
* blockC
= (UChar
*)&block
[0];
/* Variant 1 of 8: pcmpistri with immediate $0x4A. The comments in this
   section describe the sequence that all eight sections repeat. */
67 /* ---------------- ISTRI_4A ---------------- */
/* Fill all 80 bytes with 0x55 (presumably so that bytes the asm does not
   write stay recognisable), then place the inputs.
   NOTE(review): the copy at offset 24 lands inside the 16 bytes just
   written for argR (offsets 16..31), overwriting its upper 8 bytes with
   rdxIN. The asm then reads rdx from offset 32, which holds raxIN, and
   rax from offset 40, which holds rdxIN. That looks swapped relative to
   the "rdx .. argL / rax .. argR" header output. Confirm whether it is
   intentional before changing it, since any recorded expected output
   depends on the current behaviour. */
68 memset(blockC
, 0x55, 80);
69 memcpy(blockC
+ 0, &argL
, 16);
70 memcpy(blockC
+ 16, &argR
, 16);
71 memcpy(blockC
+ 24, &rdxIN
, 8);
72 memcpy(blockC
+ 32, &raxIN
, 8);
73 memcpy(blockC
+ 40, &rdxIN
, 8);
/* Asm body: load the registers from the block, run the instruction,
   store the results back. Only 16 bits of rcx are loaded and stored
   (movw with cx).
   NOTE(review): the asm statement opener for this first section is not
   visible in this excerpt; the other seven sections show it. */
75 "movupd 0(%0), %%xmm2" "\n\t"
76 "movupd 16(%0), %%xmm13" "\n\t"
77 "movq 32(%0), %%rdx" "\n\t"
78 "movq 40(%0), %%rax" "\n\t"
79 "movupd 48(%0), %%xmm0" "\n\t"
80 "movw 64(%0), %%cx" "\n\t"
81 "pcmpistri $0x4A, %%xmm2, %%xmm13" "\n\t"
82 "movupd %%xmm0, 48(%0)" "\n\t"
83 "movw %%cx, 64(%0)" "\n\t"
86 "movq %%r15, 72(%0)" "\n\t"
89 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
/* Print the xmm0 slot (offset 48), the rcx slot block[8] and the flags
   slot block[9] masked with 0x8D5, i.e. bits 0, 2, 4, 6, 7 and 11 (the
   CF, PF, AF, ZF, SF and OF positions of x86 RFLAGS). As far as the
   visible lines show, only the low 16 bits of the rcx slot are written
   by the asm; the rest keeps the 0x55 fill. */
91 printf(" istri $0x4A: ");
93 show_V128( (V128
*)(blockC
+48) );
94 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
/* Variant 2 of 8: pcmpistri with immediate $0x0A; same sequence as the
   ISTRI_4A section. */
96 /* ---------------- ISTRI_0A ---------------- */
97 memset(blockC
, 0x55, 80);
98 memcpy(blockC
+ 0, &argL
, 16);
99 memcpy(blockC
+ 16, &argR
, 16);
100 memcpy(blockC
+ 24, &rdxIN
, 8);
101 memcpy(blockC
+ 32, &raxIN
, 8);
102 memcpy(blockC
+ 40, &rdxIN
, 8);
103 __asm__
__volatile__(
104 "movupd 0(%0), %%xmm2" "\n\t"
105 "movupd 16(%0), %%xmm13" "\n\t"
106 "movq 32(%0), %%rdx" "\n\t"
107 "movq 40(%0), %%rax" "\n\t"
108 "movupd 48(%0), %%xmm0" "\n\t"
109 "movw 64(%0), %%cx" "\n\t"
110 "pcmpistri $0x0A, %%xmm2, %%xmm13" "\n\t"
111 "movupd %%xmm0, 48(%0)" "\n\t"
112 "movw %%cx, 64(%0)" "\n\t"
115 "movq %%r15, 72(%0)" "\n\t"
118 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
120 printf(" istri $0x0A: ");
122 show_V128( (V128
*)(blockC
+48) );
123 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
/* Variant 3 of 8: pcmpistrm with immediate $0x4A; same sequence as the
   ISTRI_4A section. */
125 /* ---------------- ISTRM_4A ---------------- */
126 memset(blockC
, 0x55, 80);
127 memcpy(blockC
+ 0, &argL
, 16);
128 memcpy(blockC
+ 16, &argR
, 16);
129 memcpy(blockC
+ 24, &rdxIN
, 8);
130 memcpy(blockC
+ 32, &raxIN
, 8);
131 memcpy(blockC
+ 40, &rdxIN
, 8);
132 __asm__
__volatile__(
133 "movupd 0(%0), %%xmm2" "\n\t"
134 "movupd 16(%0), %%xmm13" "\n\t"
135 "movq 32(%0), %%rdx" "\n\t"
136 "movq 40(%0), %%rax" "\n\t"
137 "movupd 48(%0), %%xmm0" "\n\t"
138 "movw 64(%0), %%cx" "\n\t"
139 "pcmpistrm $0x4A, %%xmm2, %%xmm13" "\n\t"
140 "movupd %%xmm0, 48(%0)" "\n\t"
141 "movw %%cx, 64(%0)" "\n\t"
144 "movq %%r15, 72(%0)" "\n\t"
147 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
149 printf(" istrm $0x4A: ");
151 show_V128( (V128
*)(blockC
+48) );
152 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
/* Variant 4 of 8: pcmpistrm with immediate $0x0A; same sequence as the
   ISTRI_4A section. */
154 /* ---------------- ISTRM_0A ---------------- */
155 memset(blockC
, 0x55, 80);
156 memcpy(blockC
+ 0, &argL
, 16);
157 memcpy(blockC
+ 16, &argR
, 16);
158 memcpy(blockC
+ 24, &rdxIN
, 8);
159 memcpy(blockC
+ 32, &raxIN
, 8);
160 memcpy(blockC
+ 40, &rdxIN
, 8);
161 __asm__
__volatile__(
162 "movupd 0(%0), %%xmm2" "\n\t"
163 "movupd 16(%0), %%xmm13" "\n\t"
164 "movq 32(%0), %%rdx" "\n\t"
165 "movq 40(%0), %%rax" "\n\t"
166 "movupd 48(%0), %%xmm0" "\n\t"
167 "movw 64(%0), %%cx" "\n\t"
168 "pcmpistrm $0x0A, %%xmm2, %%xmm13" "\n\t"
169 "movupd %%xmm0, 48(%0)" "\n\t"
170 "movw %%cx, 64(%0)" "\n\t"
173 "movq %%r15, 72(%0)" "\n\t"
176 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
178 printf(" istrm $0x0A: ");
180 show_V128( (V128
*)(blockC
+48) );
181 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
/* Variant 5 of 8: pcmpestri with immediate $0x4A; same sequence as the
   ISTRI_4A section. */
183 /* ---------------- ESTRI_4A ---------------- */
184 memset(blockC
, 0x55, 80);
185 memcpy(blockC
+ 0, &argL
, 16);
186 memcpy(blockC
+ 16, &argR
, 16);
187 memcpy(blockC
+ 24, &rdxIN
, 8);
188 memcpy(blockC
+ 32, &raxIN
, 8);
189 memcpy(blockC
+ 40, &rdxIN
, 8);
190 __asm__
__volatile__(
191 "movupd 0(%0), %%xmm2" "\n\t"
192 "movupd 16(%0), %%xmm13" "\n\t"
193 "movq 32(%0), %%rdx" "\n\t"
194 "movq 40(%0), %%rax" "\n\t"
195 "movupd 48(%0), %%xmm0" "\n\t"
196 "movw 64(%0), %%cx" "\n\t"
197 "pcmpestri $0x4A, %%xmm2, %%xmm13" "\n\t"
198 "movupd %%xmm0, 48(%0)" "\n\t"
199 "movw %%cx, 64(%0)" "\n\t"
202 "movq %%r15, 72(%0)" "\n\t"
205 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
207 printf(" estri $0x4A: ");
209 show_V128( (V128
*)(blockC
+48) );
210 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
/* Variant 6 of 8: pcmpestri with immediate $0x0A; same sequence as the
   ISTRI_4A section. */
212 /* ---------------- ESTRI_0A ---------------- */
213 memset(blockC
, 0x55, 80);
214 memcpy(blockC
+ 0, &argL
, 16);
215 memcpy(blockC
+ 16, &argR
, 16);
216 memcpy(blockC
+ 24, &rdxIN
, 8);
217 memcpy(blockC
+ 32, &raxIN
, 8);
218 memcpy(blockC
+ 40, &rdxIN
, 8);
219 __asm__
__volatile__(
220 "movupd 0(%0), %%xmm2" "\n\t"
221 "movupd 16(%0), %%xmm13" "\n\t"
222 "movq 32(%0), %%rdx" "\n\t"
223 "movq 40(%0), %%rax" "\n\t"
224 "movupd 48(%0), %%xmm0" "\n\t"
225 "movw 64(%0), %%cx" "\n\t"
226 "pcmpestri $0x0A, %%xmm2, %%xmm13" "\n\t"
227 "movupd %%xmm0, 48(%0)" "\n\t"
228 "movw %%cx, 64(%0)" "\n\t"
231 "movq %%r15, 72(%0)" "\n\t"
234 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
236 printf(" estri $0x0A: ");
238 show_V128( (V128
*)(blockC
+48) );
239 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
/* Variant 7 of 8: pcmpestrm with immediate $0x4A; same sequence as the
   ISTRI_4A section. */
241 /* ---------------- ESTRM_4A ---------------- */
242 memset(blockC
, 0x55, 80);
243 memcpy(blockC
+ 0, &argL
, 16);
244 memcpy(blockC
+ 16, &argR
, 16);
245 memcpy(blockC
+ 24, &rdxIN
, 8);
246 memcpy(blockC
+ 32, &raxIN
, 8);
247 memcpy(blockC
+ 40, &rdxIN
, 8);
248 __asm__
__volatile__(
249 "movupd 0(%0), %%xmm2" "\n\t"
250 "movupd 16(%0), %%xmm13" "\n\t"
251 "movq 32(%0), %%rdx" "\n\t"
252 "movq 40(%0), %%rax" "\n\t"
253 "movupd 48(%0), %%xmm0" "\n\t"
254 "movw 64(%0), %%cx" "\n\t"
255 "pcmpestrm $0x4A, %%xmm2, %%xmm13" "\n\t"
256 "movupd %%xmm0, 48(%0)" "\n\t"
257 "movw %%cx, 64(%0)" "\n\t"
260 "movq %%r15, 72(%0)" "\n\t"
263 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
265 printf(" estrm $0x4A: ");
267 show_V128( (V128
*)(blockC
+48) );
268 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
/* Variant 8 of 8: pcmpestrm with immediate $0x0A; same sequence as the
   ISTRI_4A section. */
270 /* ---------------- ESTRM_0A ---------------- */
271 memset(blockC
, 0x55, 80);
272 memcpy(blockC
+ 0, &argL
, 16);
273 memcpy(blockC
+ 16, &argR
, 16);
274 memcpy(blockC
+ 24, &rdxIN
, 8);
275 memcpy(blockC
+ 32, &raxIN
, 8);
276 memcpy(blockC
+ 40, &rdxIN
, 8);
277 __asm__
__volatile__(
278 "movupd 0(%0), %%xmm2" "\n\t"
279 "movupd 16(%0), %%xmm13" "\n\t"
280 "movq 32(%0), %%rdx" "\n\t"
281 "movq 40(%0), %%rax" "\n\t"
282 "movupd 48(%0), %%xmm0" "\n\t"
283 "movw 64(%0), %%cx" "\n\t"
284 "pcmpestrm $0x0A, %%xmm2, %%xmm13" "\n\t"
285 "movupd %%xmm0, 48(%0)" "\n\t"
286 "movw %%cx, 64(%0)" "\n\t"
289 "movq %%r15, 72(%0)" "\n\t"
292 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
294 printf(" estrm $0x0A: ");
296 show_V128( (V128
*)(blockC
+48) );
297 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
/* Test vectors: each call is one_test(summL, rdxIN, summR, raxIN).
   NOTE(review): the header of the enclosing function (presumably main)
   is not visible in this excerpt. */
/* Two different left operands against a right operand that contains a
   single '0' digit; both scalar inputs are 0. */
306 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
307 one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
/* Identical operands from here on; first vary rdxIN and raxIN one at a
   time. */
309 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
310 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
311 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
/* rdxIN fixed at 5; raxIN takes 6, 15, 16 and 17, i.e. values below, at
   and above the 16-byte vector size. */
313 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
314 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
315 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
316 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
/* rdxIN fixed at 5; negative raxIN values. The parameter type is ULong,
   so each negative literal is converted to a large unsigned value. */
318 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
319 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
320 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
321 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
/* raxIN fixed at 6; rdxIN takes 5, 15, 16 and 17. The first call repeats
   the (5, 6) case already run above. */
323 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
324 one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
325 one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
326 one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
/* raxIN fixed at 6; negative rdxIN values, again converted to ULong. */
328 one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
329 one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
330 one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
331 one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );