2 /* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}. Does not
3 check the core arithmetic in any detail. This file checks the 16-bit
4 character versions (w is for wide) */
/* Basic type aliases used throughout the test.  V128 is an array of 16
   unsigned chars, i.e. one 128-bit value viewed as individual bytes. */
10 typedef unsigned char V128
[16];
11 typedef unsigned int UInt
;
12 typedef signed int Int
;
13 typedef unsigned char UChar
;
14 typedef unsigned long long int ULong
;
/* Boolean constants.  NOTE(review): both cast to Bool, whose typedef is not
   among the lines visible here -- confirm it is defined in the full file. */
16 #define False ((Bool)0)
17 #define True ((Bool)1)
/* Print the 16 bytes of *vec as two-digit lowercase hex.  The index runs
   from 15 down to 0, so the byte at index 15 is printed first.  The visible
   code emits no separator between bytes and no trailing newline.
   NOTE(review): the declaration of i and the function's braces are not
   present in this listing. */
19 void show_V128 ( V128
* vec
)
22 for (i
= 15; i
>= 0; i
--)
23 printf("%02x", (UInt
)( (*vec
)[i
] ));
/* Convert a 16-character hex-digit string into per-character values.
   summary must be exactly 16 characters long (asserted).  The loop reads
   summary[15-i], so the LAST character of the string is handled at i == 0.
   Each character in '0'-'9', 'A'-'F' or 'a'-'f' is converted to its numeric
   value 0..15 in xx.
   NOTE(review): the lines that declare i and xx, handle characters outside
   the hex ranges, and store the result (presumably into (*dst)[i]) are
   missing from this listing -- confirm against the full file. */
26 void expand ( V128
* dst
, char* summary
)
29 assert( strlen(summary
) == 16 );
30 for (i
= 0; i
< 16; i
++) {
32 UChar x
= summary
[15-i
];
33 if (x
>= '0' && x
<= '9') { xx
= x
- '0'; }
34 else if (x
>= 'A' && x
<= 'F') { xx
= x
- 'A' + 10; }
35 else if (x
>= 'a' && x
<= 'f') { xx
= x
- 'a' + 10; }
/* Run one test vector through all eight instruction variants.
   summL / summR are 16-character hex summaries that expand() turns into the
   two 128-bit operands argL / argR; rdxIN / raxIN are the values intended
   for the rdx / rax registers.  The inputs are printed first.  Then, for
   each of pcmpistri, pcmpistrm, pcmpestri and pcmpestrm with immediates
   0x4B and 0x0B (which differ only in bit 6, 0x40), the function fills an
   80-byte scratch block, runs the instruction from inline asm, and prints
   xmm0, rcx and the masked flags word read back from the block.
   The eight sections are identical apart from the mnemonic, the immediate
   and the label that is printed.
   NOTE(review): several lines of each asm statement (the flag capture into
   r15, the operand list, the closing parenthesis) and the declarations of
   argL / argR are not present in this listing. */
45 void one_test ( char* summL
, ULong rdxIN
, char* summR
, ULong raxIN
)
48 expand( &argL
, summL
);
49 expand( &argR
, summR
);
51 printf("rdx %016llx argL ", rdxIN
);
53 printf(" rax %016llx argR ", raxIN
);
/* Scratch block shared with the asm: ten 8-byte words (80 bytes).  The
   trailing comments give word index and byte offset of each field.  The
   asm also reads offsets 32 and 40 (into rdx and rax); the declaration
   lines for those two words are not visible in this listing. */
57 ULong block
[ 2/*in:argL*/ // 0 0
58 + 2/*in:argR*/ // 2 16
61 + 2/*inout:xmm0*/ // 6 48
62 + 1/*inout:rcx*/ // 8 64
63 + 1/*out:rflags*/ ]; // 9 72
64 assert(sizeof(block
) == 80);
/* Byte-wise view of the block, used for the memset/memcpy offsets below. */
66 UChar
* blockC
= (UChar
*)&block
[0];
68 /* ---------------- ISTRI_4B ---------------- */
/* Fill the whole block with 0x55 (presumably so that bytes the asm does not
   write stay recognisable), then lay down the inputs.
   NOTE(review): the copy of rdxIN to offset 24 overwrites bytes 8..15 of
   argR (which occupies offsets 16..31), and the asm below loads rdx from
   offset 32 (which holds raxIN) and rax from offset 40 (which holds rdxIN).
   This does not match the layout annotated on the block declaration and may
   be unintended, but any recorded expected output would depend on it --
   confirm before changing.  The same sequence is repeated in every section
   below. */
69 memset(blockC
, 0x55, 80);
70 memcpy(blockC
+ 0, &argL
, 16);
71 memcpy(blockC
+ 16, &argR
, 16);
72 memcpy(blockC
+ 24, &rdxIN
, 8);
73 memcpy(blockC
+ 32, &raxIN
, 8);
74 memcpy(blockC
+ 40, &rdxIN
, 8);
76 "movupd 0(%0), %%xmm2" "\n\t"
77 "movupd 16(%0), %%xmm13" "\n\t"
78 "movq 32(%0), %%rdx" "\n\t"
79 "movq 40(%0), %%rax" "\n\t"
80 "movupd 48(%0), %%xmm0" "\n\t"
81 "movw 64(%0), %%cx" "\n\t"
82 "pcmpistri $0x4B, %%xmm2, %%xmm13" "\n\t"
83 "movupd %%xmm0, 48(%0)" "\n\t"
84 "movw %%cx, 64(%0)" "\n\t"
87 "movq %%r15, 72(%0)" "\n\t"
90 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
/* Report the results: xmm0 (offset 48), rcx (block[8]) and the word saved
   from r15 (block[9]) masked with 0x8D5, i.e. bits 0, 2, 4, 6, 7 and 11
   (the x86 CF, PF, AF, ZF, SF and OF positions).  Only the low 16 bits of
   rcx are loaded and stored (movw with %cx), so the upper bytes of block[8]
   keep the 0x55 fill. */
92 printf(" istri $0x4B: ");
94 show_V128( (V128
*)(blockC
+48) );
95 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
97 /* ---------------- ISTRI_0B ---------------- */
98 memset(blockC
, 0x55, 80);
99 memcpy(blockC
+ 0, &argL
, 16);
100 memcpy(blockC
+ 16, &argR
, 16);
101 memcpy(blockC
+ 24, &rdxIN
, 8);
102 memcpy(blockC
+ 32, &raxIN
, 8);
103 memcpy(blockC
+ 40, &rdxIN
, 8);
104 __asm__
__volatile__(
105 "movupd 0(%0), %%xmm2" "\n\t"
106 "movupd 16(%0), %%xmm13" "\n\t"
107 "movq 32(%0), %%rdx" "\n\t"
108 "movq 40(%0), %%rax" "\n\t"
109 "movupd 48(%0), %%xmm0" "\n\t"
110 "movw 64(%0), %%cx" "\n\t"
111 "pcmpistri $0x0B, %%xmm2, %%xmm13" "\n\t"
112 "movupd %%xmm0, 48(%0)" "\n\t"
113 "movw %%cx, 64(%0)" "\n\t"
116 "movq %%r15, 72(%0)" "\n\t"
119 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
121 printf(" istri $0x0B: ");
123 show_V128( (V128
*)(blockC
+48) );
124 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
126 /* ---------------- ISTRM_4B ---------------- */
127 memset(blockC
, 0x55, 80);
128 memcpy(blockC
+ 0, &argL
, 16);
129 memcpy(blockC
+ 16, &argR
, 16);
130 memcpy(blockC
+ 24, &rdxIN
, 8);
131 memcpy(blockC
+ 32, &raxIN
, 8);
132 memcpy(blockC
+ 40, &rdxIN
, 8);
133 __asm__
__volatile__(
134 "movupd 0(%0), %%xmm2" "\n\t"
135 "movupd 16(%0), %%xmm13" "\n\t"
136 "movq 32(%0), %%rdx" "\n\t"
137 "movq 40(%0), %%rax" "\n\t"
138 "movupd 48(%0), %%xmm0" "\n\t"
139 "movw 64(%0), %%cx" "\n\t"
140 "pcmpistrm $0x4B, %%xmm2, %%xmm13" "\n\t"
141 "movupd %%xmm0, 48(%0)" "\n\t"
142 "movw %%cx, 64(%0)" "\n\t"
145 "movq %%r15, 72(%0)" "\n\t"
148 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
150 printf(" istrm $0x4B: ");
152 show_V128( (V128
*)(blockC
+48) );
153 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
155 /* ---------------- ISTRM_0B ---------------- */
156 memset(blockC
, 0x55, 80);
157 memcpy(blockC
+ 0, &argL
, 16);
158 memcpy(blockC
+ 16, &argR
, 16);
159 memcpy(blockC
+ 24, &rdxIN
, 8);
160 memcpy(blockC
+ 32, &raxIN
, 8);
161 memcpy(blockC
+ 40, &rdxIN
, 8);
162 __asm__
__volatile__(
163 "movupd 0(%0), %%xmm2" "\n\t"
164 "movupd 16(%0), %%xmm13" "\n\t"
165 "movq 32(%0), %%rdx" "\n\t"
166 "movq 40(%0), %%rax" "\n\t"
167 "movupd 48(%0), %%xmm0" "\n\t"
168 "movw 64(%0), %%cx" "\n\t"
169 "pcmpistrm $0x0B, %%xmm2, %%xmm13" "\n\t"
170 "movupd %%xmm0, 48(%0)" "\n\t"
171 "movw %%cx, 64(%0)" "\n\t"
174 "movq %%r15, 72(%0)" "\n\t"
177 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
179 printf(" istrm $0x0B: ");
181 show_V128( (V128
*)(blockC
+48) );
182 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
184 /* ---------------- ESTRI_4B ---------------- */
185 memset(blockC
, 0x55, 80);
186 memcpy(blockC
+ 0, &argL
, 16);
187 memcpy(blockC
+ 16, &argR
, 16);
188 memcpy(blockC
+ 24, &rdxIN
, 8);
189 memcpy(blockC
+ 32, &raxIN
, 8);
190 memcpy(blockC
+ 40, &rdxIN
, 8);
191 __asm__
__volatile__(
192 "movupd 0(%0), %%xmm2" "\n\t"
193 "movupd 16(%0), %%xmm13" "\n\t"
194 "movq 32(%0), %%rdx" "\n\t"
195 "movq 40(%0), %%rax" "\n\t"
196 "movupd 48(%0), %%xmm0" "\n\t"
197 "movw 64(%0), %%cx" "\n\t"
198 "pcmpestri $0x4B, %%xmm2, %%xmm13" "\n\t"
199 "movupd %%xmm0, 48(%0)" "\n\t"
200 "movw %%cx, 64(%0)" "\n\t"
203 "movq %%r15, 72(%0)" "\n\t"
206 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
208 printf(" estri $0x4B: ");
210 show_V128( (V128
*)(blockC
+48) );
211 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
213 /* ---------------- ESTRI_0B ---------------- */
214 memset(blockC
, 0x55, 80);
215 memcpy(blockC
+ 0, &argL
, 16);
216 memcpy(blockC
+ 16, &argR
, 16);
217 memcpy(blockC
+ 24, &rdxIN
, 8);
218 memcpy(blockC
+ 32, &raxIN
, 8);
219 memcpy(blockC
+ 40, &rdxIN
, 8);
220 __asm__
__volatile__(
221 "movupd 0(%0), %%xmm2" "\n\t"
222 "movupd 16(%0), %%xmm13" "\n\t"
223 "movq 32(%0), %%rdx" "\n\t"
224 "movq 40(%0), %%rax" "\n\t"
225 "movupd 48(%0), %%xmm0" "\n\t"
226 "movw 64(%0), %%cx" "\n\t"
227 "pcmpestri $0x0B, %%xmm2, %%xmm13" "\n\t"
228 "movupd %%xmm0, 48(%0)" "\n\t"
229 "movw %%cx, 64(%0)" "\n\t"
232 "movq %%r15, 72(%0)" "\n\t"
235 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
237 printf(" estri $0x0B: ");
239 show_V128( (V128
*)(blockC
+48) );
240 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
242 /* ---------------- ESTRM_4B ---------------- */
243 memset(blockC
, 0x55, 80);
244 memcpy(blockC
+ 0, &argL
, 16);
245 memcpy(blockC
+ 16, &argR
, 16);
246 memcpy(blockC
+ 24, &rdxIN
, 8);
247 memcpy(blockC
+ 32, &raxIN
, 8);
248 memcpy(blockC
+ 40, &rdxIN
, 8);
249 __asm__
__volatile__(
250 "movupd 0(%0), %%xmm2" "\n\t"
251 "movupd 16(%0), %%xmm13" "\n\t"
252 "movq 32(%0), %%rdx" "\n\t"
253 "movq 40(%0), %%rax" "\n\t"
254 "movupd 48(%0), %%xmm0" "\n\t"
255 "movw 64(%0), %%cx" "\n\t"
256 "pcmpestrm $0x4B, %%xmm2, %%xmm13" "\n\t"
257 "movupd %%xmm0, 48(%0)" "\n\t"
258 "movw %%cx, 64(%0)" "\n\t"
261 "movq %%r15, 72(%0)" "\n\t"
264 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
266 printf(" estrm $0x4B: ");
268 show_V128( (V128
*)(blockC
+48) );
269 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
271 /* ---------------- ESTRM_0B ---------------- */
272 memset(blockC
, 0x55, 80);
273 memcpy(blockC
+ 0, &argL
, 16);
274 memcpy(blockC
+ 16, &argR
, 16);
275 memcpy(blockC
+ 24, &rdxIN
, 8);
276 memcpy(blockC
+ 32, &raxIN
, 8);
277 memcpy(blockC
+ 40, &rdxIN
, 8);
278 __asm__
__volatile__(
279 "movupd 0(%0), %%xmm2" "\n\t"
280 "movupd 16(%0), %%xmm13" "\n\t"
281 "movq 32(%0), %%rdx" "\n\t"
282 "movq 40(%0), %%rax" "\n\t"
283 "movupd 48(%0), %%xmm0" "\n\t"
284 "movw 64(%0), %%cx" "\n\t"
285 "pcmpestrm $0x0B, %%xmm2, %%xmm13" "\n\t"
286 "movupd %%xmm0, 48(%0)" "\n\t"
287 "movw %%cx, 64(%0)" "\n\t"
290 "movq %%r15, 72(%0)" "\n\t"
293 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
295 printf(" estrm $0x0B: ");
297 show_V128( (V128
*)(blockC
+48) );
298 printf(" rcx %016llx flags %08llx\n", block
[8], block
[9] & 0x8D5);
/* Test vectors.  Each call is one_test(summL, rdxIN, summR, raxIN).
   rdxIN and raxIN are ULong parameters, so the negative literals below are
   converted to large unsigned 64-bit values.
   NOTE(review): the header of the enclosing function is not visible in
   this listing. */
/* Both register inputs zero; the operand strings differ and contain '0'
   digits. */
307 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa00aaaaaa", 0 );
308 one_test("0000000000000000", 0, "aaaaaaaa00aaaaaa", 0 );
/* Identical all-'a' operands from here on; only the register inputs vary.
   First: neither, only rdxIN, or only raxIN non-zero. */
310 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
311 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
312 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
/* rdxIN fixed at 5, raxIN positive: 6, 15, 16, 17. */
314 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
315 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
316 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
317 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
/* rdxIN fixed at 5, raxIN negative: -6, -15, -16, -17. */
319 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
320 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
321 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
322 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
/* raxIN fixed at 6, rdxIN positive: 5, 15, 16, 17. */
324 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
325 one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
326 one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
327 one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
/* raxIN fixed at 6, rdxIN negative: -5, -15, -16, -17. */
329 one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
330 one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
331 one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
332 one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );