none/tests/fdleak_cmsg_supp.supp: Add suppressions for older glibc
[valgrind.git] / none / tests / amd64 / pcmpstr64.c
blob9feae45935570e065c92987bf7f996c13c574116
2 /* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3 pcmpistri to drive it. Does not check the e-vs-i or i-vs-m
4 aspect. */
6 #include <string.h>
7 #include <stdio.h>
8 #include <assert.h>
/* Short names for the scalar types used throughout this test. */
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  signed char             Char;
typedef  unsigned long long int  ULong;

/* Boolean held in a single unsigned byte. */
typedef  UChar  Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* One vector operand: 16 bytes, also addressable as four UInt words. */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C and P flags in the flags word. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

/* Single-bit masks for the same six flags (together they form 0x8D5). */
#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)
42 UInt clz32 ( UInt x )
44 Int y, m, n;
45 y = -(x >> 16);
46 m = (y >> 16) & 16;
47 n = 16 - m;
48 x = x >> m;
49 y = x - 0x100;
50 m = (y >> 16) & 8;
51 n = n + m;
52 x = x << m;
53 y = x - 0x1000;
54 m = (y >> 16) & 4;
55 n = n + m;
56 x = x << m;
57 y = x - 0x4000;
58 m = (y >> 16) & 2;
59 n = n + m;
60 x = x << m;
61 y = x >> 14;
62 m = y & ~(y >> 1);
63 return n + 2 - m;
66 UInt ctz32 ( UInt x )
68 return 32 - clz32((~x) & (x-1));
71 void expand ( V128* dst, char* summary )
73 Int i;
74 assert( strlen(summary) == 16 );
75 for (i = 0; i < 16; i++) {
76 UChar xx = 0;
77 UChar x = summary[15-i];
78 if (x >= '0' && x <= '9') { xx = x - '0'; }
79 else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
80 else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
81 else assert(0);
83 assert(xx < 16);
84 xx = (xx << 4) | xx;
85 assert(xx < 256);
86 dst->uChar[i] = xx;
90 void try_istri ( char* which,
91 UInt(*h_fn)(V128*,V128*),
92 UInt(*s_fn)(V128*,V128*),
93 char* summL, char* summR )
95 assert(strlen(which) == 2);
96 V128 argL, argR;
97 expand(&argL, summL);
98 expand(&argR, summR);
99 UInt h_res = h_fn(&argL, &argR);
100 UInt s_res = s_fn(&argL, &argR);
101 printf("istri %s %s %s -> %08x %08x %s\n",
102 which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
105 UInt zmask_from_V128 ( V128* arg )
107 UInt i, res = 0;
108 for (i = 0; i < 16; i++) {
109 res |= ((arg->uChar[i] == 0) ? 1 : 0) << i;
111 return res;
114 //////////////////////////////////////////////////////////
115 // //
116 // GENERAL //
117 // //
118 //////////////////////////////////////////////////////////
121 /* Given partial results from a pcmpXstrX operation (intRes1,
122 basically), generate an I format (index value for ECX) output, and
123 also the new OSZACP flags.
125 static
126 void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
127 /*OUT*/UInt* resOSZACP,
128 UInt intRes1,
129 UInt zmaskL, UInt zmaskR,
130 UInt validL,
131 UInt pol, UInt idx )
133 assert((pol >> 2) == 0);
134 assert((idx >> 1) == 0);
136 UInt intRes2 = 0;
137 switch (pol) {
138 case 0: intRes2 = intRes1; break; // pol +
139 case 1: intRes2 = ~intRes1; break; // pol -
140 case 2: intRes2 = intRes1; break; // pol m+
141 case 3: intRes2 = intRes1 ^ validL; break; // pol m-
143 intRes2 &= 0xFFFF;
145 // generate ecx value
146 UInt newECX = 0;
147 if (idx) {
148 // index of ms-1-bit
149 newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
150 } else {
151 // index of ls-1-bit
152 newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
155 *(UInt*)(&resV[0]) = newECX;
157 // generate new flags, common to all ISTRI and ISTRM cases
158 *resOSZACP // A, P are zero
159 = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
160 | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
161 | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
162 | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
166 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
167 variants.
169 For xSTRI variants, the new ECX value is placed in the 32 bits
170 pointed to by *resV. For xSTRM variants, the result is a 128 bit
171 value and is placed at *resV in the obvious way.
173 For all variants, the new OSZACP value is placed at *resOSZACP.
175 argLV and argRV are the vector args. The caller must prepare a
176 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
177 must be 1 for each zero byte of of the respective arg. For ESTRx
178 variants this is derived from the explicit length indication, and
179 must be 0 in all places except at the bit index corresponding to
180 the valid length (0 .. 16). If the valid length is 16 then the
181 mask must be all zeroes. In all cases, bits 31:16 must be zero.
183 imm8 is the original immediate from the instruction. isSTRM
184 indicates whether this is a xSTRM or xSTRI variant, which controls
185 how much of *res is written.
187 If the given imm8 case can be handled, the return value is True.
188 If not, False is returned, and neither *res not *resOSZACP are
189 altered.
192 Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
193 /*OUT*/UInt* resOSZACP,
194 V128* argLV, V128* argRV,
195 UInt zmaskL, UInt zmaskR,
196 UInt imm8, Bool isSTRM )
198 assert(imm8 < 0x80);
199 assert((zmaskL >> 16) == 0);
200 assert((zmaskR >> 16) == 0);
202 /* Explicitly reject any imm8 values that haven't been validated,
203 even if they would probably work. Life is too short to have
204 unvalidated cases in the code base. */
205 switch (imm8) {
206 case 0x00: case 0x02:
207 case 0x08: case 0x0A: case 0x0C: case 0x0E:
208 case 0x10: case 0x12: case 0x14:
209 case 0x18: case 0x1A:
210 case 0x30: case 0x34:
211 case 0x38: case 0x3A:
212 case 0x40: case 0x42: case 0x44: case 0x46:
213 case 0x4A:
214 case 0x62:
215 case 0x70: case 0x72:
216 break;
217 default:
218 return False;
221 UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
222 UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
223 UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
224 UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
226 /*----------------------------------------*/
227 /*-- strcmp on byte data --*/
228 /*----------------------------------------*/
230 if (agg == 2/*equal each, aka strcmp*/
231 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
232 && !isSTRM) {
233 Int i;
234 UChar* argL = (UChar*)argLV;
235 UChar* argR = (UChar*)argRV;
236 UInt boolResII = 0;
237 for (i = 15; i >= 0; i--) {
238 UChar cL = argL[i];
239 UChar cR = argR[i];
240 boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
242 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
243 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
245 // do invalidation, common to all equal-each cases
246 UInt intRes1
247 = (boolResII & validL & validR) // if both valid, use cmpres
248 | (~ (validL | validR)); // if both invalid, force 1
249 // else force 0
250 intRes1 &= 0xFFFF;
252 // generate I-format output
253 pcmpXstrX_WRK_gen_output_fmt_I(
254 resV, resOSZACP,
255 intRes1, zmaskL, zmaskR, validL, pol, idx
258 return True;
261 /*----------------------------------------*/
262 /*-- set membership on byte data --*/
263 /*----------------------------------------*/
265 if (agg == 0/*equal any, aka find chars in a set*/
266 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
267 && !isSTRM) {
268 /* argL: the string, argR: charset */
269 UInt si, ci;
270 UChar* argL = (UChar*)argLV;
271 UChar* argR = (UChar*)argRV;
272 UInt boolRes = 0;
273 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
274 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
276 for (si = 0; si < 16; si++) {
277 if ((validL & (1 << si)) == 0)
278 // run off the end of the string.
279 break;
280 UInt m = 0;
281 for (ci = 0; ci < 16; ci++) {
282 if ((validR & (1 << ci)) == 0) break;
283 if (argR[ci] == argL[si]) { m = 1; break; }
285 boolRes |= (m << si);
288 // boolRes is "pre-invalidated"
289 UInt intRes1 = boolRes & 0xFFFF;
291 // generate I-format output
292 pcmpXstrX_WRK_gen_output_fmt_I(
293 resV, resOSZACP,
294 intRes1, zmaskL, zmaskR, validL, pol, idx
297 return True;
300 /*----------------------------------------*/
301 /*-- substring search on byte data --*/
302 /*----------------------------------------*/
304 if (agg == 3/*equal ordered, aka substring search*/
305 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
306 && !isSTRM) {
308 /* argL: haystack, argR: needle */
309 UInt ni, hi;
310 UChar* argL = (UChar*)argLV;
311 UChar* argR = (UChar*)argRV;
312 UInt boolRes = 0;
313 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
314 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
315 for (hi = 0; hi < 16; hi++) {
316 UInt m = 1;
317 for (ni = 0; ni < 16; ni++) {
318 if ((validR & (1 << ni)) == 0) break;
319 UInt i = ni + hi;
320 if (i >= 16) break;
321 if (argL[i] != argR[ni]) { m = 0; break; }
323 boolRes |= (m << hi);
324 if ((validL & (1 << hi)) == 0)
325 // run off the end of the haystack
326 break;
329 // boolRes is "pre-invalidated"
330 UInt intRes1 = boolRes & 0xFFFF;
332 // generate I-format output
333 pcmpXstrX_WRK_gen_output_fmt_I(
334 resV, resOSZACP,
335 intRes1, zmaskL, zmaskR, validL, pol, idx
338 return True;
341 /*----------------------------------------*/
342 /*-- ranges, unsigned byte data --*/
343 /*----------------------------------------*/
345 if (agg == 1/*ranges*/
346 && fmt == 0/*ub*/
347 && !isSTRM) {
349 /* argL: string, argR: range-pairs */
350 UInt ri, si;
351 UChar* argL = (UChar*)argLV;
352 UChar* argR = (UChar*)argRV;
353 UInt boolRes = 0;
354 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
355 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
356 for (si = 0; si < 16; si++) {
357 if ((validL & (1 << si)) == 0)
358 // run off the end of the string
359 break;
360 UInt m = 0;
361 for (ri = 0; ri < 16; ri += 2) {
362 if ((validR & (3 << ri)) != (3 << ri)) break;
363 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
364 m = 1; break;
367 boolRes |= (m << si);
370 // boolRes is "pre-invalidated"
371 UInt intRes1 = boolRes & 0xFFFF;
373 // generate I-format output
374 pcmpXstrX_WRK_gen_output_fmt_I(
375 resV, resOSZACP,
376 intRes1, zmaskL, zmaskR, validL, pol, idx
379 return True;
382 /*----------------------------------------*/
383 /*-- ranges, signed byte data --*/
384 /*----------------------------------------*/
386 if (agg == 1/*ranges*/
387 && fmt == 2/*sb*/
388 && !isSTRM) {
390 /* argL: string, argR: range-pairs */
391 UInt ri, si;
392 Char* argL = (Char*)argLV;
393 Char* argR = (Char*)argRV;
394 UInt boolRes = 0;
395 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
396 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
397 for (si = 0; si < 16; si++) {
398 if ((validL & (1 << si)) == 0)
399 // run off the end of the string
400 break;
401 UInt m = 0;
402 for (ri = 0; ri < 16; ri += 2) {
403 if ((validR & (3 << ri)) != (3 << ri)) break;
404 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
405 m = 1; break;
408 boolRes |= (m << si);
411 // boolRes is "pre-invalidated"
412 UInt intRes1 = boolRes & 0xFFFF;
414 // generate I-format output
415 pcmpXstrX_WRK_gen_output_fmt_I(
416 resV, resOSZACP,
417 intRes1, zmaskL, zmaskR, validL, pol, idx
420 return True;
423 return False;
427 //////////////////////////////////////////////////////////
428 // //
429 // ISTRI_4A //
430 // //
431 //////////////////////////////////////////////////////////
433 UInt h_pcmpistri_4A ( V128* argL, V128* argR )
435 V128 block[2];
436 memcpy(&block[0], argL, sizeof(V128));
437 memcpy(&block[1], argR, sizeof(V128));
438 ULong res, flags;
439 __asm__ __volatile__(
440 "subq $1024, %%rsp" "\n\t"
441 "movdqu 0(%2), %%xmm2" "\n\t"
442 "movdqu 16(%2), %%xmm11" "\n\t"
443 "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t"
444 "pushfq" "\n\t"
445 "popq %%rdx" "\n\t"
446 "movq %%rcx, %0" "\n\t"
447 "movq %%rdx, %1" "\n\t"
448 "addq $1024, %%rsp" "\n\t"
449 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
450 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
452 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
455 UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
457 V128 resV;
458 UInt resOSZACP, resECX;
459 Bool ok
460 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
461 zmask_from_V128(argLU),
462 zmask_from_V128(argRU),
463 0x4A, False/*!isSTRM*/
465 assert(ok);
466 resECX = resV.uInt[0];
467 return (resOSZACP << 16) | resECX;
470 void istri_4A ( void )
472 char* wot = "4A";
473 UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
474 UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
476 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
478 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
479 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
480 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
481 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
483 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
484 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
485 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
487 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
488 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
489 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
490 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
492 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
493 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
494 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
496 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
498 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
499 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
500 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
502 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
503 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
504 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
506 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
507 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
508 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
510 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
511 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
512 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
514 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
515 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
518 //////////////////////////////////////////////////////////
519 // //
520 // ISTRI_3A //
521 // //
522 //////////////////////////////////////////////////////////
524 UInt h_pcmpistri_3A ( V128* argL, V128* argR )
526 V128 block[2];
527 memcpy(&block[0], argL, sizeof(V128));
528 memcpy(&block[1], argR, sizeof(V128));
529 ULong res, flags;
530 __asm__ __volatile__(
531 "subq $1024, %%rsp" "\n\t"
532 "movdqu 0(%2), %%xmm2" "\n\t"
533 "movdqu 16(%2), %%xmm11" "\n\t"
534 "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t"
535 "pushfq" "\n\t"
536 "popq %%rdx" "\n\t"
537 "movq %%rcx, %0" "\n\t"
538 "movq %%rdx, %1" "\n\t"
539 "addq $1024, %%rsp" "\n\t"
540 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
541 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
543 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
546 UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
548 V128 resV;
549 UInt resOSZACP, resECX;
550 Bool ok
551 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
552 zmask_from_V128(argLU),
553 zmask_from_V128(argRU),
554 0x3A, False/*!isSTRM*/
556 assert(ok);
557 resECX = resV.uInt[0];
558 return (resOSZACP << 16) | resECX;
561 void istri_3A ( void )
563 char* wot = "3A";
564 UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
565 UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
567 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
569 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
570 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
571 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
572 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
574 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
575 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
576 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
578 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
579 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
580 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
581 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
583 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
584 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
585 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
587 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
589 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
590 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
591 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
593 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
594 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
595 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
597 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
598 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
599 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
601 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
602 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
603 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
605 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
606 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
611 //////////////////////////////////////////////////////////
612 // //
613 // ISTRI_0C //
614 // //
615 //////////////////////////////////////////////////////////
617 __attribute__((noinline))
618 UInt h_pcmpistri_0C ( V128* argL, V128* argR )
620 V128 block[2];
621 memcpy(&block[0], argL, sizeof(V128));
622 memcpy(&block[1], argR, sizeof(V128));
623 ULong res = 0, flags = 0;
624 __asm__ __volatile__(
625 "movdqu 0(%2), %%xmm2" "\n\t"
626 "movdqu 16(%2), %%xmm11" "\n\t"
627 "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t"
628 //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t"
629 //"movd %%xmm0, %%ecx" "\n\t"
630 "pushfq" "\n\t"
631 "popq %%rdx" "\n\t"
632 "movq %%rcx, %0" "\n\t"
633 "movq %%rdx, %1" "\n\t"
634 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
635 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
637 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
640 UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
642 V128 resV;
643 UInt resOSZACP, resECX;
644 Bool ok
645 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
646 zmask_from_V128(argLU),
647 zmask_from_V128(argRU),
648 0x0C, False/*!isSTRM*/
650 assert(ok);
651 resECX = resV.uInt[0];
652 return (resOSZACP << 16) | resECX;
655 void istri_0C ( void )
657 char* wot = "0C";
658 UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
659 UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
661 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
663 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
665 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
666 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
667 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
669 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
671 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
672 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
673 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
674 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
675 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
677 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
678 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
679 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
681 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
682 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
684 try_istri(wot,h,s, "1111111111111234", "0000000000000000");
685 try_istri(wot,h,s, "1111111111111234", "0000000000000001");
686 try_istri(wot,h,s, "1111111111111234", "0000000000000011");
688 try_istri(wot,h,s, "1111111111111234", "1111111111111234");
689 try_istri(wot,h,s, "a111111111111111", "000000000000000a");
690 try_istri(wot,h,s, "b111111111111111", "000000000000000a");
692 try_istri(wot,h,s, "b111111111111111", "0000000000000000");
693 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
694 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
695 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
699 //////////////////////////////////////////////////////////
700 // //
701 // ISTRI_08 //
702 // //
703 //////////////////////////////////////////////////////////
705 UInt h_pcmpistri_08 ( V128* argL, V128* argR )
707 V128 block[2];
708 memcpy(&block[0], argL, sizeof(V128));
709 memcpy(&block[1], argR, sizeof(V128));
710 ULong res, flags;
711 __asm__ __volatile__(
712 "subq $1024, %%rsp" "\n\t"
713 "movdqu 0(%2), %%xmm2" "\n\t"
714 "movdqu 16(%2), %%xmm11" "\n\t"
715 "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t"
716 "pushfq" "\n\t"
717 "popq %%rdx" "\n\t"
718 "movq %%rcx, %0" "\n\t"
719 "movq %%rdx, %1" "\n\t"
720 "addq $1024, %%rsp" "\n\t"
721 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
722 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
724 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
727 UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
729 V128 resV;
730 UInt resOSZACP, resECX;
731 Bool ok
732 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
733 zmask_from_V128(argLU),
734 zmask_from_V128(argRU),
735 0x08, False/*!isSTRM*/
737 assert(ok);
738 resECX = resV.uInt[0];
739 return (resOSZACP << 16) | resECX;
742 void istri_08 ( void )
744 char* wot = "08";
745 UInt(*h)(V128*,V128*) = h_pcmpistri_08;
746 UInt(*s)(V128*,V128*) = s_pcmpistri_08;
748 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
750 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
751 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
752 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
753 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
755 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
756 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
757 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
759 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
760 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
761 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
762 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
764 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
765 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
766 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
768 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
770 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
771 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
772 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
774 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
775 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
776 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
778 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
779 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
780 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
782 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
783 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
784 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
786 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
787 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
792 //////////////////////////////////////////////////////////
793 // //
794 // ISTRI_18 //
795 // //
796 //////////////////////////////////////////////////////////
798 UInt h_pcmpistri_18 ( V128* argL, V128* argR )
800 V128 block[2];
801 memcpy(&block[0], argL, sizeof(V128));
802 memcpy(&block[1], argR, sizeof(V128));
803 ULong res, flags;
804 __asm__ __volatile__(
805 "subq $1024, %%rsp" "\n\t"
806 "movdqu 0(%2), %%xmm2" "\n\t"
807 "movdqu 16(%2), %%xmm11" "\n\t"
808 "pcmpistri $0x18, %%xmm2, %%xmm11" "\n\t"
809 "pushfq" "\n\t"
810 "popq %%rdx" "\n\t"
811 "movq %%rcx, %0" "\n\t"
812 "movq %%rdx, %1" "\n\t"
813 "addq $1024, %%rsp" "\n\t"
814 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
815 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
817 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
820 UInt s_pcmpistri_18 ( V128* argLU, V128* argRU )
822 V128 resV;
823 UInt resOSZACP, resECX;
824 Bool ok
825 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
826 zmask_from_V128(argLU),
827 zmask_from_V128(argRU),
828 0x18, False/*!isSTRM*/
830 assert(ok);
831 resECX = resV.uInt[0];
832 return (resOSZACP << 16) | resECX;
835 void istri_18 ( void )
837 char* wot = "18";
838 UInt(*h)(V128*,V128*) = h_pcmpistri_18;
839 UInt(*s)(V128*,V128*) = s_pcmpistri_18;
841 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
843 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
844 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
845 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
846 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
848 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
849 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
850 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
852 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
853 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
854 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
855 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
857 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
858 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
859 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
861 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
863 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
864 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
865 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
867 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
868 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
869 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
871 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
872 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
873 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
875 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
876 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
877 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
879 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
880 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
885 //////////////////////////////////////////////////////////
886 // //
887 // ISTRI_1A //
888 // //
889 //////////////////////////////////////////////////////////
891 UInt h_pcmpistri_1A ( V128* argL, V128* argR )
893 V128 block[2];
894 memcpy(&block[0], argL, sizeof(V128));
895 memcpy(&block[1], argR, sizeof(V128));
896 ULong res, flags;
897 __asm__ __volatile__(
898 "subq $1024, %%rsp" "\n\t"
899 "movdqu 0(%2), %%xmm2" "\n\t"
900 "movdqu 16(%2), %%xmm11" "\n\t"
901 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t"
902 "pushfq" "\n\t"
903 "popq %%rdx" "\n\t"
904 "movq %%rcx, %0" "\n\t"
905 "movq %%rdx, %1" "\n\t"
906 "addq $1024, %%rsp" "\n\t"
907 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
908 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
910 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
913 UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
915 V128 resV;
916 UInt resOSZACP, resECX;
917 Bool ok
918 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
919 zmask_from_V128(argLU),
920 zmask_from_V128(argRU),
921 0x1A, False/*!isSTRM*/
923 assert(ok);
924 resECX = resV.uInt[0];
925 return (resOSZACP << 16) | resECX;
928 void istri_1A ( void )
930 char* wot = "1A";
931 UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
932 UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
934 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
936 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
937 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
938 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
939 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
941 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
942 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
943 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
945 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
946 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
947 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
948 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
950 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
951 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
952 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
954 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
956 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
957 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
958 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
960 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
961 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
962 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
964 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
965 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
966 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
968 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
969 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
970 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
972 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
973 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
978 //////////////////////////////////////////////////////////
979 // //
980 // ISTRI_02 //
981 // //
982 //////////////////////////////////////////////////////////
984 UInt h_pcmpistri_02 ( V128* argL, V128* argR )
986 V128 block[2];
987 memcpy(&block[0], argL, sizeof(V128));
988 memcpy(&block[1], argR, sizeof(V128));
989 ULong res, flags;
990 __asm__ __volatile__(
991 "subq $1024, %%rsp" "\n\t"
992 "movdqu 0(%2), %%xmm2" "\n\t"
993 "movdqu 16(%2), %%xmm11" "\n\t"
994 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t"
995 //"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t"
996 //"movd %%xmm0, %%ecx" "\n\t"
997 "pushfq" "\n\t"
998 "popq %%rdx" "\n\t"
999 "movq %%rcx, %0" "\n\t"
1000 "movq %%rdx, %1" "\n\t"
1001 "addq $1024, %%rsp" "\n\t"
1002 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1003 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1005 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1008 UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
1010 V128 resV;
1011 UInt resOSZACP, resECX;
1012 Bool ok
1013 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1014 zmask_from_V128(argLU),
1015 zmask_from_V128(argRU),
1016 0x02, False/*!isSTRM*/
1018 assert(ok);
1019 resECX = resV.uInt[0];
1020 return (resOSZACP << 16) | resECX;
1023 void istri_02 ( void )
1025 char* wot = "02";
1026 UInt(*h)(V128*,V128*) = h_pcmpistri_02;
1027 UInt(*s)(V128*,V128*) = s_pcmpistri_02;
1029 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1030 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1031 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1032 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1034 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1035 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1036 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1037 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1038 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1040 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1041 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1042 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1043 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1045 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1046 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1048 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1049 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1050 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1051 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1053 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1055 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1056 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1060 //////////////////////////////////////////////////////////
1061 // //
1062 // ISTRI_12 //
1063 // //
1064 //////////////////////////////////////////////////////////
1066 UInt h_pcmpistri_12 ( V128* argL, V128* argR )
1068 V128 block[2];
1069 memcpy(&block[0], argL, sizeof(V128));
1070 memcpy(&block[1], argR, sizeof(V128));
1071 ULong res, flags;
1072 __asm__ __volatile__(
1073 "subq $1024, %%rsp" "\n\t"
1074 "movdqu 0(%2), %%xmm2" "\n\t"
1075 "movdqu 16(%2), %%xmm11" "\n\t"
1076 "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t"
1077 //"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t"
1078 //"movd %%xmm0, %%ecx" "\n\t"
1079 "pushfq" "\n\t"
1080 "popq %%rdx" "\n\t"
1081 "movq %%rcx, %0" "\n\t"
1082 "movq %%rdx, %1" "\n\t"
1083 "addq $1024, %%rsp" "\n\t"
1084 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1085 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1087 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1090 UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
1092 V128 resV;
1093 UInt resOSZACP, resECX;
1094 Bool ok
1095 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1096 zmask_from_V128(argLU),
1097 zmask_from_V128(argRU),
1098 0x12, False/*!isSTRM*/
1100 assert(ok);
1101 resECX = resV.uInt[0];
1102 return (resOSZACP << 16) | resECX;
1105 void istri_12 ( void )
1107 char* wot = "12";
1108 UInt(*h)(V128*,V128*) = h_pcmpistri_12;
1109 UInt(*s)(V128*,V128*) = s_pcmpistri_12;
1111 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1112 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1113 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1114 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1116 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1117 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1118 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1119 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1120 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1122 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1123 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1124 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1125 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1127 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1128 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1130 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1131 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1132 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1133 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1135 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1137 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1138 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1143 //////////////////////////////////////////////////////////
1144 // //
1145 // ISTRI_44 //
1146 // //
1147 //////////////////////////////////////////////////////////
1149 UInt h_pcmpistri_44 ( V128* argL, V128* argR )
1151 V128 block[2];
1152 memcpy(&block[0], argL, sizeof(V128));
1153 memcpy(&block[1], argR, sizeof(V128));
1154 ULong res, flags;
1155 __asm__ __volatile__(
1156 "subq $1024, %%rsp" "\n\t"
1157 "movdqu 0(%2), %%xmm2" "\n\t"
1158 "movdqu 16(%2), %%xmm11" "\n\t"
1159 "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t"
1160 //"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t"
1161 //"movd %%xmm0, %%ecx" "\n\t"
1162 "pushfq" "\n\t"
1163 "popq %%rdx" "\n\t"
1164 "movq %%rcx, %0" "\n\t"
1165 "movq %%rdx, %1" "\n\t"
1166 "addq $1024, %%rsp" "\n\t"
1167 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1168 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1170 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1173 UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1175 V128 resV;
1176 UInt resOSZACP, resECX;
1177 Bool ok
1178 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1179 zmask_from_V128(argLU),
1180 zmask_from_V128(argRU),
1181 0x44, False/*!isSTRM*/
1183 assert(ok);
1184 resECX = resV.uInt[0];
1185 return (resOSZACP << 16) | resECX;
1188 void istri_44 ( void )
1190 char* wot = "44";
1191 UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1192 UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1194 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1195 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1196 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1197 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1199 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1200 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1201 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1202 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1203 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1205 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1207 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1208 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1209 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1211 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1212 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1213 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1215 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1216 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1218 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1219 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1221 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1222 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1223 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1224 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1228 //////////////////////////////////////////////////////////
1229 // //
1230 // ISTRI_00 //
1231 // //
1232 //////////////////////////////////////////////////////////
1234 UInt h_pcmpistri_00 ( V128* argL, V128* argR )
1236 V128 block[2];
1237 memcpy(&block[0], argL, sizeof(V128));
1238 memcpy(&block[1], argR, sizeof(V128));
1239 ULong res, flags;
1240 __asm__ __volatile__(
1241 "subq $1024, %%rsp" "\n\t"
1242 "movdqu 0(%2), %%xmm2" "\n\t"
1243 "movdqu 16(%2), %%xmm11" "\n\t"
1244 "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t"
1245 //"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t"
1246 //"movd %%xmm0, %%ecx" "\n\t"
1247 "pushfq" "\n\t"
1248 "popq %%rdx" "\n\t"
1249 "movq %%rcx, %0" "\n\t"
1250 "movq %%rdx, %1" "\n\t"
1251 "addq $1024, %%rsp" "\n\t"
1252 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1253 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1255 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1258 UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1260 V128 resV;
1261 UInt resOSZACP, resECX;
1262 Bool ok
1263 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1264 zmask_from_V128(argLU),
1265 zmask_from_V128(argRU),
1266 0x00, False/*!isSTRM*/
1268 assert(ok);
1269 resECX = resV.uInt[0];
1270 return (resOSZACP << 16) | resECX;
1273 void istri_00 ( void )
1275 char* wot = "00";
1276 UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1277 UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1279 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1280 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1281 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1282 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1284 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1285 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1286 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1287 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1288 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1290 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1291 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1292 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1293 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1295 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1296 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1298 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1299 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1300 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1301 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1303 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1305 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1306 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1310 //////////////////////////////////////////////////////////
1311 // //
1312 // ISTRI_38 //
1313 // //
1314 //////////////////////////////////////////////////////////
1316 UInt h_pcmpistri_38 ( V128* argL, V128* argR )
1318 V128 block[2];
1319 memcpy(&block[0], argL, sizeof(V128));
1320 memcpy(&block[1], argR, sizeof(V128));
1321 ULong res, flags;
1322 __asm__ __volatile__(
1323 "subq $1024, %%rsp" "\n\t"
1324 "movdqu 0(%2), %%xmm2" "\n\t"
1325 "movdqu 16(%2), %%xmm11" "\n\t"
1326 "pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t"
1327 "pushfq" "\n\t"
1328 "popq %%rdx" "\n\t"
1329 "movq %%rcx, %0" "\n\t"
1330 "movq %%rdx, %1" "\n\t"
1331 "addq $1024, %%rsp" "\n\t"
1332 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1333 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1335 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1338 UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
1340 V128 resV;
1341 UInt resOSZACP, resECX;
1342 Bool ok
1343 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1344 zmask_from_V128(argLU),
1345 zmask_from_V128(argRU),
1346 0x38, False/*!isSTRM*/
1348 assert(ok);
1349 resECX = resV.uInt[0];
1350 return (resOSZACP << 16) | resECX;
1353 void istri_38 ( void )
1355 char* wot = "38";
1356 UInt(*h)(V128*,V128*) = h_pcmpistri_38;
1357 UInt(*s)(V128*,V128*) = s_pcmpistri_38;
1359 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1361 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1362 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1363 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1364 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1366 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1367 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1368 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1370 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1371 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1372 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1373 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1375 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1376 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1377 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1379 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1381 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1382 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1383 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1385 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1386 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1387 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1389 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1390 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1391 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1393 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
1394 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
1395 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
1397 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
1398 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
1403 //////////////////////////////////////////////////////////
1404 // //
1405 // ISTRI_46 //
1406 // //
1407 //////////////////////////////////////////////////////////
1409 UInt h_pcmpistri_46 ( V128* argL, V128* argR )
1411 V128 block[2];
1412 memcpy(&block[0], argL, sizeof(V128));
1413 memcpy(&block[1], argR, sizeof(V128));
1414 ULong res, flags;
1415 __asm__ __volatile__(
1416 "subq $1024, %%rsp" "\n\t"
1417 "movdqu 0(%2), %%xmm2" "\n\t"
1418 "movdqu 16(%2), %%xmm11" "\n\t"
1419 "pcmpistri $0x46, %%xmm2, %%xmm11" "\n\t"
1420 "pushfq" "\n\t"
1421 "popq %%rdx" "\n\t"
1422 "movq %%rcx, %0" "\n\t"
1423 "movq %%rdx, %1" "\n\t"
1424 "addq $1024, %%rsp" "\n\t"
1425 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1426 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1428 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1431 UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
1433 V128 resV;
1434 UInt resOSZACP, resECX;
1435 Bool ok
1436 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1437 zmask_from_V128(argLU),
1438 zmask_from_V128(argRU),
1439 0x46, False/*!isSTRM*/
1441 assert(ok);
1442 resECX = resV.uInt[0];
1443 return (resOSZACP << 16) | resECX;
1446 void istri_46 ( void )
1448 char* wot = "46";
1449 UInt(*h)(V128*,V128*) = h_pcmpistri_46;
1450 UInt(*s)(V128*,V128*) = s_pcmpistri_46;
1452 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1453 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1454 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1455 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1457 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1458 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1459 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1460 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1461 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1463 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1465 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1466 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1467 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1469 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1470 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1471 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1473 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1474 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1476 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1477 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1479 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1480 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1481 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1482 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1486 //////////////////////////////////////////////////////////
1487 // //
1488 // ISTRI_30 //
1489 // //
1490 //////////////////////////////////////////////////////////
1492 UInt h_pcmpistri_30 ( V128* argL, V128* argR )
1494 V128 block[2];
1495 memcpy(&block[0], argL, sizeof(V128));
1496 memcpy(&block[1], argR, sizeof(V128));
1497 ULong res, flags;
1498 __asm__ __volatile__(
1499 "subq $1024, %%rsp" "\n\t"
1500 "movdqu 0(%2), %%xmm2" "\n\t"
1501 "movdqu 16(%2), %%xmm11" "\n\t"
1502 "pcmpistri $0x30, %%xmm2, %%xmm11" "\n\t"
1503 "pushfq" "\n\t"
1504 "popq %%rdx" "\n\t"
1505 "movq %%rcx, %0" "\n\t"
1506 "movq %%rdx, %1" "\n\t"
1507 "addq $1024, %%rsp" "\n\t"
1508 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1509 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1511 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1514 UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
1516 V128 resV;
1517 UInt resOSZACP, resECX;
1518 Bool ok
1519 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1520 zmask_from_V128(argLU),
1521 zmask_from_V128(argRU),
1522 0x30, False/*!isSTRM*/
1524 assert(ok);
1525 resECX = resV.uInt[0];
1526 return (resOSZACP << 16) | resECX;
1529 void istri_30 ( void )
1531 char* wot = "30";
1532 UInt(*h)(V128*,V128*) = h_pcmpistri_30;
1533 UInt(*s)(V128*,V128*) = s_pcmpistri_30;
1535 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1536 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1537 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1538 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1540 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1541 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1542 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1543 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1544 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1546 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1547 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1548 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1549 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1551 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1552 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1554 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1555 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1556 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1557 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1559 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1561 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1562 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1566 //////////////////////////////////////////////////////////
1567 // //
1568 // ISTRI_40 //
1569 // //
1570 //////////////////////////////////////////////////////////
1572 UInt h_pcmpistri_40 ( V128* argL, V128* argR )
1574 V128 block[2];
1575 memcpy(&block[0], argL, sizeof(V128));
1576 memcpy(&block[1], argR, sizeof(V128));
1577 ULong res, flags;
1578 __asm__ __volatile__(
1579 "subq $1024, %%rsp" "\n\t"
1580 "movdqu 0(%2), %%xmm2" "\n\t"
1581 "movdqu 16(%2), %%xmm11" "\n\t"
1582 "pcmpistri $0x40, %%xmm2, %%xmm11" "\n\t"
1583 "pushfq" "\n\t"
1584 "popq %%rdx" "\n\t"
1585 "movq %%rcx, %0" "\n\t"
1586 "movq %%rdx, %1" "\n\t"
1587 "addq $1024, %%rsp" "\n\t"
1588 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1589 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1591 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1594 UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
1596 V128 resV;
1597 UInt resOSZACP, resECX;
1598 Bool ok
1599 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1600 zmask_from_V128(argLU),
1601 zmask_from_V128(argRU),
1602 0x40, False/*!isSTRM*/
1604 assert(ok);
1605 resECX = resV.uInt[0];
1606 return (resOSZACP << 16) | resECX;
1609 void istri_40 ( void )
1611 char* wot = "40";
1612 UInt(*h)(V128*,V128*) = h_pcmpistri_40;
1613 UInt(*s)(V128*,V128*) = s_pcmpistri_40;
1615 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1616 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1617 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1618 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1620 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1621 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1622 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1623 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1624 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1626 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1627 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1628 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1629 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1631 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1632 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1634 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1635 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1636 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1637 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1639 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1641 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1642 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1646 //////////////////////////////////////////////////////////
1647 // //
1648 // ISTRI_42 //
1649 // //
1650 //////////////////////////////////////////////////////////
1652 UInt h_pcmpistri_42 ( V128* argL, V128* argR )
1654 V128 block[2];
1655 memcpy(&block[0], argL, sizeof(V128));
1656 memcpy(&block[1], argR, sizeof(V128));
1657 ULong res, flags;
1658 __asm__ __volatile__(
1659 "subq $1024, %%rsp" "\n\t"
1660 "movdqu 0(%2), %%xmm2" "\n\t"
1661 "movdqu 16(%2), %%xmm11" "\n\t"
1662 "pcmpistri $0x42, %%xmm2, %%xmm11" "\n\t"
1663 "pushfq" "\n\t"
1664 "popq %%rdx" "\n\t"
1665 "movq %%rcx, %0" "\n\t"
1666 "movq %%rdx, %1" "\n\t"
1667 "addq $1024, %%rsp" "\n\t"
1668 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1669 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1671 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1674 UInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
1676 V128 resV;
1677 UInt resOSZACP, resECX;
1678 Bool ok
1679 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1680 zmask_from_V128(argLU),
1681 zmask_from_V128(argRU),
1682 0x42, False/*!isSTRM*/
1684 assert(ok);
1685 resECX = resV.uInt[0];
1686 return (resOSZACP << 16) | resECX;
1689 void istri_42 ( void )
1691 char* wot = "42";
1692 UInt(*h)(V128*,V128*) = h_pcmpistri_42;
1693 UInt(*s)(V128*,V128*) = s_pcmpistri_42;
1695 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1696 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1697 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1698 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1700 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1701 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1702 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1703 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1704 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1706 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1707 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1708 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1709 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1711 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1712 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1714 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1715 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1716 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1717 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1719 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1721 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1722 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1726 //////////////////////////////////////////////////////////
1727 // //
1728 // ISTRI_0E //
1729 // //
1730 //////////////////////////////////////////////////////////
1732 __attribute__((noinline))
1733 UInt h_pcmpistri_0E ( V128* argL, V128* argR )
1735 V128 block[2];
1736 memcpy(&block[0], argL, sizeof(V128));
1737 memcpy(&block[1], argR, sizeof(V128));
1738 ULong res = 0, flags = 0;
1739 __asm__ __volatile__(
1740 "movdqu 0(%2), %%xmm2" "\n\t"
1741 "movdqu 16(%2), %%xmm11" "\n\t"
1742 "pcmpistri $0x0E, %%xmm2, %%xmm11" "\n\t"
1743 "pushfq" "\n\t"
1744 "popq %%rdx" "\n\t"
1745 "movq %%rcx, %0" "\n\t"
1746 "movq %%rdx, %1" "\n\t"
1747 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1748 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1750 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1753 UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
1755 V128 resV;
1756 UInt resOSZACP, resECX;
1757 Bool ok
1758 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1759 zmask_from_V128(argLU),
1760 zmask_from_V128(argRU),
1761 0x0E, False/*!isSTRM*/
1763 assert(ok);
1764 resECX = resV.uInt[0];
1765 return (resOSZACP << 16) | resECX;
1768 void istri_0E ( void )
1770 char* wot = "0E";
1771 UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
1772 UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
1774 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
1776 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
1778 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
1779 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
1780 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
1782 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
1784 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
1785 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
1786 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
1787 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
1788 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
1790 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
1791 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
1792 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
1794 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
1795 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
1797 try_istri(wot,h,s, "1111111111111234", "0000000000000000");
1798 try_istri(wot,h,s, "1111111111111234", "0000000000000001");
1799 try_istri(wot,h,s, "1111111111111234", "0000000000000011");
1801 try_istri(wot,h,s, "1111111111111234", "1111111111111234");
1802 try_istri(wot,h,s, "a111111111111111", "000000000000000a");
1803 try_istri(wot,h,s, "b111111111111111", "000000000000000a");
1805 try_istri(wot,h,s, "b111111111111111", "0000000000000000");
1806 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1807 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
1808 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
1812 //////////////////////////////////////////////////////////
1813 // //
1814 // ISTRI_34 //
1815 // //
1816 //////////////////////////////////////////////////////////
1818 UInt h_pcmpistri_34 ( V128* argL, V128* argR )
1820 V128 block[2];
1821 memcpy(&block[0], argL, sizeof(V128));
1822 memcpy(&block[1], argR, sizeof(V128));
1823 ULong res, flags;
1824 __asm__ __volatile__(
1825 "subq $1024, %%rsp" "\n\t"
1826 "movdqu 0(%2), %%xmm2" "\n\t"
1827 "movdqu 16(%2), %%xmm11" "\n\t"
1828 "pcmpistri $0x34, %%xmm2, %%xmm11" "\n\t"
1829 "pushfq" "\n\t"
1830 "popq %%rdx" "\n\t"
1831 "movq %%rcx, %0" "\n\t"
1832 "movq %%rdx, %1" "\n\t"
1833 "addq $1024, %%rsp" "\n\t"
1834 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1835 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1837 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1840 UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
1842 V128 resV;
1843 UInt resOSZACP, resECX;
1844 Bool ok
1845 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1846 zmask_from_V128(argLU),
1847 zmask_from_V128(argRU),
1848 0x34, False/*!isSTRM*/
1850 assert(ok);
1851 resECX = resV.uInt[0];
1852 return (resOSZACP << 16) | resECX;
1855 void istri_34 ( void )
1857 char* wot = "34";
1858 UInt(*h)(V128*,V128*) = h_pcmpistri_34;
1859 UInt(*s)(V128*,V128*) = s_pcmpistri_34;
1861 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1862 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1863 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1864 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1866 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1867 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1868 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1869 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1870 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1872 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1874 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1875 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1876 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1878 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1879 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1880 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1882 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1883 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1885 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1886 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1888 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1889 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1890 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1891 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1895 //////////////////////////////////////////////////////////
1896 // //
1897 // ISTRI_14 //
1898 // //
1899 //////////////////////////////////////////////////////////
1901 UInt h_pcmpistri_14 ( V128* argL, V128* argR )
1903 V128 block[2];
1904 memcpy(&block[0], argL, sizeof(V128));
1905 memcpy(&block[1], argR, sizeof(V128));
1906 ULong res, flags;
1907 __asm__ __volatile__(
1908 "subq $1024, %%rsp" "\n\t"
1909 "movdqu 0(%2), %%xmm2" "\n\t"
1910 "movdqu 16(%2), %%xmm11" "\n\t"
1911 "pcmpistri $0x14, %%xmm2, %%xmm11" "\n\t"
1912 "pushfq" "\n\t"
1913 "popq %%rdx" "\n\t"
1914 "movq %%rcx, %0" "\n\t"
1915 "movq %%rdx, %1" "\n\t"
1916 "addq $1024, %%rsp" "\n\t"
1917 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1918 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1920 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1923 UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
1925 V128 resV;
1926 UInt resOSZACP, resECX;
1927 Bool ok
1928 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1929 zmask_from_V128(argLU),
1930 zmask_from_V128(argRU),
1931 0x14, False/*!isSTRM*/
1933 assert(ok);
1934 resECX = resV.uInt[0];
1935 return (resOSZACP << 16) | resECX;
1938 void istri_14 ( void )
1940 char* wot = "14";
1941 UInt(*h)(V128*,V128*) = h_pcmpistri_14;
1942 UInt(*s)(V128*,V128*) = s_pcmpistri_14;
1944 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1945 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1946 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1947 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1949 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1950 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1951 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1952 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1953 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1955 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1957 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1958 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1959 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1961 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1962 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1963 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1965 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1966 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1968 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1969 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1971 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1972 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1973 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1974 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1978 //////////////////////////////////////////////////////////
1979 // //
1980 // ISTRI_70 //
1981 // //
1982 //////////////////////////////////////////////////////////
1984 UInt h_pcmpistri_70 ( V128* argL, V128* argR )
1986 V128 block[2];
1987 memcpy(&block[0], argL, sizeof(V128));
1988 memcpy(&block[1], argR, sizeof(V128));
1989 ULong res, flags;
1990 __asm__ __volatile__(
1991 "subq $1024, %%rsp" "\n\t"
1992 "movdqu 0(%2), %%xmm2" "\n\t"
1993 "movdqu 16(%2), %%xmm11" "\n\t"
1994 "pcmpistri $0x70, %%xmm2, %%xmm11" "\n\t"
1995 "pushfq" "\n\t"
1996 "popq %%rdx" "\n\t"
1997 "movq %%rcx, %0" "\n\t"
1998 "movq %%rdx, %1" "\n\t"
1999 "addq $1024, %%rsp" "\n\t"
2000 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2001 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2003 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2006 UInt s_pcmpistri_70 ( V128* argLU, V128* argRU )
2008 V128 resV;
2009 UInt resOSZACP, resECX;
2010 Bool ok
2011 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2012 zmask_from_V128(argLU),
2013 zmask_from_V128(argRU),
2014 0x70, False/*!isSTRM*/
2016 assert(ok);
2017 resECX = resV.uInt[0];
2018 return (resOSZACP << 16) | resECX;
2021 void istri_70 ( void )
2023 char* wot = "70";
2024 UInt(*h)(V128*,V128*) = h_pcmpistri_70;
2025 UInt(*s)(V128*,V128*) = s_pcmpistri_70;
2027 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2028 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2029 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2030 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2032 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2033 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2034 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2035 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2036 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2038 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2039 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2040 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2041 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2043 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2044 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2046 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2047 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2048 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2049 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2051 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2053 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2054 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2058 //////////////////////////////////////////////////////////
2059 // //
2060 // ISTRI_62 //
2061 // //
2062 //////////////////////////////////////////////////////////
2064 UInt h_pcmpistri_62 ( V128* argL, V128* argR )
2066 V128 block[2];
2067 memcpy(&block[0], argL, sizeof(V128));
2068 memcpy(&block[1], argR, sizeof(V128));
2069 ULong res, flags;
2070 __asm__ __volatile__(
2071 "subq $1024, %%rsp" "\n\t"
2072 "movdqu 0(%2), %%xmm2" "\n\t"
2073 "movdqu 16(%2), %%xmm11" "\n\t"
2074 "pcmpistri $0x62, %%xmm2, %%xmm11" "\n\t"
2075 "pushfq" "\n\t"
2076 "popq %%rdx" "\n\t"
2077 "movq %%rcx, %0" "\n\t"
2078 "movq %%rdx, %1" "\n\t"
2079 "addq $1024, %%rsp" "\n\t"
2080 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2081 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2083 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2086 UInt s_pcmpistri_62 ( V128* argLU, V128* argRU )
2088 V128 resV;
2089 UInt resOSZACP, resECX;
2090 Bool ok
2091 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2092 zmask_from_V128(argLU),
2093 zmask_from_V128(argRU),
2094 0x62, False/*!isSTRM*/
2096 assert(ok);
2097 resECX = resV.uInt[0];
2098 return (resOSZACP << 16) | resECX;
2101 void istri_62 ( void )
2103 char* wot = "62";
2104 UInt(*h)(V128*,V128*) = h_pcmpistri_62;
2105 UInt(*s)(V128*,V128*) = s_pcmpistri_62;
2107 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2108 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2109 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2110 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2112 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2113 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2114 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2115 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2116 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2118 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2119 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2120 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2121 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2123 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2124 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2126 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2127 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2128 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2129 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2131 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2133 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2134 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2138 //////////////////////////////////////////////////////////
2139 // //
2140 // ISTRI_72 //
2141 // //
2142 //////////////////////////////////////////////////////////
2144 UInt h_pcmpistri_72 ( V128* argL, V128* argR )
2146 V128 block[2];
2147 memcpy(&block[0], argL, sizeof(V128));
2148 memcpy(&block[1], argR, sizeof(V128));
2149 ULong res, flags;
2150 __asm__ __volatile__(
2151 "subq $1024, %%rsp" "\n\t"
2152 "movdqu 0(%2), %%xmm2" "\n\t"
2153 "movdqu 16(%2), %%xmm11" "\n\t"
2154 "pcmpistri $0x72, %%xmm2, %%xmm11" "\n\t"
2155 "pushfq" "\n\t"
2156 "popq %%rdx" "\n\t"
2157 "movq %%rcx, %0" "\n\t"
2158 "movq %%rdx, %1" "\n\t"
2159 "addq $1024, %%rsp" "\n\t"
2160 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2161 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2163 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2166 UInt s_pcmpistri_72 ( V128* argLU, V128* argRU )
2168 V128 resV;
2169 UInt resOSZACP, resECX;
2170 Bool ok
2171 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2172 zmask_from_V128(argLU),
2173 zmask_from_V128(argRU),
2174 0x72, False/*!isSTRM*/
2176 assert(ok);
2177 resECX = resV.uInt[0];
2178 return (resOSZACP << 16) | resECX;
2181 void istri_72 ( void )
2183 char* wot = "72";
2184 UInt(*h)(V128*,V128*) = h_pcmpistri_72;
2185 UInt(*s)(V128*,V128*) = s_pcmpistri_72;
2187 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2188 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2189 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2190 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2192 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2193 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2194 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2195 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2196 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2198 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2199 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2200 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2201 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2203 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2204 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2206 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2207 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2208 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2209 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2211 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2213 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2214 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2218 //////////////////////////////////////////////////////////
2219 // //
2220 // ISTRI_10 //
2221 // //
2222 //////////////////////////////////////////////////////////
2224 UInt h_pcmpistri_10 ( V128* argL, V128* argR )
2226 V128 block[2];
2227 memcpy(&block[0], argL, sizeof(V128));
2228 memcpy(&block[1], argR, sizeof(V128));
2229 ULong res, flags;
2230 __asm__ __volatile__(
2231 "subq $1024, %%rsp" "\n\t"
2232 "movdqu 0(%2), %%xmm2" "\n\t"
2233 "movdqu 16(%2), %%xmm11" "\n\t"
2234 "pcmpistri $0x10, %%xmm2, %%xmm11" "\n\t"
2235 //"pcmpistrm $0x10, %%xmm2, %%xmm11" "\n\t"
2236 //"movd %%xmm0, %%ecx" "\n\t"
2237 "pushfq" "\n\t"
2238 "popq %%rdx" "\n\t"
2239 "movq %%rcx, %0" "\n\t"
2240 "movq %%rdx, %1" "\n\t"
2241 "addq $1024, %%rsp" "\n\t"
2242 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2243 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2245 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2248 UInt s_pcmpistri_10 ( V128* argLU, V128* argRU )
2250 V128 resV;
2251 UInt resOSZACP, resECX;
2252 Bool ok
2253 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2254 zmask_from_V128(argLU),
2255 zmask_from_V128(argRU),
2256 0x10, False/*!isSTRM*/
2258 assert(ok);
2259 resECX = resV.uInt[0];
2260 return (resOSZACP << 16) | resECX;
2263 void istri_10 ( void )
2265 char* wot = "10";
2266 UInt(*h)(V128*,V128*) = h_pcmpistri_10;
2267 UInt(*s)(V128*,V128*) = s_pcmpistri_10;
2269 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2270 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2271 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2272 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2274 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2275 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2276 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2277 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2278 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2280 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2281 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2282 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2283 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2285 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2286 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2288 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2289 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2290 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2291 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2293 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2295 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2296 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2300 //////////////////////////////////////////////////////////
2301 // //
2302 // main //
2303 // //
2304 //////////////////////////////////////////////////////////
2306 int main ( void )
2308 istri_4A();
2309 istri_3A();
2310 istri_08();
2311 istri_18();
2312 istri_1A();
2313 istri_02();
2314 istri_0C();
2315 istri_12();
2316 istri_44();
2317 istri_00();
2318 istri_38();
2319 istri_46();
2320 istri_30();
2321 istri_40();
2322 istri_42();
2323 istri_0E();
2324 istri_14();
2325 istri_34();
2326 istri_70();
2327 istri_62();
2328 istri_72();
2329 istri_10();
2330 return 0;