2 /* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3 pcmpistri to drive it. Does not check the e-vs-i or i-vs-m
10 typedef unsigned int UInt
;
11 typedef signed int Int
;
12 typedef unsigned char UChar
;
13 typedef signed char Char
;
14 typedef unsigned long long int ULong
;
16 #define False ((Bool)0)
17 #define True ((Bool)1)
19 //typedef unsigned char V128[16];
34 #define MASK_O (1ULL << SHIFT_O)
35 #define MASK_S (1ULL << SHIFT_S)
36 #define MASK_Z (1ULL << SHIFT_Z)
37 #define MASK_A (1ULL << SHIFT_A)
38 #define MASK_C (1ULL << SHIFT_C)
39 #define MASK_P (1ULL << SHIFT_P)
68 return 32 - clz32((~x
) & (x
-1));
71 void expand ( V128
* dst
, char* summary
)
74 assert( strlen(summary
) == 16 );
75 for (i
= 0; i
< 16; i
++) {
77 UChar x
= summary
[15-i
];
78 if (x
>= '0' && x
<= '9') { xx
= x
- '0'; }
79 else if (x
>= 'A' && x
<= 'F') { xx
= x
- 'A' + 10; }
80 else if (x
>= 'a' && x
<= 'f') { xx
= x
- 'a' + 10; }
90 void try_istri ( char* which
,
91 UInt(*h_fn
)(V128
*,V128
*),
92 UInt(*s_fn
)(V128
*,V128
*),
93 char* summL
, char* summR
)
95 assert(strlen(which
) == 2);
99 UInt h_res
= h_fn(&argL
, &argR
);
100 UInt s_res
= s_fn(&argL
, &argR
);
101 printf("istri %s %s %s -> %08x %08x %s\n",
102 which
, summL
, summR
, h_res
, s_res
, h_res
== s_res
? "" : "!!!!");
105 UInt
zmask_from_V128 ( V128
* arg
)
108 for (i
= 0; i
< 16; i
++) {
109 res
|= ((arg
->uChar
[i
] == 0) ? 1 : 0) << i
;
114 //////////////////////////////////////////////////////////
118 //////////////////////////////////////////////////////////
121 /* Given partial results from a pcmpXstrX operation (intRes1,
122 basically), generate an I format (index value for ECX) output, and
123 also the new OSZACP flags.
126 void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128
* resV
,
127 /*OUT*/UInt
* resOSZACP
,
129 UInt zmaskL
, UInt zmaskR
,
133 assert((pol
>> 2) == 0);
134 assert((idx
>> 1) == 0);
138 case 0: intRes2
= intRes1
; break; // pol +
139 case 1: intRes2
= ~intRes1
; break; // pol -
140 case 2: intRes2
= intRes1
; break; // pol m+
141 case 3: intRes2
= intRes1
^ validL
; break; // pol m-
145 // generate ecx value
149 newECX
= intRes2
== 0 ? 16 : (31 - clz32(intRes2
));
152 newECX
= intRes2
== 0 ? 16 : ctz32(intRes2
);
155 *(UInt
*)(&resV
[0]) = newECX
;
157 // generate new flags, common to all ISTRI and ISTRM cases
158 *resOSZACP
// A, P are zero
159 = ((intRes2
== 0) ? 0 : MASK_C
) // C == 0 iff intRes2 == 0
160 | ((zmaskL
== 0) ? 0 : MASK_Z
) // Z == 1 iff any in argL is 0
161 | ((zmaskR
== 0) ? 0 : MASK_S
) // S == 1 iff any in argR is 0
162 | ((intRes2
& 1) << SHIFT_O
); // O == IntRes2[0]
166 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
169 For xSTRI variants, the new ECX value is placed in the 32 bits
170 pointed to by *resV. For xSTRM variants, the result is a 128 bit
171 value and is placed at *resV in the obvious way.
173 For all variants, the new OSZACP value is placed at *resOSZACP.
175 argLV and argRV are the vector args. The caller must prepare a
176 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
177 must be 1 for each zero byte of the respective arg. For ESTRx
178 variants this is derived from the explicit length indication, and
179 must be 0 in all places except at the bit index corresponding to
180 the valid length (0 .. 16). If the valid length is 16 then the
181 mask must be all zeroes. In all cases, bits 31:16 must be zero.
183 imm8 is the original immediate from the instruction. isSTRM
184 indicates whether this is a xSTRM or xSTRI variant, which controls
185 how much of *resV is written.
187 If the given imm8 case can be handled, the return value is True.
188 If not, False is returned, and neither *resV nor *resOSZACP are
192 Bool
pcmpXstrX_WRK ( /*OUT*/V128
* resV
,
193 /*OUT*/UInt
* resOSZACP
,
194 V128
* argLV
, V128
* argRV
,
195 UInt zmaskL
, UInt zmaskR
,
196 UInt imm8
, Bool isSTRM
)
199 assert((zmaskL
>> 16) == 0);
200 assert((zmaskR
>> 16) == 0);
202 /* Explicitly reject any imm8 values that haven't been validated,
203 even if they would probably work. Life is too short to have
204 unvalidated cases in the code base. */
206 case 0x00: case 0x02:
207 case 0x08: case 0x0A: case 0x0C: case 0x0E:
208 case 0x10: case 0x12: case 0x14:
209 case 0x18: case 0x1A:
210 case 0x30: case 0x34:
211 case 0x38: case 0x3A:
212 case 0x40: case 0x42: case 0x44: case 0x46:
215 case 0x70: case 0x72:
221 UInt fmt
= (imm8
>> 0) & 3; // imm8[1:0] data format
222 UInt agg
= (imm8
>> 2) & 3; // imm8[3:2] aggregation fn
223 UInt pol
= (imm8
>> 4) & 3; // imm8[5:4] polarity
224 UInt idx
= (imm8
>> 6) & 1; // imm8[6] 1==msb/bytemask
226 /*----------------------------------------*/
227 /*-- strcmp on byte data --*/
228 /*----------------------------------------*/
230 if (agg
== 2/*equal each, aka strcmp*/
231 && (fmt
== 0/*ub*/ || fmt
== 2/*sb*/)
234 UChar
* argL
= (UChar
*)argLV
;
235 UChar
* argR
= (UChar
*)argRV
;
237 for (i
= 15; i
>= 0; i
--) {
240 boolResII
= (boolResII
<< 1) | (cL
== cR
? 1 : 0);
242 UInt validL
= ~(zmaskL
| -zmaskL
); // not(left(zmaskL))
243 UInt validR
= ~(zmaskR
| -zmaskR
); // not(left(zmaskR))
245 // do invalidation, common to all equal-each cases
247 = (boolResII
& validL
& validR
) // if both valid, use cmpres
248 | (~ (validL
| validR
)); // if both invalid, force 1
252 // generate I-format output
253 pcmpXstrX_WRK_gen_output_fmt_I(
255 intRes1
, zmaskL
, zmaskR
, validL
, pol
, idx
261 /*----------------------------------------*/
262 /*-- set membership on byte data --*/
263 /*----------------------------------------*/
265 if (agg
== 0/*equal any, aka find chars in a set*/
266 && (fmt
== 0/*ub*/ || fmt
== 2/*sb*/)
268 /* argL: the string, argR: charset */
270 UChar
* argL
= (UChar
*)argLV
;
271 UChar
* argR
= (UChar
*)argRV
;
273 UInt validL
= ~(zmaskL
| -zmaskL
); // not(left(zmaskL))
274 UInt validR
= ~(zmaskR
| -zmaskR
); // not(left(zmaskR))
276 for (si
= 0; si
< 16; si
++) {
277 if ((validL
& (1 << si
)) == 0)
278 // run off the end of the string.
281 for (ci
= 0; ci
< 16; ci
++) {
282 if ((validR
& (1 << ci
)) == 0) break;
283 if (argR
[ci
] == argL
[si
]) { m
= 1; break; }
285 boolRes
|= (m
<< si
);
288 // boolRes is "pre-invalidated"
289 UInt intRes1
= boolRes
& 0xFFFF;
291 // generate I-format output
292 pcmpXstrX_WRK_gen_output_fmt_I(
294 intRes1
, zmaskL
, zmaskR
, validL
, pol
, idx
300 /*----------------------------------------*/
301 /*-- substring search on byte data --*/
302 /*----------------------------------------*/
304 if (agg
== 3/*equal ordered, aka substring search*/
305 && (fmt
== 0/*ub*/ || fmt
== 2/*sb*/)
308 /* argL: haystack, argR: needle */
310 UChar
* argL
= (UChar
*)argLV
;
311 UChar
* argR
= (UChar
*)argRV
;
313 UInt validL
= ~(zmaskL
| -zmaskL
); // not(left(zmaskL))
314 UInt validR
= ~(zmaskR
| -zmaskR
); // not(left(zmaskR))
315 for (hi
= 0; hi
< 16; hi
++) {
317 for (ni
= 0; ni
< 16; ni
++) {
318 if ((validR
& (1 << ni
)) == 0) break;
321 if (argL
[i
] != argR
[ni
]) { m
= 0; break; }
323 boolRes
|= (m
<< hi
);
324 if ((validL
& (1 << hi
)) == 0)
325 // run off the end of the haystack
329 // boolRes is "pre-invalidated"
330 UInt intRes1
= boolRes
& 0xFFFF;
332 // generate I-format output
333 pcmpXstrX_WRK_gen_output_fmt_I(
335 intRes1
, zmaskL
, zmaskR
, validL
, pol
, idx
341 /*----------------------------------------*/
342 /*-- ranges, unsigned byte data --*/
343 /*----------------------------------------*/
345 if (agg
== 1/*ranges*/
349 /* argL: string, argR: range-pairs */
351 UChar
* argL
= (UChar
*)argLV
;
352 UChar
* argR
= (UChar
*)argRV
;
354 UInt validL
= ~(zmaskL
| -zmaskL
); // not(left(zmaskL))
355 UInt validR
= ~(zmaskR
| -zmaskR
); // not(left(zmaskR))
356 for (si
= 0; si
< 16; si
++) {
357 if ((validL
& (1 << si
)) == 0)
358 // run off the end of the string
361 for (ri
= 0; ri
< 16; ri
+= 2) {
362 if ((validR
& (3 << ri
)) != (3 << ri
)) break;
363 if (argR
[ri
] <= argL
[si
] && argL
[si
] <= argR
[ri
+1]) {
367 boolRes
|= (m
<< si
);
370 // boolRes is "pre-invalidated"
371 UInt intRes1
= boolRes
& 0xFFFF;
373 // generate I-format output
374 pcmpXstrX_WRK_gen_output_fmt_I(
376 intRes1
, zmaskL
, zmaskR
, validL
, pol
, idx
382 /*----------------------------------------*/
383 /*-- ranges, signed byte data --*/
384 /*----------------------------------------*/
386 if (agg
== 1/*ranges*/
390 /* argL: string, argR: range-pairs */
392 Char
* argL
= (Char
*)argLV
;
393 Char
* argR
= (Char
*)argRV
;
395 UInt validL
= ~(zmaskL
| -zmaskL
); // not(left(zmaskL))
396 UInt validR
= ~(zmaskR
| -zmaskR
); // not(left(zmaskR))
397 for (si
= 0; si
< 16; si
++) {
398 if ((validL
& (1 << si
)) == 0)
399 // run off the end of the string
402 for (ri
= 0; ri
< 16; ri
+= 2) {
403 if ((validR
& (3 << ri
)) != (3 << ri
)) break;
404 if (argR
[ri
] <= argL
[si
] && argL
[si
] <= argR
[ri
+1]) {
408 boolRes
|= (m
<< si
);
411 // boolRes is "pre-invalidated"
412 UInt intRes1
= boolRes
& 0xFFFF;
414 // generate I-format output
415 pcmpXstrX_WRK_gen_output_fmt_I(
417 intRes1
, zmaskL
, zmaskR
, validL
, pol
, idx
427 //////////////////////////////////////////////////////////
431 //////////////////////////////////////////////////////////
433 UInt
h_pcmpistri_4A ( V128
* argL
, V128
* argR
)
436 memcpy(&block
[0], argL
, sizeof(V128
));
437 memcpy(&block
[1], argR
, sizeof(V128
));
439 __asm__
__volatile__(
440 "subq $1024, %%rsp" "\n\t"
441 "movdqu 0(%2), %%xmm2" "\n\t"
442 "movdqu 16(%2), %%xmm11" "\n\t"
443 "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t"
446 "movq %%rcx, %0" "\n\t"
447 "movq %%rdx, %1" "\n\t"
448 "addq $1024, %%rsp" "\n\t"
449 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
450 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
452 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
455 UInt
s_pcmpistri_4A ( V128
* argLU
, V128
* argRU
)
458 UInt resOSZACP
, resECX
;
460 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
461 zmask_from_V128(argLU
),
462 zmask_from_V128(argRU
),
463 0x4A, False
/*!isSTRM*/
466 resECX
= resV
.uInt
[0];
467 return (resOSZACP
<< 16) | resECX
;
470 void istri_4A ( void )
473 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_4A
;
474 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_4A
;
476 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
478 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
479 try_istri(wot
,h
,s
, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
480 try_istri(wot
,h
,s
, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
481 try_istri(wot
,h
,s
, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
483 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
484 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
485 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
487 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
488 try_istri(wot
,h
,s
, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
489 try_istri(wot
,h
,s
, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
490 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
492 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
493 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
494 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
496 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
498 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
499 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
500 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
502 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
503 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
504 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
506 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
507 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
508 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
510 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaa0aaaaaaa");
511 try_istri(wot
,h
,s
, "8000000000000000", "aaaaaaaa0aaaaaaa");
512 try_istri(wot
,h
,s
, "0000000000000001", "aaaaaaaa0aaaaaaa");
514 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaaaaaaaaaa");
515 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "0000000000000000");
518 //////////////////////////////////////////////////////////
522 //////////////////////////////////////////////////////////
524 UInt
h_pcmpistri_3A ( V128
* argL
, V128
* argR
)
527 memcpy(&block
[0], argL
, sizeof(V128
));
528 memcpy(&block
[1], argR
, sizeof(V128
));
530 __asm__
__volatile__(
531 "subq $1024, %%rsp" "\n\t"
532 "movdqu 0(%2), %%xmm2" "\n\t"
533 "movdqu 16(%2), %%xmm11" "\n\t"
534 "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t"
537 "movq %%rcx, %0" "\n\t"
538 "movq %%rdx, %1" "\n\t"
539 "addq $1024, %%rsp" "\n\t"
540 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
541 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
543 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
546 UInt
s_pcmpistri_3A ( V128
* argLU
, V128
* argRU
)
549 UInt resOSZACP
, resECX
;
551 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
552 zmask_from_V128(argLU
),
553 zmask_from_V128(argRU
),
554 0x3A, False
/*!isSTRM*/
557 resECX
= resV
.uInt
[0];
558 return (resOSZACP
<< 16) | resECX
;
561 void istri_3A ( void )
564 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_3A
;
565 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_3A
;
567 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
569 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
570 try_istri(wot
,h
,s
, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
571 try_istri(wot
,h
,s
, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
572 try_istri(wot
,h
,s
, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
574 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
575 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
576 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
578 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
579 try_istri(wot
,h
,s
, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
580 try_istri(wot
,h
,s
, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
581 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
583 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
584 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
585 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
587 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
589 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
590 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
591 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
593 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
594 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
595 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
597 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
598 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
599 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
601 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaa0aaaaaaa");
602 try_istri(wot
,h
,s
, "8000000000000000", "aaaaaaaa0aaaaaaa");
603 try_istri(wot
,h
,s
, "0000000000000001", "aaaaaaaa0aaaaaaa");
605 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaaaaaaaaaa");
606 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "0000000000000000");
611 //////////////////////////////////////////////////////////
615 //////////////////////////////////////////////////////////
617 __attribute__((noinline
))
618 UInt
h_pcmpistri_0C ( V128
* argL
, V128
* argR
)
621 memcpy(&block
[0], argL
, sizeof(V128
));
622 memcpy(&block
[1], argR
, sizeof(V128
));
623 ULong res
= 0, flags
= 0;
624 __asm__
__volatile__(
625 "movdqu 0(%2), %%xmm2" "\n\t"
626 "movdqu 16(%2), %%xmm11" "\n\t"
627 "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t"
628 //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t"
629 //"movd %%xmm0, %%ecx" "\n\t"
632 "movq %%rcx, %0" "\n\t"
633 "movq %%rdx, %1" "\n\t"
634 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
635 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
637 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
640 UInt
s_pcmpistri_0C ( V128
* argLU
, V128
* argRU
)
643 UInt resOSZACP
, resECX
;
645 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
646 zmask_from_V128(argLU
),
647 zmask_from_V128(argRU
),
648 0x0C, False
/*!isSTRM*/
651 resECX
= resV
.uInt
[0];
652 return (resOSZACP
<< 16) | resECX
;
655 void istri_0C ( void )
658 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_0C
;
659 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_0C
;
661 try_istri(wot
,h
,s
, "111111111abcde11", "00000000000abcde");
663 try_istri(wot
,h
,s
, "111111111abcde11", "0000abcde00abcde");
665 try_istri(wot
,h
,s
, "1111111111abcde1", "00000000000abcde");
666 try_istri(wot
,h
,s
, "11111111111abcde", "00000000000abcde");
667 try_istri(wot
,h
,s
, "111111111111abcd", "00000000000abcde");
669 try_istri(wot
,h
,s
, "111abcde1abcde11", "00000000000abcde");
671 try_istri(wot
,h
,s
, "11abcde11abcde11", "00000000000abcde");
672 try_istri(wot
,h
,s
, "1abcde111abcde11", "00000000000abcde");
673 try_istri(wot
,h
,s
, "abcde1111abcde11", "00000000000abcde");
674 try_istri(wot
,h
,s
, "bcde11111abcde11", "00000000000abcde");
675 try_istri(wot
,h
,s
, "cde111111abcde11", "00000000000abcde");
677 try_istri(wot
,h
,s
, "01abcde11abcde11", "00000000000abcde");
678 try_istri(wot
,h
,s
, "00abcde11abcde11", "00000000000abcde");
679 try_istri(wot
,h
,s
, "000bcde11abcde11", "00000000000abcde");
681 try_istri(wot
,h
,s
, "00abcde10abcde11", "00000000000abcde");
682 try_istri(wot
,h
,s
, "00abcde100bcde11", "00000000000abcde");
684 try_istri(wot
,h
,s
, "1111111111111234", "0000000000000000");
685 try_istri(wot
,h
,s
, "1111111111111234", "0000000000000001");
686 try_istri(wot
,h
,s
, "1111111111111234", "0000000000000011");
688 try_istri(wot
,h
,s
, "1111111111111234", "1111111111111234");
689 try_istri(wot
,h
,s
, "a111111111111111", "000000000000000a");
690 try_istri(wot
,h
,s
, "b111111111111111", "000000000000000a");
692 try_istri(wot
,h
,s
, "b111111111111111", "0000000000000000");
693 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
694 try_istri(wot
,h
,s
, "123456789abcdef1", "0000000000000000");
695 try_istri(wot
,h
,s
, "0000000000000000", "123456789abcdef1");
699 //////////////////////////////////////////////////////////
703 //////////////////////////////////////////////////////////
705 UInt
h_pcmpistri_08 ( V128
* argL
, V128
* argR
)
708 memcpy(&block
[0], argL
, sizeof(V128
));
709 memcpy(&block
[1], argR
, sizeof(V128
));
711 __asm__
__volatile__(
712 "subq $1024, %%rsp" "\n\t"
713 "movdqu 0(%2), %%xmm2" "\n\t"
714 "movdqu 16(%2), %%xmm11" "\n\t"
715 "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t"
718 "movq %%rcx, %0" "\n\t"
719 "movq %%rdx, %1" "\n\t"
720 "addq $1024, %%rsp" "\n\t"
721 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
722 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
724 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
727 UInt
s_pcmpistri_08 ( V128
* argLU
, V128
* argRU
)
730 UInt resOSZACP
, resECX
;
732 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
733 zmask_from_V128(argLU
),
734 zmask_from_V128(argRU
),
735 0x08, False
/*!isSTRM*/
738 resECX
= resV
.uInt
[0];
739 return (resOSZACP
<< 16) | resECX
;
742 void istri_08 ( void )
745 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_08
;
746 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_08
;
748 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
750 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
751 try_istri(wot
,h
,s
, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
752 try_istri(wot
,h
,s
, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
753 try_istri(wot
,h
,s
, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
755 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
756 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
757 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
759 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
760 try_istri(wot
,h
,s
, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
761 try_istri(wot
,h
,s
, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
762 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
764 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
765 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
766 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
768 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
770 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
771 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
772 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
774 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
775 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
776 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
778 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
779 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
780 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
782 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaa0aaaaaaa");
783 try_istri(wot
,h
,s
, "8000000000000000", "aaaaaaaa0aaaaaaa");
784 try_istri(wot
,h
,s
, "0000000000000001", "aaaaaaaa0aaaaaaa");
786 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaaaaaaaaaa");
787 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "0000000000000000");
792 //////////////////////////////////////////////////////////
796 //////////////////////////////////////////////////////////
798 UInt
h_pcmpistri_18 ( V128
* argL
, V128
* argR
)
801 memcpy(&block
[0], argL
, sizeof(V128
));
802 memcpy(&block
[1], argR
, sizeof(V128
));
804 __asm__
__volatile__(
805 "subq $1024, %%rsp" "\n\t"
806 "movdqu 0(%2), %%xmm2" "\n\t"
807 "movdqu 16(%2), %%xmm11" "\n\t"
808 "pcmpistri $0x18, %%xmm2, %%xmm11" "\n\t"
811 "movq %%rcx, %0" "\n\t"
812 "movq %%rdx, %1" "\n\t"
813 "addq $1024, %%rsp" "\n\t"
814 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
815 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
817 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
820 UInt
s_pcmpistri_18 ( V128
* argLU
, V128
* argRU
)
823 UInt resOSZACP
, resECX
;
825 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
826 zmask_from_V128(argLU
),
827 zmask_from_V128(argRU
),
828 0x18, False
/*!isSTRM*/
831 resECX
= resV
.uInt
[0];
832 return (resOSZACP
<< 16) | resECX
;
835 void istri_18 ( void )
838 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_18
;
839 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_18
;
841 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
843 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
844 try_istri(wot
,h
,s
, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
845 try_istri(wot
,h
,s
, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
846 try_istri(wot
,h
,s
, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
848 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
849 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
850 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
852 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
853 try_istri(wot
,h
,s
, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
854 try_istri(wot
,h
,s
, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
855 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
857 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
858 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
859 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
861 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
863 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
864 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
865 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
867 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
868 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
869 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
871 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
872 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
873 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
875 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaa0aaaaaaa");
876 try_istri(wot
,h
,s
, "8000000000000000", "aaaaaaaa0aaaaaaa");
877 try_istri(wot
,h
,s
, "0000000000000001", "aaaaaaaa0aaaaaaa");
879 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaaaaaaaaaa");
880 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "0000000000000000");
885 //////////////////////////////////////////////////////////
889 //////////////////////////////////////////////////////////
891 UInt
h_pcmpistri_1A ( V128
* argL
, V128
* argR
)
894 memcpy(&block
[0], argL
, sizeof(V128
));
895 memcpy(&block
[1], argR
, sizeof(V128
));
897 __asm__
__volatile__(
898 "subq $1024, %%rsp" "\n\t"
899 "movdqu 0(%2), %%xmm2" "\n\t"
900 "movdqu 16(%2), %%xmm11" "\n\t"
901 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t"
904 "movq %%rcx, %0" "\n\t"
905 "movq %%rdx, %1" "\n\t"
906 "addq $1024, %%rsp" "\n\t"
907 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
908 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
910 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
913 UInt
s_pcmpistri_1A ( V128
* argLU
, V128
* argRU
)
916 UInt resOSZACP
, resECX
;
918 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
919 zmask_from_V128(argLU
),
920 zmask_from_V128(argRU
),
921 0x1A, False
/*!isSTRM*/
924 resECX
= resV
.uInt
[0];
925 return (resOSZACP
<< 16) | resECX
;
928 void istri_1A ( void )
931 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_1A
;
932 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_1A
;
934 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
936 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
937 try_istri(wot
,h
,s
, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
938 try_istri(wot
,h
,s
, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
939 try_istri(wot
,h
,s
, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
941 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
942 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
943 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
945 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
946 try_istri(wot
,h
,s
, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
947 try_istri(wot
,h
,s
, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
948 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
950 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
951 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
952 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
954 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
956 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
957 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
958 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
960 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
961 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
962 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
964 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
965 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
966 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
968 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaa0aaaaaaa");
969 try_istri(wot
,h
,s
, "8000000000000000", "aaaaaaaa0aaaaaaa");
970 try_istri(wot
,h
,s
, "0000000000000001", "aaaaaaaa0aaaaaaa");
972 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaaaaaaaaaa");
973 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "0000000000000000");
978 //////////////////////////////////////////////////////////
982 //////////////////////////////////////////////////////////
984 UInt
h_pcmpistri_02 ( V128
* argL
, V128
* argR
)
987 memcpy(&block
[0], argL
, sizeof(V128
));
988 memcpy(&block
[1], argR
, sizeof(V128
));
990 __asm__
__volatile__(
991 "subq $1024, %%rsp" "\n\t"
992 "movdqu 0(%2), %%xmm2" "\n\t"
993 "movdqu 16(%2), %%xmm11" "\n\t"
994 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t"
995 //"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t"
996 //"movd %%xmm0, %%ecx" "\n\t"
999 "movq %%rcx, %0" "\n\t"
1000 "movq %%rdx, %1" "\n\t"
1001 "addq $1024, %%rsp" "\n\t"
1002 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1003 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1005 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
1008 UInt
s_pcmpistri_02 ( V128
* argLU
, V128
* argRU
)
1011 UInt resOSZACP
, resECX
;
1013 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1014 zmask_from_V128(argLU
),
1015 zmask_from_V128(argRU
),
1016 0x02, False
/*!isSTRM*/
1019 resECX
= resV
.uInt
[0];
1020 return (resOSZACP
<< 16) | resECX
;
1023 void istri_02 ( void )
1026 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_02
;
1027 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_02
;
1029 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
1030 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
1031 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
1032 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
1034 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1035 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
1036 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
1037 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
1038 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
1040 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1041 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
1042 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
1043 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
1045 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1046 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1048 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
1049 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
1050 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
1051 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
1053 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
1055 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
1056 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
1060 //////////////////////////////////////////////////////////
1064 //////////////////////////////////////////////////////////
1066 UInt
h_pcmpistri_12 ( V128
* argL
, V128
* argR
)
1069 memcpy(&block
[0], argL
, sizeof(V128
));
1070 memcpy(&block
[1], argR
, sizeof(V128
));
1072 __asm__
__volatile__(
1073 "subq $1024, %%rsp" "\n\t"
1074 "movdqu 0(%2), %%xmm2" "\n\t"
1075 "movdqu 16(%2), %%xmm11" "\n\t"
1076 "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t"
1077 //"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t"
1078 //"movd %%xmm0, %%ecx" "\n\t"
1081 "movq %%rcx, %0" "\n\t"
1082 "movq %%rdx, %1" "\n\t"
1083 "addq $1024, %%rsp" "\n\t"
1084 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1085 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1087 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
1090 UInt
s_pcmpistri_12 ( V128
* argLU
, V128
* argRU
)
1093 UInt resOSZACP
, resECX
;
1095 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1096 zmask_from_V128(argLU
),
1097 zmask_from_V128(argRU
),
1098 0x12, False
/*!isSTRM*/
1101 resECX
= resV
.uInt
[0];
1102 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x12: hands each operand pair below to try_istri
   together with h_pcmpistri_12 (real instruction) and s_pcmpistri_12
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1105 void istri_12 ( void )
1108 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_12
;
1109 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_12
;
1111 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
1112 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
1113 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
1114 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
1116 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1117 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
1118 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
1119 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
1120 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
1122 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1123 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
1124 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
1125 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
1127 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1128 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1130 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
1131 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
1132 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
1133 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
1135 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
1137 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
1138 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
1143 //////////////////////////////////////////////////////////
1147 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x44.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x44, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   The commented-out pcmpistrm line uses $0x04, not $0x44; it is dead
   text and does not affect the test.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1149 UInt
h_pcmpistri_44 ( V128
* argL
, V128
* argR
)
1152 memcpy(&block
[0], argL
, sizeof(V128
));
1153 memcpy(&block
[1], argR
, sizeof(V128
));
1155 __asm__
__volatile__(
1156 "subq $1024, %%rsp" "\n\t"
1157 "movdqu 0(%2), %%xmm2" "\n\t"
1158 "movdqu 16(%2), %%xmm11" "\n\t"
1159 "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t"
1160 //"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t"
1161 //"movd %%xmm0, %%ecx" "\n\t"
1164 "movq %%rcx, %0" "\n\t"
1165 "movq %%rdx, %1" "\n\t"
1166 "addq $1024, %%rsp" "\n\t"
1167 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1168 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1170 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_44 (imm8 0x44).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1173 UInt
s_pcmpistri_44 ( V128
* argLU
, V128
* argRU
)
1176 UInt resOSZACP
, resECX
;
1178 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1179 zmask_from_V128(argLU
),
1180 zmask_from_V128(argRU
),
1181 0x44, False
/*!isSTRM*/
1184 resECX
= resV
.uInt
[0];
1185 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x44: hands each operand pair below to try_istri
   together with h_pcmpistri_44 (real instruction) and s_pcmpistri_44
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1188 void istri_44 ( void )
1191 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_44
;
1192 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_44
;
1194 try_istri(wot
,h
,s
, "aaaabbbbccccdddd", "00000000000000bc");
1195 try_istri(wot
,h
,s
, "aaaabbbbccccdddd", "00000000000000cb");
1196 try_istri(wot
,h
,s
, "baaabbbbccccdddd", "00000000000000cb");
1197 try_istri(wot
,h
,s
, "baaabbbbccccdddc", "00000000000000cb");
1199 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1200 try_istri(wot
,h
,s
, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1201 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1202 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1203 try_istri(wot
,h
,s
, "0000000000000000", "00000000000000cb");
1205 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1207 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1208 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "000000000000000b");
1209 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1211 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1212 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1213 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "000000000000000b");
1215 try_istri(wot
,h
,s
, "0123456789abcdef", "000000fecb975421");
1216 try_istri(wot
,h
,s
, "123456789abcdef1", "000000fecb975421");
1218 try_istri(wot
,h
,s
, "0123456789abcdef", "00000000dca86532");
1219 try_istri(wot
,h
,s
, "123456789abcdef1", "00000000dca86532");
1221 try_istri(wot
,h
,s
, "163887ec041a9b72", "fcd75adb9b3e895a");
1222 try_istri(wot
,h
,s
, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1223 try_istri(wot
,h
,s
, "2ca34182c29a82ab", "302ebd646775ab54");
1224 try_istri(wot
,h
,s
, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1228 //////////////////////////////////////////////////////////
1232 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x00.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x00, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   The two commented-out asm lines are a disabled pcmpistrm variant.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1234 UInt
h_pcmpistri_00 ( V128
* argL
, V128
* argR
)
1237 memcpy(&block
[0], argL
, sizeof(V128
));
1238 memcpy(&block
[1], argR
, sizeof(V128
));
1240 __asm__
__volatile__(
1241 "subq $1024, %%rsp" "\n\t"
1242 "movdqu 0(%2), %%xmm2" "\n\t"
1243 "movdqu 16(%2), %%xmm11" "\n\t"
1244 "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t"
1245 //"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t"
1246 //"movd %%xmm0, %%ecx" "\n\t"
1249 "movq %%rcx, %0" "\n\t"
1250 "movq %%rdx, %1" "\n\t"
1251 "addq $1024, %%rsp" "\n\t"
1252 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1253 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1255 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_00 (imm8 0x00).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1258 UInt
s_pcmpistri_00 ( V128
* argLU
, V128
* argRU
)
1261 UInt resOSZACP
, resECX
;
1263 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1264 zmask_from_V128(argLU
),
1265 zmask_from_V128(argRU
),
1266 0x00, False
/*!isSTRM*/
1269 resECX
= resV
.uInt
[0];
1270 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x00: hands each operand pair below to try_istri
   together with h_pcmpistri_00 (real instruction) and s_pcmpistri_00
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1273 void istri_00 ( void )
1276 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_00
;
1277 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_00
;
1279 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
1280 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
1281 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
1282 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
1284 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1285 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
1286 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
1287 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
1288 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
1290 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1291 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
1292 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
1293 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
1295 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1296 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1298 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
1299 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
1300 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
1301 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
1303 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
1305 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
1306 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
1310 //////////////////////////////////////////////////////////
1314 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x38.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x38, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1316 UInt
h_pcmpistri_38 ( V128
* argL
, V128
* argR
)
1319 memcpy(&block
[0], argL
, sizeof(V128
));
1320 memcpy(&block
[1], argR
, sizeof(V128
));
1322 __asm__
__volatile__(
1323 "subq $1024, %%rsp" "\n\t"
1324 "movdqu 0(%2), %%xmm2" "\n\t"
1325 "movdqu 16(%2), %%xmm11" "\n\t"
1326 "pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t"
1329 "movq %%rcx, %0" "\n\t"
1330 "movq %%rdx, %1" "\n\t"
1331 "addq $1024, %%rsp" "\n\t"
1332 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1333 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1335 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_38 (imm8 0x38).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1338 UInt
s_pcmpistri_38 ( V128
* argLU
, V128
* argRU
)
1341 UInt resOSZACP
, resECX
;
1343 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1344 zmask_from_V128(argLU
),
1345 zmask_from_V128(argRU
),
1346 0x38, False
/*!isSTRM*/
1349 resECX
= resV
.uInt
[0];
1350 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x38: hands each operand pair below to try_istri
   together with h_pcmpistri_38 (real instruction) and s_pcmpistri_38
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1353 void istri_38 ( void )
1356 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_38
;
1357 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_38
;
1359 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1361 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1362 try_istri(wot
,h
,s
, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1363 try_istri(wot
,h
,s
, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1364 try_istri(wot
,h
,s
, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1366 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1367 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1368 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1370 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1371 try_istri(wot
,h
,s
, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1372 try_istri(wot
,h
,s
, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1373 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1375 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1376 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1377 try_istri(wot
,h
,s
, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1379 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1381 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1382 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1383 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1385 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1386 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1387 try_istri(wot
,h
,s
, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1389 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1390 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1391 try_istri(wot
,h
,s
, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1393 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaa0aaaaaaa");
1394 try_istri(wot
,h
,s
, "8000000000000000", "aaaaaaaa0aaaaaaa");
1395 try_istri(wot
,h
,s
, "0000000000000001", "aaaaaaaa0aaaaaaa");
1397 try_istri(wot
,h
,s
, "0000000000000000", "aaaaaaaaaaaaaaaa");
1398 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "0000000000000000");
1403 //////////////////////////////////////////////////////////
1407 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x46.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x46, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1409 UInt
h_pcmpistri_46 ( V128
* argL
, V128
* argR
)
1412 memcpy(&block
[0], argL
, sizeof(V128
));
1413 memcpy(&block
[1], argR
, sizeof(V128
));
1415 __asm__
__volatile__(
1416 "subq $1024, %%rsp" "\n\t"
1417 "movdqu 0(%2), %%xmm2" "\n\t"
1418 "movdqu 16(%2), %%xmm11" "\n\t"
1419 "pcmpistri $0x46, %%xmm2, %%xmm11" "\n\t"
1422 "movq %%rcx, %0" "\n\t"
1423 "movq %%rdx, %1" "\n\t"
1424 "addq $1024, %%rsp" "\n\t"
1425 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1426 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1428 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_46 (imm8 0x46).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1431 UInt
s_pcmpistri_46 ( V128
* argLU
, V128
* argRU
)
1434 UInt resOSZACP
, resECX
;
1436 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1437 zmask_from_V128(argLU
),
1438 zmask_from_V128(argRU
),
1439 0x46, False
/*!isSTRM*/
1442 resECX
= resV
.uInt
[0];
1443 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x46: hands each operand pair below to try_istri
   together with h_pcmpistri_46 (real instruction) and s_pcmpistri_46
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1446 void istri_46 ( void )
1449 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_46
;
1450 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_46
;
1452 try_istri(wot
,h
,s
, "aaaabbbbccccdddd", "00000000000000bc");
1453 try_istri(wot
,h
,s
, "aaaabbbbccccdddd", "00000000000000cb");
1454 try_istri(wot
,h
,s
, "baaabbbbccccdddd", "00000000000000cb");
1455 try_istri(wot
,h
,s
, "baaabbbbccccdddc", "00000000000000cb");
1457 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1458 try_istri(wot
,h
,s
, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1459 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1460 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1461 try_istri(wot
,h
,s
, "0000000000000000", "00000000000000cb");
1463 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1465 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1466 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "000000000000000b");
1467 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1469 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1470 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1471 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "000000000000000b");
1473 try_istri(wot
,h
,s
, "0123456789abcdef", "000000fecb975421");
1474 try_istri(wot
,h
,s
, "123456789abcdef1", "000000fecb975421");
1476 try_istri(wot
,h
,s
, "0123456789abcdef", "00000000dca86532");
1477 try_istri(wot
,h
,s
, "123456789abcdef1", "00000000dca86532");
1479 try_istri(wot
,h
,s
, "163887ec041a9b72", "fcd75adb9b3e895a");
1480 try_istri(wot
,h
,s
, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1481 try_istri(wot
,h
,s
, "2ca34182c29a82ab", "302ebd646775ab54");
1482 try_istri(wot
,h
,s
, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1486 //////////////////////////////////////////////////////////
1490 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x30.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x30, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1492 UInt
h_pcmpistri_30 ( V128
* argL
, V128
* argR
)
1495 memcpy(&block
[0], argL
, sizeof(V128
));
1496 memcpy(&block
[1], argR
, sizeof(V128
));
1498 __asm__
__volatile__(
1499 "subq $1024, %%rsp" "\n\t"
1500 "movdqu 0(%2), %%xmm2" "\n\t"
1501 "movdqu 16(%2), %%xmm11" "\n\t"
1502 "pcmpistri $0x30, %%xmm2, %%xmm11" "\n\t"
1505 "movq %%rcx, %0" "\n\t"
1506 "movq %%rdx, %1" "\n\t"
1507 "addq $1024, %%rsp" "\n\t"
1508 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1509 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1511 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_30 (imm8 0x30).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1514 UInt
s_pcmpistri_30 ( V128
* argLU
, V128
* argRU
)
1517 UInt resOSZACP
, resECX
;
1519 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1520 zmask_from_V128(argLU
),
1521 zmask_from_V128(argRU
),
1522 0x30, False
/*!isSTRM*/
1525 resECX
= resV
.uInt
[0];
1526 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x30: hands each operand pair below to try_istri
   together with h_pcmpistri_30 (real instruction) and s_pcmpistri_30
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1529 void istri_30 ( void )
1532 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_30
;
1533 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_30
;
1535 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
1536 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
1537 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
1538 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
1540 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1541 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
1542 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
1543 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
1544 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
1546 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1547 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
1548 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
1549 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
1551 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1552 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1554 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
1555 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
1556 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
1557 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
1559 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
1561 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
1562 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
1566 //////////////////////////////////////////////////////////
1570 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x40.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x40, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1572 UInt
h_pcmpistri_40 ( V128
* argL
, V128
* argR
)
1575 memcpy(&block
[0], argL
, sizeof(V128
));
1576 memcpy(&block
[1], argR
, sizeof(V128
));
1578 __asm__
__volatile__(
1579 "subq $1024, %%rsp" "\n\t"
1580 "movdqu 0(%2), %%xmm2" "\n\t"
1581 "movdqu 16(%2), %%xmm11" "\n\t"
1582 "pcmpistri $0x40, %%xmm2, %%xmm11" "\n\t"
1585 "movq %%rcx, %0" "\n\t"
1586 "movq %%rdx, %1" "\n\t"
1587 "addq $1024, %%rsp" "\n\t"
1588 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1589 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1591 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_40 (imm8 0x40).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1594 UInt
s_pcmpistri_40 ( V128
* argLU
, V128
* argRU
)
1597 UInt resOSZACP
, resECX
;
1599 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1600 zmask_from_V128(argLU
),
1601 zmask_from_V128(argRU
),
1602 0x40, False
/*!isSTRM*/
1605 resECX
= resV
.uInt
[0];
1606 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x40: hands each operand pair below to try_istri
   together with h_pcmpistri_40 (real instruction) and s_pcmpistri_40
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1609 void istri_40 ( void )
1612 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_40
;
1613 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_40
;
1615 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
1616 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
1617 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
1618 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
1620 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1621 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
1622 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
1623 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
1624 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
1626 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1627 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
1628 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
1629 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
1631 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1632 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1634 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
1635 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
1636 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
1637 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
1639 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
1641 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
1642 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
1646 //////////////////////////////////////////////////////////
1650 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x42.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x42, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1652 UInt
h_pcmpistri_42 ( V128
* argL
, V128
* argR
)
1655 memcpy(&block
[0], argL
, sizeof(V128
));
1656 memcpy(&block
[1], argR
, sizeof(V128
));
1658 __asm__
__volatile__(
1659 "subq $1024, %%rsp" "\n\t"
1660 "movdqu 0(%2), %%xmm2" "\n\t"
1661 "movdqu 16(%2), %%xmm11" "\n\t"
1662 "pcmpistri $0x42, %%xmm2, %%xmm11" "\n\t"
1665 "movq %%rcx, %0" "\n\t"
1666 "movq %%rdx, %1" "\n\t"
1667 "addq $1024, %%rsp" "\n\t"
1668 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1669 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1671 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_42 (imm8 0x42).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1674 UInt
s_pcmpistri_42 ( V128
* argLU
, V128
* argRU
)
1677 UInt resOSZACP
, resECX
;
1679 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1680 zmask_from_V128(argLU
),
1681 zmask_from_V128(argRU
),
1682 0x42, False
/*!isSTRM*/
1685 resECX
= resV
.uInt
[0];
1686 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x42: hands each operand pair below to try_istri
   together with h_pcmpistri_42 (real instruction) and s_pcmpistri_42
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1689 void istri_42 ( void )
1692 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_42
;
1693 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_42
;
1695 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
1696 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
1697 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
1698 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
1700 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1701 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
1702 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
1703 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
1704 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
1706 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
1707 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
1708 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
1709 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
1711 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1712 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1714 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
1715 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
1716 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
1717 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
1719 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
1721 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
1722 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
1726 //////////////////////////////////////////////////////////
1730 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x0E.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x0E, %xmm2, %xmm11`.
   Unlike the sibling h_pcmpistri_* helpers, this one is marked noinline,
   zero-initialises res and flags, and does not adjust %rsp around the
   instruction.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declaration of block and the step that puts the
   flags into %rdx are not visible in this excerpt -- confirm against the
   full function, including whether the missing %rsp adjustment is
   intentional. */
1732 __attribute__((noinline
))
1733 UInt
h_pcmpistri_0E ( V128
* argL
, V128
* argR
)
1736 memcpy(&block
[0], argL
, sizeof(V128
));
1737 memcpy(&block
[1], argR
, sizeof(V128
));
1738 ULong res
= 0, flags
= 0;
1739 __asm__
__volatile__(
1740 "movdqu 0(%2), %%xmm2" "\n\t"
1741 "movdqu 16(%2), %%xmm11" "\n\t"
1742 "pcmpistri $0x0E, %%xmm2, %%xmm11" "\n\t"
1745 "movq %%rcx, %0" "\n\t"
1746 "movq %%rdx, %1" "\n\t"
1747 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1748 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1750 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_0E (imm8 0x0E).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1753 UInt
s_pcmpistri_0E ( V128
* argLU
, V128
* argRU
)
1756 UInt resOSZACP
, resECX
;
1758 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1759 zmask_from_V128(argLU
),
1760 zmask_from_V128(argRU
),
1761 0x0E, False
/*!isSTRM*/
1764 resECX
= resV
.uInt
[0];
1765 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x0E: hands each operand pair below to try_istri
   together with h_pcmpistri_0E (real instruction) and s_pcmpistri_0E
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1768 void istri_0E ( void )
1771 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_0E
;
1772 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_0E
;
1774 try_istri(wot
,h
,s
, "111111111abcde11", "00000000000abcde");
1776 try_istri(wot
,h
,s
, "111111111abcde11", "0000abcde00abcde");
1778 try_istri(wot
,h
,s
, "1111111111abcde1", "00000000000abcde");
1779 try_istri(wot
,h
,s
, "11111111111abcde", "00000000000abcde");
1780 try_istri(wot
,h
,s
, "111111111111abcd", "00000000000abcde");
1782 try_istri(wot
,h
,s
, "111abcde1abcde11", "00000000000abcde");
1784 try_istri(wot
,h
,s
, "11abcde11abcde11", "00000000000abcde");
1785 try_istri(wot
,h
,s
, "1abcde111abcde11", "00000000000abcde");
1786 try_istri(wot
,h
,s
, "abcde1111abcde11", "00000000000abcde");
1787 try_istri(wot
,h
,s
, "bcde11111abcde11", "00000000000abcde");
1788 try_istri(wot
,h
,s
, "cde111111abcde11", "00000000000abcde");
1790 try_istri(wot
,h
,s
, "01abcde11abcde11", "00000000000abcde");
1791 try_istri(wot
,h
,s
, "00abcde11abcde11", "00000000000abcde");
1792 try_istri(wot
,h
,s
, "000bcde11abcde11", "00000000000abcde");
1794 try_istri(wot
,h
,s
, "00abcde10abcde11", "00000000000abcde");
1795 try_istri(wot
,h
,s
, "00abcde100bcde11", "00000000000abcde");
1797 try_istri(wot
,h
,s
, "1111111111111234", "0000000000000000");
1798 try_istri(wot
,h
,s
, "1111111111111234", "0000000000000001");
1799 try_istri(wot
,h
,s
, "1111111111111234", "0000000000000011");
1801 try_istri(wot
,h
,s
, "1111111111111234", "1111111111111234");
1802 try_istri(wot
,h
,s
, "a111111111111111", "000000000000000a");
1803 try_istri(wot
,h
,s
, "b111111111111111", "000000000000000a");
1805 try_istri(wot
,h
,s
, "b111111111111111", "0000000000000000");
1806 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1807 try_istri(wot
,h
,s
, "123456789abcdef1", "0000000000000000");
1808 try_istri(wot
,h
,s
, "0000000000000000", "123456789abcdef1");
1812 //////////////////////////////////////////////////////////
1816 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x34.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x34, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1818 UInt
h_pcmpistri_34 ( V128
* argL
, V128
* argR
)
1821 memcpy(&block
[0], argL
, sizeof(V128
));
1822 memcpy(&block
[1], argR
, sizeof(V128
));
1824 __asm__
__volatile__(
1825 "subq $1024, %%rsp" "\n\t"
1826 "movdqu 0(%2), %%xmm2" "\n\t"
1827 "movdqu 16(%2), %%xmm11" "\n\t"
1828 "pcmpistri $0x34, %%xmm2, %%xmm11" "\n\t"
1831 "movq %%rcx, %0" "\n\t"
1832 "movq %%rdx, %1" "\n\t"
1833 "addq $1024, %%rsp" "\n\t"
1834 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1835 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1837 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_34 (imm8 0x34).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1840 UInt
s_pcmpistri_34 ( V128
* argLU
, V128
* argRU
)
1843 UInt resOSZACP
, resECX
;
1845 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1846 zmask_from_V128(argLU
),
1847 zmask_from_V128(argRU
),
1848 0x34, False
/*!isSTRM*/
1851 resECX
= resV
.uInt
[0];
1852 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x34: hands each operand pair below to try_istri
   together with h_pcmpistri_34 (real instruction) and s_pcmpistri_34
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1855 void istri_34 ( void )
1858 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_34
;
1859 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_34
;
1861 try_istri(wot
,h
,s
, "aaaabbbbccccdddd", "00000000000000bc");
1862 try_istri(wot
,h
,s
, "aaaabbbbccccdddd", "00000000000000cb");
1863 try_istri(wot
,h
,s
, "baaabbbbccccdddd", "00000000000000cb");
1864 try_istri(wot
,h
,s
, "baaabbbbccccdddc", "00000000000000cb");
1866 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1867 try_istri(wot
,h
,s
, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1868 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1869 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1870 try_istri(wot
,h
,s
, "0000000000000000", "00000000000000cb");
1872 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1874 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1875 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "000000000000000b");
1876 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1878 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1879 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1880 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "000000000000000b");
1882 try_istri(wot
,h
,s
, "0123456789abcdef", "000000fecb975421");
1883 try_istri(wot
,h
,s
, "123456789abcdef1", "000000fecb975421");
1885 try_istri(wot
,h
,s
, "0123456789abcdef", "00000000dca86532");
1886 try_istri(wot
,h
,s
, "123456789abcdef1", "00000000dca86532");
1888 try_istri(wot
,h
,s
, "163887ec041a9b72", "fcd75adb9b3e895a");
1889 try_istri(wot
,h
,s
, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1890 try_istri(wot
,h
,s
, "2ca34182c29a82ab", "302ebd646775ab54");
1891 try_istri(wot
,h
,s
, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1895 //////////////////////////////////////////////////////////
1899 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x14.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x14, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1901 UInt
h_pcmpistri_14 ( V128
* argL
, V128
* argR
)
1904 memcpy(&block
[0], argL
, sizeof(V128
));
1905 memcpy(&block
[1], argR
, sizeof(V128
));
1907 __asm__
__volatile__(
1908 "subq $1024, %%rsp" "\n\t"
1909 "movdqu 0(%2), %%xmm2" "\n\t"
1910 "movdqu 16(%2), %%xmm11" "\n\t"
1911 "pcmpistri $0x14, %%xmm2, %%xmm11" "\n\t"
1914 "movq %%rcx, %0" "\n\t"
1915 "movq %%rdx, %1" "\n\t"
1916 "addq $1024, %%rsp" "\n\t"
1917 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
1918 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1920 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_14 (imm8 0x14).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
1923 UInt
s_pcmpistri_14 ( V128
* argLU
, V128
* argRU
)
1926 UInt resOSZACP
, resECX
;
1928 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
1929 zmask_from_V128(argLU
),
1930 zmask_from_V128(argRU
),
1931 0x14, False
/*!isSTRM*/
1934 resECX
= resV
.uInt
[0];
1935 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x14: hands each operand pair below to try_istri
   together with h_pcmpistri_14 (real instruction) and s_pcmpistri_14
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
1938 void istri_14 ( void )
1941 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_14
;
1942 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_14
;
1944 try_istri(wot
,h
,s
, "aaaabbbbccccdddd", "00000000000000bc");
1945 try_istri(wot
,h
,s
, "aaaabbbbccccdddd", "00000000000000cb");
1946 try_istri(wot
,h
,s
, "baaabbbbccccdddd", "00000000000000cb");
1947 try_istri(wot
,h
,s
, "baaabbbbccccdddc", "00000000000000cb");
1949 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1950 try_istri(wot
,h
,s
, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1951 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1952 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1953 try_istri(wot
,h
,s
, "0000000000000000", "00000000000000cb");
1955 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
1957 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1958 try_istri(wot
,h
,s
, "bbbbbbbbbbbbbbbb", "000000000000000b");
1959 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1961 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1962 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1963 try_istri(wot
,h
,s
, "b4b4b4b4b4b4b4b4", "000000000000000b");
1965 try_istri(wot
,h
,s
, "0123456789abcdef", "000000fecb975421");
1966 try_istri(wot
,h
,s
, "123456789abcdef1", "000000fecb975421");
1968 try_istri(wot
,h
,s
, "0123456789abcdef", "00000000dca86532");
1969 try_istri(wot
,h
,s
, "123456789abcdef1", "00000000dca86532");
1971 try_istri(wot
,h
,s
, "163887ec041a9b72", "fcd75adb9b3e895a");
1972 try_istri(wot
,h
,s
, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1973 try_istri(wot
,h
,s
, "2ca34182c29a82ab", "302ebd646775ab54");
1974 try_istri(wot
,h
,s
, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1978 //////////////////////////////////////////////////////////
1982 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x70.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x70, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
1984 UInt
h_pcmpistri_70 ( V128
* argL
, V128
* argR
)
1987 memcpy(&block
[0], argL
, sizeof(V128
));
1988 memcpy(&block
[1], argR
, sizeof(V128
));
1990 __asm__
__volatile__(
1991 "subq $1024, %%rsp" "\n\t"
1992 "movdqu 0(%2), %%xmm2" "\n\t"
1993 "movdqu 16(%2), %%xmm11" "\n\t"
1994 "pcmpistri $0x70, %%xmm2, %%xmm11" "\n\t"
1997 "movq %%rcx, %0" "\n\t"
1998 "movq %%rdx, %1" "\n\t"
1999 "addq $1024, %%rsp" "\n\t"
2000 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
2001 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2003 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software-model counterpart of h_pcmpistri_70 (imm8 0x70).
   Passes both operands, plus a zero-byte mask for each computed by
   zmask_from_V128, to pcmpXstrX_WRK with isSTRM == False.
   Returns (resOSZACP << 16) | resECX, where resECX is resV.uInt[0].
   NOTE(review): the declaration of resV and the variable that receives
   pcmpXstrX_WRK's return value are not visible in this excerpt. */
2006 UInt
s_pcmpistri_70 ( V128
* argLU
, V128
* argRU
)
2009 UInt resOSZACP
, resECX
;
2011 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
2012 zmask_from_V128(argLU
),
2013 zmask_from_V128(argRU
),
2014 0x70, False
/*!isSTRM*/
2017 resECX
= resV
.uInt
[0];
2018 return (resOSZACP
<< 16) | resECX
;
/* Driver for imm8 0x70: hands each operand pair below to try_istri
   together with h_pcmpistri_70 (real instruction) and s_pcmpistri_70
   (software model).  try_istri prints both results and appends "!!!!"
   when they differ.  Every operand is a 16-character hex summary string.
   NOTE(review): `wot` is declared outside the visible lines; try_istri
   asserts that it is a 2-character label. */
2021 void istri_70 ( void )
2024 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_70
;
2025 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_70
;
2027 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
2028 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
2029 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
2030 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
2032 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
2033 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
2034 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
2035 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
2036 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
2038 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
2039 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
2040 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
2041 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
2043 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
2044 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2046 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
2047 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
2048 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
2049 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
2051 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
2053 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
2054 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
2058 //////////////////////////////////////////////////////////
2062 //////////////////////////////////////////////////////////
/* Hardware-side reference for imm8 0x62.
   Copies *argL into block[0] and *argR into block[1], loads them into
   %xmm2 and %xmm11 respectively, and executes a real
   `pcmpistri $0x62, %xmm2, %xmm11`.  %rsp is lowered by 1024 bytes for
   the duration of the asm and restored before it ends.
   Returns ((flags & 0x8D5) << 16) | (res & 0xFFFF), with res copied from
   %rcx and flags from %rdx; 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11.
   NOTE(review): the declarations of block/res/flags and the step that
   puts the flags into %rdx are not visible in this excerpt -- confirm
   against the full function. */
2064 UInt
h_pcmpistri_62 ( V128
* argL
, V128
* argR
)
2067 memcpy(&block
[0], argL
, sizeof(V128
));
2068 memcpy(&block
[1], argR
, sizeof(V128
));
2070 __asm__
__volatile__(
2071 "subq $1024, %%rsp" "\n\t"
2072 "movdqu 0(%2), %%xmm2" "\n\t"
2073 "movdqu 16(%2), %%xmm11" "\n\t"
2074 "pcmpistri $0x62, %%xmm2, %%xmm11" "\n\t"
2077 "movq %%rcx, %0" "\n\t"
2078 "movq %%rdx, %1" "\n\t"
2079 "addq $1024, %%rsp" "\n\t"
2080 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
2081 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2083 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software counterpart of h_pcmpistri_62: calls pcmpXstrX_WRK with
   imm8 = 0x62 and isSTRM = False, passing the per-byte zero masks of
   both operands as computed by zmask_from_V128.  Returns
   (resOSZACP << 16) | resECX, where resECX is resV.uInt[0]. */
2086 UInt
s_pcmpistri_62 ( V128
* argLU
, V128
* argRU
)
2089 UInt resOSZACP
, resECX
;
2091 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
2092 zmask_from_V128(argLU
),
2093 zmask_from_V128(argRU
),
2094 0x62, False
/*!isSTRM*/
2097 resECX
= resV
.uInt
[0];
2098 return (resOSZACP
<< 16) | resECX
;
/* Driver for the imm8 = 0x62 case: hands h_pcmpistri_62 and
   s_pcmpistri_62 to try_istri for a fixed list of operand pairs, each
   operand written as a 16-character summary string.  try_istri prints
   both results for every pair and appends "!!!!" when they differ. */
2101 void istri_62 ( void )
2104 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_62
;
2105 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_62
;
2107 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
2108 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
2109 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
2110 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
2112 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
2113 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
2114 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
2115 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
2116 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
2118 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
2119 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
2120 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
2121 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
2123 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
2124 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2126 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
2127 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
2128 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
2129 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
2131 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
2133 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
2134 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
2138 //////////////////////////////////////////////////////////
2142 //////////////////////////////////////////////////////////
/* Runs a real pcmpistri with imm8 = 0x72.  argL and argR are copied
   into block[0] and block[1] and loaded with movdqu into xmm2 and
   xmm11 respectively.  After the instruction, rcx is copied to res and
   rdx to flags.  The return value is (flags & 0x8D5) << 16 OR-ed with
   the low 16 bits of res.
   NOTE(review): rsp is lowered by 1024 around the sequence, presumably
   to keep any stack use inside the asm clear of the red zone -- confirm.
   NOTE(review): 0x8D5 presumably selects the O,S,Z,A,P,C flag bits
   (compare resOSZACP in s_pcmpistri_72) -- confirm. */
2144 UInt
h_pcmpistri_72 ( V128
* argL
, V128
* argR
)
2147 memcpy(&block
[0], argL
, sizeof(V128
));
2148 memcpy(&block
[1], argR
, sizeof(V128
));
2150 __asm__
__volatile__(
2151 "subq $1024, %%rsp" "\n\t"
2152 "movdqu 0(%2), %%xmm2" "\n\t"
2153 "movdqu 16(%2), %%xmm11" "\n\t"
2154 "pcmpistri $0x72, %%xmm2, %%xmm11" "\n\t"
2157 "movq %%rcx, %0" "\n\t"
2158 "movq %%rdx, %1" "\n\t"
2159 "addq $1024, %%rsp" "\n\t"
2160 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
2161 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2163 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software counterpart of h_pcmpistri_72: calls pcmpXstrX_WRK with
   imm8 = 0x72 and isSTRM = False, passing the per-byte zero masks of
   both operands as computed by zmask_from_V128.  Returns
   (resOSZACP << 16) | resECX, where resECX is resV.uInt[0]. */
2166 UInt
s_pcmpistri_72 ( V128
* argLU
, V128
* argRU
)
2169 UInt resOSZACP
, resECX
;
2171 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
2172 zmask_from_V128(argLU
),
2173 zmask_from_V128(argRU
),
2174 0x72, False
/*!isSTRM*/
2177 resECX
= resV
.uInt
[0];
2178 return (resOSZACP
<< 16) | resECX
;
/* Driver for the imm8 = 0x72 case: hands h_pcmpistri_72 and
   s_pcmpistri_72 to try_istri for a fixed list of operand pairs, each
   operand written as a 16-character summary string.  try_istri prints
   both results for every pair and appends "!!!!" when they differ. */
2181 void istri_72 ( void )
2184 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_72
;
2185 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_72
;
2187 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
2188 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
2189 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
2190 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
2192 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
2193 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
2194 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
2195 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
2196 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
2198 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
2199 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
2200 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
2201 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
2203 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
2204 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2206 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
2207 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
2208 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
2209 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
2211 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
2213 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
2214 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
2218 //////////////////////////////////////////////////////////
2222 //////////////////////////////////////////////////////////
/* Runs a real pcmpistri with imm8 = 0x10.  argL and argR are copied
   into block[0] and block[1] and loaded with movdqu into xmm2 and
   xmm11 respectively.  After the instruction, rcx is copied to res and
   rdx to flags.  The return value is (flags & 0x8D5) << 16 OR-ed with
   the low 16 bits of res.  The two commented-out asm lines are a
   disabled pcmpistrm variant that would take the result from xmm0.
   NOTE(review): rsp is lowered by 1024 around the sequence, presumably
   to keep any stack use inside the asm clear of the red zone -- confirm.
   NOTE(review): 0x8D5 presumably selects the O,S,Z,A,P,C flag bits
   (compare resOSZACP in s_pcmpistri_10) -- confirm. */
2224 UInt
h_pcmpistri_10 ( V128
* argL
, V128
* argR
)
2227 memcpy(&block
[0], argL
, sizeof(V128
));
2228 memcpy(&block
[1], argR
, sizeof(V128
));
2230 __asm__
__volatile__(
2231 "subq $1024, %%rsp" "\n\t"
2232 "movdqu 0(%2), %%xmm2" "\n\t"
2233 "movdqu 16(%2), %%xmm11" "\n\t"
2234 "pcmpistri $0x10, %%xmm2, %%xmm11" "\n\t"
2235 //"pcmpistrm $0x10, %%xmm2, %%xmm11" "\n\t"
2236 //"movd %%xmm0, %%ecx" "\n\t"
2239 "movq %%rcx, %0" "\n\t"
2240 "movq %%rdx, %1" "\n\t"
2241 "addq $1024, %%rsp" "\n\t"
2242 : /*out*/ "=r"(res
), "=r"(flags
) : "r"/*in*/(&block
[0])
2243 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2245 return ((flags
& 0x8D5) << 16) | (res
& 0xFFFF);
/* Software counterpart of h_pcmpistri_10: calls pcmpXstrX_WRK with
   imm8 = 0x10 and isSTRM = False, passing the per-byte zero masks of
   both operands as computed by zmask_from_V128.  Returns
   (resOSZACP << 16) | resECX, where resECX is resV.uInt[0]. */
2248 UInt
s_pcmpistri_10 ( V128
* argLU
, V128
* argRU
)
2251 UInt resOSZACP
, resECX
;
2253 = pcmpXstrX_WRK( &resV
, &resOSZACP
, argLU
, argRU
,
2254 zmask_from_V128(argLU
),
2255 zmask_from_V128(argRU
),
2256 0x10, False
/*!isSTRM*/
2259 resECX
= resV
.uInt
[0];
2260 return (resOSZACP
<< 16) | resECX
;
/* Driver for the imm8 = 0x10 case: hands h_pcmpistri_10 and
   s_pcmpistri_10 to try_istri for a fixed list of operand pairs, each
   operand written as a 16-character summary string.  try_istri prints
   both results for every pair and appends "!!!!" when they differ. */
2263 void istri_10 ( void )
2266 UInt(*h
)(V128
*,V128
*) = h_pcmpistri_10
;
2267 UInt(*s
)(V128
*,V128
*) = s_pcmpistri_10
;
2269 try_istri(wot
,h
,s
, "abcdacbdabcdabcd", "000000000000000a");
2270 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000000b");
2271 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "00000000000000ab");
2272 try_istri(wot
,h
,s
, "abcdabc0abcdabcd", "000000000000abcd");
2274 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
2275 try_istri(wot
,h
,s
, "0bcdabcdabcdabcd", "000000000000abcd");
2276 try_istri(wot
,h
,s
, "abcdabcdabcda0cd", "000000000000abcd");
2277 try_istri(wot
,h
,s
, "abcdabcdabcdab0d", "000000000000abcd");
2278 try_istri(wot
,h
,s
, "abcdabcdabcdabc0", "000000000000abcd");
2280 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abcd");
2281 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000a0cd");
2282 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000ab0d");
2283 try_istri(wot
,h
,s
, "abcdabcdabcdabcd", "000000000000abc0");
2285 try_istri(wot
,h
,s
, "0000000000000000", "0000000000000000");
2286 try_istri(wot
,h
,s
, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2288 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000abcd");
2289 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000dcba");
2290 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000bbbb");
2291 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "000000000000baba");
2293 try_istri(wot
,h
,s
, "0000abcdabcdabcd", "00000000000baba0");
2295 try_istri(wot
,h
,s
, "0ddc0ffeebadf00d", "00000000cafebabe");
2296 try_istri(wot
,h
,s
, "0ddc0ffeebadfeed", "00000000cafebabe");
2300 //////////////////////////////////////////////////////////
2304 //////////////////////////////////////////////////////////